In [1]:
import pandas as pd
import requests
import time
import re
from collections import Counter
import google.generativeai as genai
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

In [2]:
df = pd.read_csv("Top-journals.csv")
df

Unnamed: 0,Journal,Link,Field
0,IEEE Transactions on Neural Networks and Learn...,https://cis.ieee.org/publications/t-neural-net...,AI/ML
1,IEEE Transactions on Pattern Analysis and Mach...,https://www.computer.org/csdl/journal/tp,AI/ML
2,Journal of Machine Learning Research,https://www.jmlr.org/,AI/ML
3,Nature Machine Intelligence,https://www.nature.com/natmachintell/,AI/ML
4,Journal of Artificial Intelligence Research (J...,https://jair.org/index.php/jair,AI/ML
5,International Journal of Computer Vision,https://www.springer.com/journal/11263,AI/ML
6,Pattern Recognition,https://www.journals.elsevier.com/pattern-reco...,AI/ML
7,Expert Systems with Applications,https://www.journals.elsevier.com/expert-syste...,AI/ML
8,Neurocomputing,https://www.journals.elsevier.com/neurocomputing,AI/ML
9,Knowledge-Based Systems,https://www.journals.elsevier.com/knowledge-ba...,AI/ML


In [3]:
conferences = df

In [4]:
conferences.Title = conferences.Title.astype(str)

AttributeError: 'DataFrame' object has no attribute 'Title'

In [None]:
#Let the scraping begin

In [12]:
# API Keys - Replace with your actual keys
GEMINI_API_KEY = ""
GOOGLE_SEARCH_API_KEY = ""
SEARCH_ENGINE_ID = ""

In [7]:
def google_search(query, api_key, search_engine_id, num_results=3):
    """Search Google using Custom Search API"""
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        'key': api_key,
        'cx': search_engine_id,
        'q': query,
        'num': num_results
    }
    
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error searching for {query}: {e}")
        return None

In [8]:
def extract_keywords_with_gemini(text):
    """Extract relevant keywords from text using Gemini API"""
    import google.generativeai as genai
    
    # Configure Gemini API
    genai.configure(api_key=GEMINI_API_KEY)
    model = genai.GenerativeModel('gemini-2.5-pro-preview-05-06')
    
    # Create prompt for keyword extraction
    prompt = f"""
Analyze the following text and extract the 10 most relevant keywords and topics.

Focus on:
- Technical or specialized subject areas discussed
- Industry sectors or application domains referenced
- Major themes, challenges, or areas of focus
- Important methods, tools, or approaches mentioned

Return only a concise, comma-separated list of 5-10 highly relevant keywords or topics that best summarize the content. Do not include explanations.

Text to analyze:
{text[:2000]}  # Limit text length for API efficiency
"""

    
    try:
        response = model.generate_content(prompt)
        keywords_text = response.text.strip()
        
        # Clean and format keywords
        keywords = [k.strip().lower() for k in keywords_text.split(',') if k.strip()]
        return keywords[:10]  # Limit to 10 keywords max
        
    except Exception as e:
        print(f"Error with Gemini API: {e}")
        # Fallback to simple keyword extraction
        return extract_keywords_simple(text)

In [13]:
def analyze_conference_keywords(df):
    """Analyze each conference and extract keywords"""
    keywords_list = []
    
    for index, row in df.iterrows():
        print(f"Processing {index + 1}/{len(df)}: {row['Journal']}")
        
        # Search for conference information
        search_query = f"{row['Journal']} conference agenda topics"
        search_results = google_search(search_query, GOOGLE_SEARCH_API_KEY, SEARCH_ENGINE_ID)
        
        keywords = []
        if search_results and 'items' in search_results:
            # Extract text from search results
            all_text = ""
            for item in search_results['items']:
                all_text += item.get('Journal', '') + " " + item.get('snippet', '') + " "
            
            # Extract keywords from the text using Gemini
            keywords = extract_keywords_with_gemini(all_text)
        
        # If no keywords found, try searching with just the title
        if not keywords:
            search_query = f"{row['Title']}"
            search_results = google_search(search_query, GOOGLE_SEARCH_API_KEY, SEARCH_ENGINE_ID)
            
            if search_results and 'items' in search_results:
                all_text = ""
                for item in search_results['items']:
                    all_text += item.get('title', '') + " " + item.get('snippet', '') + " "
                keywords = extract_keywords_with_gemini(all_text)
        
        # Add conference type as keyword if no other keywords found
        if not keywords and 'Field' in row:
            keywords = [row['Field'].lower()]
        
        keywords_list.append(', '.join(keywords) if keywords else 'No keywords found')
        
        # Rate limiting to avoid API quota issues
        time.sleep(1)
    
    return keywords_list

In [14]:
print("\nStarting keyword extraction...")
keywords = analyze_conference_keywords(df)
df['Keywords'] = keywords


Starting keyword extraction...
Processing 1/28: IEEE Transactions on Neural Networks and Learning Systems
Processing 2/28: IEEE Transactions on Pattern Analysis and Machine Intelligence
Processing 3/28: Journal of Machine Learning Research
Processing 4/28: Nature Machine Intelligence
Processing 5/28: Journal of Artificial Intelligence Research (JAIR)
Processing 6/28: International Journal of Computer Vision
Processing 7/28: Pattern Recognition
Processing 8/28: Expert Systems with Applications
Processing 9/28: Neurocomputing
Processing 10/28: Knowledge-Based Systems
Processing 11/28: Neural Networks
Processing 12/28: Applied Soft Computing
Processing 13/28: Artificial Intelligence Review
Processing 14/28: ACM Transactions on Intelligent Systems and Technology
Processing 15/28: Foundations and Trends in Machine Learning
Processing 16/28: Science Robotics
Processing 17/28: IEEE Computational Intelligence Magazine
Processing 18/28: IEEE Transactions on Information Forensics and Security
P

In [15]:
df.to_csv("With_Keywords_Journals.csv",index = False)