In [None]:
!pip install requests pandas

In [None]:
import requests  # For making API requests
import pandas as pd  # Data handling
import time  # For handling API rate limits
import os  # For accessing environment variables
from datetime import datetime  # For handling date operations

# Read the Semantic Scholar API key from the environment so it is never
# hard-coded; "Insert API Key" is only a placeholder used when the variable
# is not set.
SEMANTIC_SCHOLAR_API_KEY = os.environ.get("SEMANTIC_SCHOLAR_API_KEY", "Insert API Key")

# Publication-year window used for searching: from 20 years ago through the
# current calendar year (both ends inclusive where the window is applied).
CURRENT_YEAR = datetime.today().year
START_YEAR = CURRENT_YEAR - 20

# Fetching research papers from Semantic Scholar based on a user query.

# Default value SEMANTIC_SCHOLAR_API_KEY takes when no real key is configured.
_PLACEHOLDER_API_KEY = "Insert API Key"
# Upper bound on consecutive retries after HTTP 429 before giving up.
_MAX_RATE_LIMIT_RETRIES = 5
# Seconds to wait for the server before abandoning a single request.
_REQUEST_TIMEOUT_SECONDS = 30


def _or_na(value):
    """Return "N/A" when *value* is None, otherwise *value* unchanged.

    Only None is replaced, so legitimate falsy values such as a citation
    count of 0 are preserved.
    """
    return "N/A" if value is None else value


def _paper_to_record(paper, year):
    """Flatten one paper object from the API response into a row dict."""
    # "authors" may be missing or null; individual names may be null as well.
    authors = paper.get("authors") or []
    author_names = [a["name"] for a in authors if a and a.get("name")]
    return {
        "Title": _or_na(paper.get("title")),
        "Abstract": _or_na(paper.get("abstract")),
        "Authors": ", ".join(author_names),
        "Year": year,
        "URL": _or_na(paper.get("url")),
        "Citations": _or_na(paper.get("citationCount")),
        "Journal": _or_na((paper.get("journal") or {}).get("name")),
        "Venue": _or_na(paper.get("venue")),
        "Publication Types": ", ".join(paper.get("publicationTypes") or []),
    }


def _get_with_backoff(url, headers, params):
    """GET *url*, retrying with exponential backoff while the answer is 429.

    Returns the last response received. After _MAX_RATE_LIMIT_RETRIES
    retries that response may still carry status 429; the caller is
    responsible for checking the status code.
    """
    wait_time = 2
    response = requests.get(
        url, headers=headers, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    for _ in range(_MAX_RATE_LIMIT_RETRIES):
        if response.status_code != 429:
            break
        print(f"Rate limit exceeded. Waiting for {wait_time} seconds...")
        time.sleep(wait_time)
        wait_time *= 2
        response = requests.get(
            url, headers=headers, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
        )
    return response


def fetch_research_papers(query, max_results=100):
    """Fetch papers matching *query* from the Semantic Scholar search API.

    Results are requested page by page (at most 100 per request) and
    restricted to publication years START_YEAR..CURRENT_YEAR.

    Args:
        query: Free-text search string sent as the ``query`` parameter.
        max_results: Maximum number of search results to request in total.
            Results whose year is missing or outside the window still count
            toward this limit but are not returned, so the returned list
            may be shorter.

    Returns:
        A list of dicts with the keys "Title", "Abstract", "Authors",
        "Year", "URL", "Citations", "Journal", "Venue" and
        "Publication Types". Fields the API reports as null become "N/A".

    On a network failure, a non-200 response, or exhausted rate-limit
    retries, a message is printed and the papers collected so far are
    returned.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"

    # The placeholder is not a real key, so do not present it as one.
    headers = {}
    if SEMANTIC_SCHOLAR_API_KEY and SEMANTIC_SCHOLAR_API_KEY != _PLACEHOLDER_API_KEY:
        headers["x-api-key"] = SEMANTIC_SCHOLAR_API_KEY

    papers = []
    total_fetched = 0  # also serves as the offset of the next page

    while total_fetched < max_results:
        params = {
            "query": query,
            "limit": min(100, max_results - total_fetched),
            "offset": total_fetched,
            "fields": "title,abstract,authors,year,url,citationCount,journal,venue,publicationTypes",
            "year": f"{START_YEAR}-{CURRENT_YEAR}"
        }

        try:
            response = _get_with_backoff(url, headers, params)
        except requests.RequestException as exc:
            print(f"Error fetching data: {exc}")
            break

        # Checked unconditionally so that a failure following a rate-limit
        # retry (or a still-429 response) is never parsed as a result page.
        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code} - {response.text}")
            break

        papers_fetched = response.json().get("data") or []
        if not papers_fetched:
            print(f"No more results for query: {query}")
            break

        for paper in papers_fetched:
            year = paper.get("year")
            # A null or missing year cannot be compared against the window.
            if isinstance(year, int) and START_YEAR <= year <= CURRENT_YEAR:
                papers.append(_paper_to_record(paper, year))

        total_fetched += len(papers_fetched)
        # Pause between pages only when another request will follow.
        if total_fetched < max_results:
            time.sleep(1)

    return papers

# Prompt interactively for the search query and echo it back so the output
# records which query the results below belong to.
user_query = input("Enter your search query: ")
print(f"Fetching papers for query: {user_query}")

# Request up to 100 search results; the function returns a list of row dicts.
papers = fetch_research_papers(user_query, max_results=100)

# Convert results to a DataFrame (one row per returned paper dict).
df = pd.DataFrame(papers)
# Bare expression as the last statement — presumably so the notebook renders
# the DataFrame as the cell's output.
df
