In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [29]:
def fetch_arxiv_papers(query, max_results=50, timeout=30):
    """
    Fetches research papers from arXiv.org based on a query.

    Sends a single GET request to the arXiv export API, searching all
    fields for ``query`` (sorted by relevance, starting at result 0), and
    parses the returned XML feed with BeautifulSoup.

    Args:
        query: Search text; it is sent to the API as ``all:{query}``.
        max_results: Maximum number of entries to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` would wait indefinitely on a stalled
            connection.

    Returns:
        A list of dicts, one per ``<entry>`` element in the feed, with the
        keys "Title", "Summary", "Authors" (names joined by ", "), "Link"
        and "Year". An element that is absent from an entry yields an empty
        string for its key. Returns an empty list, after printing a
        message, when the request fails or the status code is not 200.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": f"all:{query}",
        "start": 0,
        "max_results": max_results,
        "sortBy": "relevance",
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport-level failures (DNS, connection, timeout) the same
        # way as a bad status code: report and return no results.
        print(f"Failed to fetch data: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "xml")

    def _text(parent, tag_name):
        # find() returns None when the element is missing; fall back to ""
        # so one incomplete entry does not abort the whole parse.
        node = parent.find(tag_name)
        return node.text.strip() if node is not None else ""

    papers = []
    for entry in soup.find_all("entry"):
        name_nodes = (author.find("name") for author in entry.find_all("author"))
        authors = [node.text for node in name_nodes if node is not None]

        papers.append({
            "Title": _text(entry, "title"),
            "Summary": _text(entry, "summary"),
            "Authors": ", ".join(authors),
            "Link": _text(entry, "id"),
            # The code takes the text before the first "-" of <published>
            # as the year; a missing element gives "".
            "Year": _text(entry, "published").split("-")[0],
        })

    return papers


In [36]:
# Example run: search arXiv for one topic and keep the results.
query = "Machine Learning"
papers = fetch_arxiv_papers(query)

# Build the output filename once from the query
# (lower-cased, spaces replaced by underscores).
output_csv = f"{query.lower().replace(' ', '_')}_papers.csv"

# Persist the results as CSV without the DataFrame index column.
df = pd.DataFrame(papers)
df.to_csv(output_csv, index=False)
print(f"Saved {len(papers)} papers to {output_csv}")

Saved 50 papers to machine_learning_papers.csv
