## Trying it out with the open-access arXiv API

In [11]:
from bs4 import BeautifulSoup
import requests
import csv

# Download cs.AI publications from the arXiv query API in batches.
#
# query_params['max_results'] is treated as the TOTAL number of entries wanted.
# query_params['start'] is advanced after every batch, so after the loop it
# holds the number of entries actually collected.
api_url = 'http://export.arxiv.org/api/query'
query_params = {
    'search_query': 'cat:cs.AI',
    'start': 0,
    'max_results': 2000,
}

# Seconds to wait on the server; without a timeout a stalled connection
# would hang the cell forever.
request_timeout = 30

# One row per publication: [title, abstract, arxiv:comment text or 'N/A'].
publications = []

total_wanted = query_params['max_results']

while query_params['start'] < total_wanted:
    # Request only the entries still missing. Re-sending the full
    # 'max_results' after a short batch would collect more than total_wanted.
    batch_params = dict(
        query_params,
        max_results=total_wanted - query_params['start'],
    )
    response = requests.get(api_url, params=batch_params, timeout=request_timeout)

    if response.status_code != 200:
        print(f"API request failed with status code {response.status_code}")
        break

    # The API answers with an Atom XML feed; it is parsed here with the
    # built-in 'html.parser' backend of BeautifulSoup.
    soup = BeautifulSoup(response.text, 'html.parser')
    entries = soup.find_all('entry')

    # An empty batch means there is nothing more to fetch (or the server sent
    # an empty feed). 'start' would not advance, so stop instead of looping
    # forever on the same request.
    if not entries:
        print(f"No entries returned at start={query_params['start']}; stopping early")
        break

    for entry in entries:
        title = entry.find('title').text.strip()
        abstract = entry.find('summary').text.strip()

        # 'arxiv:comment' is optional. NOTE(review): this is the authors'
        # free-text comment, not a citation count, even though it is stored
        # under the name 'citations' — confirm the intended column meaning.
        arxiv_comment = entry.find('arxiv:comment')
        citations = arxiv_comment.text.strip() if arxiv_comment is not None else 'N/A'

        publications.append([title, abstract, citations])

    # Move the offset forward by the number of entries actually received.
    query_params['start'] += len(entries)

# Export the collected publications to a CSV file in the working directory.
csv_filename = 'publications.csv'

# Column names for the three values stored per publication; the third value
# is the entry's arxiv:comment text, or 'N/A' when the entry has none.
header_row = ['Title', 'Abstract', 'Citations']

# newline='' lets the csv module control line endings itself.
with open(csv_filename, mode='w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    # Header first, then every publication row, in a single call.
    csv_writer.writerows([header_row, *publications])

print(f"Data saved to '{csv_filename}'")


Data saved to 'publications.csv'


This seems to work