In [None]:
# Import required libraries

from pyarxiv import query, download_entries
from pyarxiv.arxiv_categories import ArxivCategory, arxiv_category_map

import pandas as pd

In [None]:
# Prompt the user for a search topic and keep the answer in "topic"

search_prompt = "Enter the topic for which you want to search papers on arXiv: "
topic = input(search_prompt)

# Send "topic" to the arXiv API as the title search term;
# the response is stored in "entries"

entries = query(title=topic)

In [None]:
# Pull title, author, date, link and summary out of "entries" and put each
# in its own list. Real lists are used instead of lazy map objects so the
# values can still be inspected or reused after the dataframe is built.
# NOTE(review): "entries" is iterated once per list, so it is assumed to be a
# re-iterable sequence (e.g. a list) rather than a one-shot generator.

titles = [entry['title'] for entry in entries]
authors = [entry['author'] for entry in entries]
updated = [entry['updated'] for entry in entries]
links = [entry['link'] for entry in entries]
abstract = [entry['summary'] for entry in entries]

# Build the "papers" dataframe in a single step, one column per list.
# Constructing it at once (rather than growing an empty dataframe column by
# column) also gives string-compatible columns when the search found nothing,
# so the .str operations below run cleanly on an empty result.

papers = pd.DataFrame({
    'Title': titles,
    'Author': authors,
    'Updated': updated,
    'Link': links,
    'Abstract': abstract,
})

# Keep only the first 10 characters of each timestamp,
# which slices HH:MM:SS off of each row in the date column

papers['Updated'] = papers['Updated'].str.slice(stop=10)

# Truncate each abstract to its first 100 characters

papers['Abstract'] = papers['Abstract'].str.slice(stop=100)

# Reformat URL string to take user to the PDF of the paper.
# Only the "/abs/" path segment is rewritten (as a literal, not a regex), so
# an "abs" substring elsewhere in the URL is left untouched.

papers['Link'] = papers['Link'].str.replace("/abs/", "/pdf/", regex=False)

# Strip paper ID (the text after the last "/") from the Link URL and put it
# in its own column called "ID". Using .str.get(1) instead of expand=True
# keeps this step from raising KeyError when there are no rows at all.
# NOTE(review): for identifiers that themselves contain a slash, only the part
# after the last slash is kept -- confirm this is what download_entries expects.

papers['ID'] = papers['Link'].str.rsplit("/", n=1).str.get(1)

In [None]:
# Optional export: uncomment the next line to write the result to a CSV file
# named after the search topic

# papers.to_csv(topic + ' arXiv papers.csv')

# Order the papers from most to least recently updated and renumber the rows

papers = (
    papers
    .sort_values(by='Updated', ascending=False)
    .reset_index(drop=True)
)

# Display the 20 papers at the top of the sorted dataframe

papers.head(20)

## Automating Downloads into a Folder of Your Choice
For example, let's assume that I have created a folder called "arXiv papers" on my desktop and want to download the PDFs of the 20 papers shown above into that folder. This can be accomplished with the lines of code below by setting `target_folder` to that folder's path.

In [None]:
# Uncomment the loop below to download the PDFs. It loops through the index,
# pulls the ID for each paper from the dataframe, and uses it to push the
# download from the API to your destination folder.
# The loop assumes at least 20 papers were found; lower the range otherwise.

# Be sure that the folder has been created before running the loop.
# If you simply want the papers to download to the same location as your notebook,
# either remove the target_folder argument entirely, or enter '.' as the file path.

# for i in range(0, 20):
    
#     download_entries(entries_or_ids_or_uris = [papers['ID'][i]],
#                      target_folder = './papers')
