In [2]:
import pandas as pd
import arxiv

In [22]:

def format_query(author='', title='', cat='', abstract=''):
    """Build an arXiv API query string from at most one value per field.

    Every specified argument becomes a ``prefix:value`` term (``au:``, ``ti:``,
    ``cat:``, ``abs:``) and the terms are combined with ``AND``, so a result
    must match all specified fields simultaneously. To leave a field
    unspecified, leave the corresponding argument blank (``''``) or pass ``None``.

    e.g. format_query(cat='math.AP') returns 'cat:math.AP', the string used to
    pull all articles carrying the subject tag 'math.AP' (PDEs).

    Values are inserted verbatim; no quoting or escaping is applied.
    NOTE(review): multi-word phrases presumably need to be wrapped in double
    quotes by the caller -- confirm against the arXiv API manual.

    Args:
        author: string to search for in the author field.
        title: string to search for in the title field.
        cat: A valid arxiv subject tag. See the full list of these at:
        https://arxiv.org/category_taxonomy
        abstract: string to search for in the abstract field.

    Returns:
        Query string of the specified terms, in the order author, title, cat,
        abstract, joined by ' AND '. An empty string if no field is specified.
    """
    fields = (('au', author), ('ti', title), ('cat', cat), ('abs', abstract))

    # Decide "unspecified" from the value itself, not from how the rendered
    # term ends: a real value that ends in ':' (e.g. title='Part 1:') must not
    # be mistaken for a blank field and silently dropped.
    terms = [
        f'{prefix}:{value}'
        for prefix, value in fields
        if value is not None and value != ''
    ]
    return ' AND '.join(terms)



def query_to_df(query, max_results):
    """Returns the results of an arxiv API query in a pandas dataframe.

    The search is sorted by last-updated date, so the rows are the
    ``max_results`` most recently updated matches.

    Args:
        query: string defining an arxiv query formatted according to
        https://info.arxiv.org/help/api/user-manual.html#51-details-of-query-construction

        max_results: positive integer specifying the maximum number of results returned.

    Returns:
        pandas dataframe with one row per returned result and one column for
        each individual piece of metadata of a result, indexed 0..n-1.
        To see a list of these columns and their descriptions, see the documentation
        for the Results class of the arxiv package here:
        http://lukasschwab.me/arxiv.py/index.html#Result

        The 'links' and '_raw' attributes are dropped and the 'authors' column
        (placed last) is replaced by a single string of each author name
        separated by a comma. If the query yields no results, an empty
        dataframe is returned.
    """
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.LastUpdatedDate,
    )

    # Attributes of a result that are not copied over as-is.
    drop_cols = {'authors', 'links', '_raw'}

    # Collect plain dicts and build the frame once at the end. Concatenating
    # onto a growing DataFrame inside the loop re-copies every previous row on
    # each iteration, which is quadratic in the number of results.
    records = []
    for result in search.results():
        record = {k: v for k, v in vars(result).items() if k not in drop_cols}
        # Added after the comprehension so 'authors' ends up as the last column.
        record['authors'] = ','.join(author.name for author in result.authors)
        records.append(record)

    # dtype=object stores every value exactly as the API client returned it
    # (no dtype inference); the default RangeIndex already runs 0..n-1.
    return pd.DataFrame(records, dtype=object)





In [29]:
## Example: pulling up to 1000 of the most recently updated articles tagged with the
## subject category math.AP (Analysis of PDEs). The cat: filter also matches articles
## that are only cross-listed in math.AP, so primary_category is not always math.AP.

query = format_query(cat='math.AP')
pdes = query_to_df(query=query,max_results=1000)

pdes.head()

Unnamed: 0,entry_id,updated,published,title,summary,comment,journal_ref,doi,primary_category,categories,pdf_url,authors
0,http://arxiv.org/abs/2210.09278v2,2023-05-12 17:58:14+00:00,2022-10-17 17:30:48+00:00,The quantization of Proca fields on globally h...,This paper deals with several issues concernin...,45 pages --- accepted in Annales Henri Poincar\'e,,,math-ph,"[math-ph, math.AP, math.MP, math.OA, Primary: ...",http://arxiv.org/pdf/2210.09278v2,"Valter Moretti,Simone Murro,Daniele Volpe"
1,http://arxiv.org/abs/2109.06685v3,2023-05-12 17:55:26+00:00,2021-09-14 13:41:14+00:00,Paracausal deformations of Lorentzian metrics ...,Given a pair of normally hyperbolic operators ...,"55 pages, 4 figures",,,math-ph,"[math-ph, gr-qc, math.AP, math.DG, math.MP, ma...",http://arxiv.org/pdf/2109.06685v3,"Valter Moretti,Simone Murro,Daniele Volpe"
2,http://arxiv.org/abs/2305.01065v3,2023-05-12 17:46:00+00:00,2023-05-01 19:55:32+00:00,Coefficient Inverse Problems for a Generalized...,The mean field games theory has broad applicat...,,,,math.AP,[math.AP],http://arxiv.org/pdf/2305.01065v3,"Michael V. Klibanov,Jingzhi Li,Hongyu Liu"
3,http://arxiv.org/abs/2305.07634v1,2023-05-12 17:41:15+00:00,2023-05-12 17:41:15+00:00,Sobolev estimates for singular-degenerate quas...,We study a conormal boundary value problem for...,"25 pages, submitted",,,math.AP,"[math.AP, 35J70, 35J75, 35J62, 35D30, 35B45]",http://arxiv.org/pdf/2305.07634v1,"Hongjie Dong,Tuoc Phan,Yannick Sire"
4,http://arxiv.org/abs/2305.07626v1,2023-05-12 17:26:55+00:00,2023-05-12 17:26:55+00:00,Quantitative Estimates and Global Strong Solut...,We prove quantitative growth estimates for lar...,28 pages,,,math.AP,"[math.AP, math-ph, math.MP]",http://arxiv.org/pdf/2305.07626v1,Dominic Wynter
