# Basic query examples

## Example 1 

Parse the authors' names from 10 papers in the list of results for the query "particles", starting at result index 14.

In [1]:
from pprint import pprint
from scrapxiv.shelf import Shelf

# from scrapxiv.parsers_utils import query_papers_to_dict, papers_dict_to_author_list, add_new_entries_to_df, papers_dict_to_list_of_papers_id

# Create a Shelf and run the query used throughout Example 1:
# keyword "particles", starting at result index 14, at most 10 results.
shelf = Shelf()
shelf.query(keywords="particles", index=14, max_results=10)

Get all the authors found in the query:

In [2]:
# Iterating shelf.authors() yields one dict per author (see the output below) with the keys
# name, affiliation (if any), paper_id, paper_title and paper_published_date.
for au in shelf.authors():
    pprint(au)

{'affiliation': 'Washington University in St.Louis, MO',
 'name': 'C. Barbieri',
 'paper_id': 'http://arxiv.org/abs/nucl-th/0012055v1',
 'paper_published_date': '2000-12-14T20:51:03Z',
 'paper_title': 'Faddeev description of two-hole one-particle motion and '
                'thesingle-particle spectral function'}
{'affiliation': 'Washington University in St.Louis, MO',
 'name': 'W. H. Dickhoff',
 'paper_id': 'http://arxiv.org/abs/nucl-th/0012055v1',
 'paper_published_date': '2000-12-14T20:51:03Z',
 'paper_title': 'Faddeev description of two-hole one-particle motion and '
                'thesingle-particle spectral function'}
{'affiliation': 'Texas Tech University',
 'name': 'S. Bhattacharya',
 'paper_id': 'http://arxiv.org/abs/0802.0608v1',
 'paper_published_date': '2008-02-05T13:03:03Z',
 'paper_title': 'Effect of small particles on the near-wall dynamics of a '
                'large particlein a highly bidisperse colloidal solution'}
{'affiliation': 'Yale University',
 'name': 'J.

Get the ids of all the papers found in the query (each id is the URL that uniquely identifies the paper):

In [3]:
# Bare last expression so the notebook displays the returned list of paper ids (URL strings, see output).
shelf.papers_ids()

['http://arxiv.org/abs/nucl-th/0012055v1',
 'http://arxiv.org/abs/0802.0608v1',
 'http://arxiv.org/abs/1905.05638v1',
 'http://arxiv.org/abs/1908.06019v1',
 'http://arxiv.org/abs/1311.1058v1',
 'http://arxiv.org/abs/1906.01779v1',
 'http://arxiv.org/abs/1406.5506v1',
 'http://arxiv.org/abs/1707.03205v1',
 'http://arxiv.org/abs/1110.3090v2',
 'http://arxiv.org/abs/1506.05560v1']

## Example 2
Query the authors of the papers matching the keywords "sub multisets" and collect them in a table. The first cell below fetches 5 papers; the batched cell further down fetches 30 papers.

In [7]:
# New Shelf instance for this example
# (presumably so results of the earlier "particles" query do not carry over; confirm against Shelf).
shelf = Shelf()
shelf.query(keywords="sub multisets", index=1, max_results=5)
# as_dataframe=True returns the authors as a table with one row per author (see output below).
df_multiset = shelf.authors(as_dataframe=True)
# Bare last expression: displayed as the cell output.
df_multiset

Unnamed: 0,name,affiliation,paper_id,paper_title,paper_published_date,num_publications
0,Jean-Baptiste Gramain,,http://arxiv.org/abs/1101.5071v2,On bar lengths in partitions,2011-01-26T14:47:11Z,1
1,Jorn B. Olsson,,http://arxiv.org/abs/1101.5071v2,On bar lengths in partitions,2011-01-26T14:47:11Z,1
2,Håkon Robbestad Gylterud,,http://arxiv.org/abs/1612.05468v1,From Multisets to Sets in Hotmotopy Type Theory,2016-12-16T13:52:57Z,1
3,Mark Sh. Levin,,http://arxiv.org/abs/1205.2046v1,Multiset Estimates and Combinatorial Synthesis,2012-05-09T17:42:36Z,1
4,Christian Steinruecken,,http://arxiv.org/abs/1401.6410v1,Compressing Sets and Multisets of Sequences,2014-01-24T17:36:32Z,1
5,Rinovia Simanjuntak,,http://arxiv.org/abs/1711.00225v2,The multiset dimension of graphs,2017-11-01T07:01:49Z,1
6,Presli Siagian,,http://arxiv.org/abs/1711.00225v2,The multiset dimension of graphs,2017-11-01T07:01:49Z,1
7,Tomas Vetrik,,http://arxiv.org/abs/1711.00225v2,The multiset dimension of graphs,2017-11-01T07:01:49Z,1


To optimize resources, we can divide the download into 3 batches of 10 papers each (30 papers in total).

In [8]:
import pandas as pd


# Download the authors of the "sub multisets" results in `num_batches` consecutive
# batches of `batch_size` papers each, then combine them into a single table.
batch_size = 10
num_batches = 3
shelf = Shelf()

# Collect the per-batch tables in a list and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a loop
# copies all previous rows on every iteration.
batch_frames = []

for j in range(num_batches):
    print("Processing batch {}".format(j))
    # Batch j starts at result index j * batch_size.
    shelf.query(keywords="sub multisets", index=j * batch_size, max_results=batch_size)
    df_batch = shelf.authors(as_dataframe=True)
    # Batches for which authors() returns None are skipped.
    if df_batch is not None:
        batch_frames.append(df_batch)

# pd.concat raises on an empty list, so fall back to an empty DataFrame when
# no batch produced a table (same result the original loop left in that case).
if batch_frames:
    df_authors = pd.concat(batch_frames, ignore_index=True)
else:
    df_authors = pd.DataFrame()


Processing batch 0
Processing batch 1
Processing batch 2


In [10]:
# Preview the first rows of the combined authors table instead of displaying all of it.
df_authors.head()

Unnamed: 0,name,affiliation,paper_id,paper_title,paper_published_date,num_publications
0,Sebastiano Ferraris,,http://arxiv.org/abs/1511.06142v1,Counting sub-multisets of fixed cardinality,2015-11-19T12:44:42Z,1
1,Alex Mendelson,,http://arxiv.org/abs/1511.06142v1,Counting sub-multisets of fixed cardinality,2015-11-19T12:44:42Z,1
2,Gerardo Ballesio,,http://arxiv.org/abs/1511.06142v1,Counting sub-multisets of fixed cardinality,2015-11-19T12:44:42Z,1
3,Tom Vercauteren,,http://arxiv.org/abs/1511.06142v1,Counting sub-multisets of fixed cardinality,2015-11-19T12:44:42Z,1
4,Jean-Baptiste Gramain,,http://arxiv.org/abs/1101.5071v2,On bar lengths in partitions,2011-01-26T14:47:11Z,1
