# Basic query examples

## Example 1 

Query arXiv with the keywords "deep learning" and parse the authors' names from 10 papers, starting at result index 14.

In [15]:
from pprint import pprint
from scrapxiv.shelf import Shelf

# from scrapxiv.parsers_utils import query_papers_to_dict, papers_dict_to_author_list, add_new_entries_to_df, papers_dict_to_list_of_papers_id

# Create the Shelf object that runs the query and holds its results.
shelf = Shelf()
# Search for "deep learning"; `index` looks like the offset of the first result and
# `max_results` the number of results to fetch — TODO confirm against Shelf.query.
shelf.query(keywords="deep learning", index=14, max_results=10)

Get all the authors found in the query:

In [16]:
# Each item yielded by shelf.authors() is pretty-printed below as a dict with the keys
# name, affiliation, email, paper_id, paper_title and paper_published_date.
for au in shelf.authors():
    pprint(au)

{'affiliation': '',
 'email': '',
 'name': 'Chunwei Tian',
 'paper_id': 'http://arxiv.org/abs/1912.13171v2',
 'paper_published_date': '2019-12-31T05:03:57Z',
 'paper_title': 'Deep Learning on Image Denoising: An overview'}
{'affiliation': '',
 'email': '',
 'name': 'Lunke Fei',
 'paper_id': 'http://arxiv.org/abs/1912.13171v2',
 'paper_published_date': '2019-12-31T05:03:57Z',
 'paper_title': 'Deep Learning on Image Denoising: An overview'}
{'affiliation': '',
 'email': '',
 'name': 'Wenxian Zheng',
 'paper_id': 'http://arxiv.org/abs/1912.13171v2',
 'paper_published_date': '2019-12-31T05:03:57Z',
 'paper_title': 'Deep Learning on Image Denoising: An overview'}
{'affiliation': '',
 'email': '',
 'name': 'Yong Xu',
 'paper_id': 'http://arxiv.org/abs/1912.13171v2',
 'paper_published_date': '2019-12-31T05:03:57Z',
 'paper_title': 'Deep Learning on Image Denoising: An overview'}
{'affiliation': '',
 'email': '',
 'name': 'Wangmeng Zuo',
 'paper_id': 'http://arxiv.org/abs/1912.13171v2',
 'pape

Get all the paper IDs (each ID is the paper's unique arXiv URL):

In [17]:
# Bare last expression so the notebook displays the returned value
# (shown below as a list of arXiv "abs" URLs, one per queried paper).
shelf.papers_ids()

['http://arxiv.org/abs/1912.13171v2',
 'http://arxiv.org/abs/1710.06798v1',
 'http://arxiv.org/abs/1801.00631v1',
 'http://arxiv.org/abs/1802.00810v2',
 'http://arxiv.org/abs/1807.06399v1',
 'http://arxiv.org/abs/1708.05866v2',
 'http://arxiv.org/abs/1703.02910v1',
 'http://arxiv.org/abs/2004.00993v2',
 'http://arxiv.org/abs/1807.04739v1',
 'http://arxiv.org/abs/1802.08717v1']

## Example 2
Query author data as a DataFrame: first for 5 papers matching the keywords "deep learning", then for 30 papers downloaded in batches.

In [18]:
# New Shelf instance for this example.
shelf = Shelf()
# Search for "deep learning"; `index` looks like the offset of the first result and
# `max_results` the number of results to fetch — TODO confirm against Shelf.query.
shelf.query(keywords="deep learning", index=1, max_results=5)
# as_dataframe=True asks for the authors as a table (presumably a pandas DataFrame)
# instead of the per-author records returned by default.
df_multiset = shelf.authors(as_dataframe=True)
# Bare last expression so the notebook renders the table.
df_multiset

Unnamed: 0,name,affiliation,email,paper_id,paper_title,paper_published_date,num_publications
0,Daniel T Chang,,,http://arxiv.org/abs/1806.01756v1,Concept-Oriented Deep Learning,2018-06-05T15:50:30Z,1
1,Aras R. Dargazany,,,http://arxiv.org/abs/1908.02130v1,Deep learning research landscape & roadmap in ...,2019-07-30T16:57:38Z,1
2,Mengwei Xu,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
3,Jiawei Liu,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
4,Yuanqiang Liu,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
5,Felix Xiaozhu Lin,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
6,Yunxin Liu,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
7,Xuanzhe Liu,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
8,Ronny Ronen,,,http://arxiv.org/abs/1705.03921v1,Why & When Deep Learning Works: Looking Inside...,2017-05-10T18:52:26Z,1
9,Xiao Dong,,,http://arxiv.org/abs/1901.02354v2,Geometrization of deep networks for the interp...,2019-01-06T14:32:45Z,1


To optimize resources, we can divide the downloads into 3 batches.

In [20]:
import pandas as pd


# Batching parameters: num_batches queries of batch_size results each.
batch_size = 10
num_batches = 3
shelf = Shelf()

# Collect the per-batch frames and concatenate them once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies every previously accumulated row on each iteration.
batch_frames = []

for j in range(num_batches):
    print("Processing batch {}".format(j))
    # Batch j starts at result offset j * batch_size.
    shelf.query(keywords="deep learning", index=j * batch_size, max_results=batch_size)
    # NOTE(review): with get_email=True the run logs PDF downloads (see the cell
    # output), presumably to extract e-mail addresses — confirm in Shelf.authors.
    df_batch = shelf.authors(as_dataframe=True, get_email=True)
    # Keep the original guard: a batch that produced no frame is skipped.
    if df_batch is not None:
        batch_frames.append(df_batch)

# pd.concat raises on an empty list, so fall back to an empty frame,
# which is what the original produced when no batch returned data.
df_authors = pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()


Processing batch 0
Paper http://arxiv.org/pdf/1805.08355v1.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1805.08355v1.pdf.
Paper http://arxiv.org/pdf/1806.01756v1.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1806.01756v1.pdf.
Paper http://arxiv.org/pdf/1908.02130v1.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1908.02130v1.pdf.
Paper http://arxiv.org/pdf/1812.05448v3.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1812.05448v3.pdf.
Paper http://arxiv.org/pdf/1705.03921v1.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1705.03921v1.pdf.
Paper http://arxiv.org/pdf/1901.02354v2.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1901.02354v2.pdf.
Paper http://arxiv.org/pdf/1602.00203v1.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1602.00203v1.pdf.
Paper http://arxiv.org/pdf/1805.04825v1.pdf downloaded to /home/savoy/repos/arxiv_authors_parser/tmp/1805.04825v1.pdf.
Paper http://arxiv.org/pdf/19

In [22]:
# Preview the first 20 rows of the combined authors table.
df_authors.head(20)

Unnamed: 0,name,affiliation,email,paper_id,paper_title,paper_published_date,num_publications
0,Dian Lei,,,http://arxiv.org/abs/1805.08355v1,Opening the black box of deep learning,2018-05-22T02:12:33Z,1
1,Xiaoxiao Chen,,,http://arxiv.org/abs/1805.08355v1,Opening the black box of deep learning,2018-05-22T02:12:33Z,1
2,Jianfei Zhao,,,http://arxiv.org/abs/1805.08355v1,Opening the black box of deep learning,2018-05-22T02:12:33Z,1
3,Daniel T Chang,,dtchang43@gmail.com,http://arxiv.org/abs/1806.01756v1,Concept-Oriented Deep Learning,2018-06-05T15:50:30Z,1
4,Aras R. Dargazany,,,http://arxiv.org/abs/1908.02130v1,Deep learning research landscape & roadmap in ...,2019-07-30T16:57:38Z,1
5,Mengwei Xu,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
6,Jiawei Liu,,"{mwx,1500012828,yuanqiangliu}@pku.edu.cn",http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
7,Yuanqiang Liu,,"{mwx,1500012828,yuanqiangliu}@pku.edu.cn",http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
8,Felix Xiaozhu Lin,,,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
9,Yunxin Liu,,yunxin.liu@microsoft.com,http://arxiv.org/abs/1812.05448v3,A First Look at Deep Learning Apps on Smartphones,2018-11-08T07:59:23Z,1
