In [1]:
#!pip install arxiv
# import the necessary libraries for the task.
import arxiv
import re
import pandas as pd

In [2]:
#a regular expression is defined which extracts the id such that it starts with 18,19,20,21, or 22. 
# According to the arxiv id nomenclature, the first two digits denote the year when the paper was added to the arxiv
id_reg = re.compile(r"((1[89]|2[012])(0[\d]|1[012])\.[\d]{5}v[\d])")

### Retrieving papers in Databases category from 2018 to 2022.

In [3]:
#categories = ['cs.DB','cs.GR','cs.RO','cs.ET']
# the arxiv library allows the searching of the metadata for the papers uploaded in their database using the arxiv.Search function
search_DB = arxiv.Search(query = 'cs.DB', 
  sort_by = arxiv.SortCriterion.SubmittedDate,
  sort_order = arxiv.SortOrder.Descending)
#we apply the filter for the primary category as cs.DB to obtain those papers only, also in a descending order of publication.
results_DB = search_DB.results()
#arxiv.Search.results() allows to store the metadata in a variable.

In [4]:
# Once the metadata on the papers published in cs.DB category is obtained, we further filter it out to obtain only those that
# were published from 2018 to 2022.
Database = []
# results_DB is a list of metadata of all the papers. Each element is a record of each paper
for result in results_DB:
    short_id = result.get_short_id()   #The result.get_short_id() retrieves the arxiv id in the format YYMM.XXXXXvX format
    #Once id for a paper is obtained, the regex is compared to it
    if id_reg.match(short_id):
        #if it matches, all the different metadata elements are appended to the list, paper
        paper = []
        paper.append(result.title)
        authors = []
        for author in result.authors:
            authors.append(author)
        paper.append(authors)
        paper.append(result.summary)
        paper.append(result.comment)
        paper.append(result.journal_ref)
        paper.append(result.doi)
        paper.append(result.entry_id)
        paper.append(result.updated)
        paper.append(result.published)
        # Once the paper list has the metadata for a single paper, it is appended to a new list as a whole
        Database.append(paper)
#We define column names to create a dataframe for this Database list
column_names = ['Title','Authors','Summary','Comments','Journal References','DOI','Entry ID','Updated On','Published On']
#The Database list along with the column_names is converted into a dataframe named df_DB
df_DB = pd.DataFrame(Database, columns=column_names)
print("Number of papers extracted : ",df_DB.shape[0])
df_DB.tail()    

Number of papers extracted :  3812


Unnamed: 0,Title,Authors,Summary,Comments,Journal References,DOI,Entry ID,Updated On,Published On
3807,Semi-automated Annotation of Signal Events in ...,"[Scott Yang, Silvia Lopez, Meysam Golmohammadi...","To be effective, state of the art machine lear...",Published in IEEE Signal Processing in Medicin...,"S. Yang, S. Lopez, M. Golmohammadi, I. Obeid a...",10.1109/SPMB.2016.7846855,http://arxiv.org/abs/1801.02476v1,2018-01-03 03:47:20+00:00,2018-01-03 03:47:20+00:00
3808,On Optimizing Operator Fusion Plans for Large-...,"[Matthias Boehm, Berthold Reinwald, Dylan Hutc...",Many large-scale machine learning (ML) systems...,,,,http://arxiv.org/abs/1801.00829v1,2018-01-02 20:40:19+00:00,2018-01-02 20:40:19+00:00
3809,A Semantic-Rich Similarity Measure in Heteroge...,"[Yu Zhou, Jianbin Huang, Heli Sun]",Measuring the similarities between objects in ...,arXiv admin note: text overlap with arXiv:1712...,,,http://arxiv.org/abs/1801.00783v3,2018-01-27 02:55:33+00:00,2018-01-02 01:22:48+00:00
3810,Users Constraints in Itemset Mining,"[Christian Bessiere, Nadjib Lazaar, Yahia Lebb...",Discovering significant itemsets is one of the...,,,,http://arxiv.org/abs/1801.00345v2,2018-02-08 16:21:54+00:00,2017-12-31 19:55:52+00:00
3811,An introduction to Graph Data Management,"[Renzo Angles, Claudio Gutierrez]",A graph database is a database where the data ...,,,10.1007/978-3-319-96193-4_1,http://arxiv.org/abs/1801.00036v1,2017-12-29 21:02:12+00:00,2017-12-29 21:02:12+00:00


In [5]:
# We convert the dataframe into a csv file with giving Serial No. as the index_col
df_DB.to_csv('Databases.csv',index = True, index_label = "Serial No.")

### Retrieving papers in Graphics category from 2018 to 2022.

In [3]:
# the arxiv library allows the searching of the metadata for the papers uploaded in their database using the arxiv.Search function
search_GR = arxiv.Search(query = 'cs.GR', 
  sort_by = arxiv.SortCriterion.SubmittedDate,
  sort_order = arxiv.SortOrder.Descending)
results_GR = search_GR.results()
#we apply the filter for the primary category as cs.GR to obtain those papers only, also in a descending order of publication.
#arxiv.Search.results() allows to store the metadata in a variable.

In [4]:
# Once the metadata on the papers published in cs.GR category is obtained, we further filter it out to obtain only those that
# were published from 2018 to 2022. The same method is used for all the categories as documented for the cs.DB papers
Graphics = []
for result in results_GR:
    short_id = result.get_short_id()   
    #print(short_id)
    if id_reg.match(short_id):
        paper = []
        paper.append(result.title)
        authors = []
        for author in result.authors:
            authors.append(author)
        paper.append(authors)
        paper.append(result.summary)
        paper.append(result.comment)
        paper.append(result.journal_ref)
        paper.append(result.doi)
        paper.append(result.entry_id)
        paper.append(result.updated)
        paper.append(result.published)
        #print(paper)
        Graphics.append(paper)
column_names = ['Title','Authors','Summary','Comments','Journal References','DOI','Entry ID','Updated On','Published On']
df_GR = pd.DataFrame(Graphics, columns=column_names)
print("Number of papers extracted : ",df_GR.shape[0])
df_GR.tail()

Number of papers extracted :  3271


Unnamed: 0,Title,Authors,Summary,Comments,Journal References,DOI,Entry ID,Updated On,Published On
3266,Joint convolutional neural pyramid for depth m...,"[Yi Xiao, Xiang Cao, Xianyi Zhu, Renzhi Yang, ...",High-resolution depth map can be inferred from...,,,,http://arxiv.org/abs/1801.00968v1,2018-01-03 11:53:34+00:00,2018-01-03 11:53:34+00:00
3267,Least Square Error Method Robustness of Comput...,[Vaclav Skala],There are many practical applications based on...,,,10.15439/978-83-946253-7-5,http://arxiv.org/abs/1802.07591v1,2018-01-01 13:55:09+00:00,2018-01-01 13:55:09+00:00
3268,O(lgN) Line Clipping Algorithm in E2,[Vaclav Skala],A new O(lg N) line clipping algorithm in E2 ag...,,,10.1016/0097-8493(94)90064-7,http://arxiv.org/abs/1801.00442v1,2018-01-01 13:25:16+00:00,2018-01-01 13:25:16+00:00
3269,A Fast Algorithm for Line Clipping by Convex P...,[Vaclav Skala],A new algorithm for line clipping against conv...,,,,http://arxiv.org/abs/1801.00441v1,2018-01-01 13:13:31+00:00,2018-01-01 13:13:31+00:00
3270,A Comparative Study of LOWESS and RBF Approxim...,"[Michal Smolik, Vaclav Skala, Ondrej Nedved]",Approximation methods are widely used in many ...,,,10.1007/978-3-319-42108-7_31,http://arxiv.org/abs/1801.00432v1,2018-01-01 11:51:37+00:00,2018-01-01 11:51:37+00:00


In [5]:
# We convert the dataframe into a csv file with giving Serial No. as the index_col
df_GR.to_csv('Graphics.csv',index = True, index_label = "Serial No.")

### Retrieving papers in Robotics category from 2018 to 2022.

In [8]:
# the arxiv library allows the searching of the metadata for the papers uploaded in their database using the arxiv.Search function
search_RO = arxiv.Search(query = 'cs.RO', 
  sort_by = arxiv.SortCriterion.SubmittedDate,
  sort_order = arxiv.SortOrder.Descending)
results_RO = search_RO.results()
#we apply the filter for the primary category as cs.RO to obtain those papers only, also in a descending order of publication.
#arxiv.Search.results() allows to store the metadata in a variable.

In [9]:
# Once the metadata on the papers published in cs.RO category is obtained, we further filter it out to obtain only those that
# were published from 2018 to 2022. The same method is used for all the categories as documented for the cs.DB papers
Robotics = []
for result in results_RO:
    short_id = result.get_short_id()   
    #print(short_id)
    if id_reg.match(short_id):
        paper = []
        paper.append(result.title)
        authors = []
        for author in result.authors:
            authors.append(author)
        paper.append(authors)
        paper.append(result.summary)
        paper.append(result.comment)
        paper.append(result.journal_ref)
        paper.append(result.doi)
        paper.append(result.entry_id)
        paper.append(result.updated)
        paper.append(result.published)
        #print(paper)
        Robotics.append(paper)
column_names = ['Title','Authors','Summary','Comments','Journal References','DOI','Entry ID','Updated On','Published On']
df_RO = pd.DataFrame(Robotics, columns=column_names)
print("Number of papers extracted : ",df_RO.shape[0])
df_RO.tail()

Number of papers extracted :  18574


Unnamed: 0,Title,Authors,Summary,Comments,Journal References,DOI,Entry ID,Updated On,Published On
18569,SenseNet: 3D Objects Database and Tactile Simu...,[Jason Toy],The majority of artificial intelligence resear...,,,,http://arxiv.org/abs/1801.00361v1,2017-12-31 21:50:15+00:00,2017-12-31 21:50:15+00:00
18570,Neurally Plausible Model of Robot Reaching Ins...,"[Zahra Mahoor, Bruce MacLennan, Allen McBride]",In this paper we present a neurally plausible ...,,,,http://arxiv.org/abs/1801.00293v1,2017-12-31 14:40:44+00:00,2017-12-31 14:40:44+00:00
18571,Multichannel Robot Speech Recognition Database...,"[José Novoa, Juan Pablo Escudero, Josué Fredes...",In real human robot interaction (HRI) scenario...,,,,http://arxiv.org/abs/1801.00061v1,2017-12-30 00:01:08+00:00,2017-12-30 00:01:08+00:00
18572,LaMMos - Latching Mechanism based on Motorized...,[Luis A. Mateos],Reconfigurable robots refer to a category of r...,"14 pages, 15 figures",,,http://arxiv.org/abs/1801.00035v1,2017-12-27 17:01:52+00:00,2017-12-27 17:01:52+00:00
18573,Collision Selective Visual Neural Network Insp...,"[Qinbing Fu, Cheng Hu, Shigang Yue]",For autonomous robots in dynamic environments ...,,,,http://arxiv.org/abs/1801.06452v1,2017-12-22 00:34:55+00:00,2017-12-22 00:34:55+00:00


In [10]:
# We convert the dataframe into a csv file with giving Serial No. as the index_col
df_RO.to_csv('Robotics.csv',index = True, index_label = "Serial No.")

### Retrieving papers in Emerging Technologies category from 2018 to 2022.

In [11]:
# the arxiv library allows the searching of the metadata for the papers uploaded in their database using the arxiv.Search function
search_ET = arxiv.Search(query = 'cs.ET', 
  sort_by = arxiv.SortCriterion.SubmittedDate,
  sort_order = arxiv.SortOrder.Descending)
results_ET = search_ET.results()
#we apply the filter for the primary category as cs.ET to obtain those papers only, also in a descending order of publication.
#arxiv.Search.results() allows to store the metadata in a variable.

In [12]:
# Once the metadata on the papers published in cs.ET category is obtained, we further filter it out to obtain only those that
# were published from 2018 to 2022. The same method is used for all the categories as documented for the cs.DB papers
ET = []
for result in results_ET:
    short_id = result.get_short_id()   
    #print(short_id)
    if id_reg.match(short_id):
        paper = []
        paper.append(result.title)
        authors = []
        for author in result.authors:
            authors.append(author)
        paper.append(authors)
        paper.append(result.summary)
        paper.append(result.comment)
        paper.append(result.journal_ref)
        paper.append(result.doi)
        paper.append(result.entry_id)
        paper.append(result.updated)
        paper.append(result.published)
        #print(paper)
        ET.append(paper)
column_names = ['Title','Authors','Summary','Comments','Journal References','DOI','Entry ID','Updated On','Published On']
df_ET = pd.DataFrame(ET, columns=column_names)
print("Number of papers extracted : ",df_ET.shape[0])
df_ET.tail()

Number of papers extracted :  2126


Unnamed: 0,Title,Authors,Summary,Comments,Journal References,DOI,Entry ID,Updated On,Published On
2121,Accelerating Deep Learning with Memcomputing,"[Haik Manukian, Fabio L. Traversa, Massimilian...",Restricted Boltzmann machines (RBMs) and their...,"8 pages, 5 figures",,,http://arxiv.org/abs/1801.00512v3,2018-10-23 19:23:11+00:00,2018-01-01 21:27:11+00:00
2122,Implementing Bayesian Networks with Embedded S...,"[Rafatul Faria, Kerem Y. Camsari, Supriyo Datta]",Magnetic tunnel junctions (MTJ's) with low bar...,,"AIP Advances 8, 045101 (2018)",10.1063/1.5021332,http://arxiv.org/abs/1801.00497v2,2018-04-02 19:19:59+00:00,2018-01-01 19:21:44+00:00
2123,Level-Shifted Neural Encoded Analog-to-Digital...,"[Aigerim Tankimanova, Akshay Kumar Maan, Alex ...",This paper presents the new approach in implem...,,2017 IEEE International Conference on Electron...,,http://arxiv.org/abs/1801.00448v1,2018-01-01 14:09:40+00:00,2018-01-01 14:09:40+00:00
2124,Principles of Neuromorphic Photonics,"[Bhavin J. Shastri, Alexander N. Tait, Thomas ...","In an age overrun with information, the abilit...","28 pages, 19 figures",,10.1007/978-3-642-27737-5_702-1,http://arxiv.org/abs/1801.00016v1,2017-12-29 19:07:22+00:00,2017-12-29 19:07:22+00:00
2125,Bridging the Gap Between Neural Networks and N...,"[Yu Ji, YouHui Zhang, WenGuang Chen, Yuan Xie]",Different from developing neural networks (NNs...,Accepted by ASPLOS 2018,,,http://arxiv.org/abs/1801.00746v3,2018-01-18 18:05:39+00:00,2017-11-15 22:52:34+00:00


In [13]:
# We convert the dataframe into a csv file with giving Serial No. as the index_col
df_ET.to_csv('Emerging Technologies.csv',index = True, index_label = "Serial No.")