In [77]:
import os
import sys
# Put <parent of the current working directory>/Arxiv at the front of sys.path so
# that the `Arxiv` module imported below can be found.
path = os.path.join(os.path.normpath(os.getcwd() + os.sep + os.pardir), 'Arxiv')
sys.path.insert(0, path)
from Arxiv import Arxiv

In [79]:
def search_query(categories_filter=True, authors_filter=True, key_phrases_filter=False):
    """
    Function for generating a search query for the arXiv API.

    The actual authors, categories and key phrases to filter by are stored in external
    text files (one term per line) located in
    ``<parent of cwd>/Arxiv/query_information/``: ``authors.txt``, ``categories.txt``
    and ``key_phrases.txt``. Blank lines and surrounding whitespace are ignored.

    Every enabled filter contributes one parenthesised group whose terms are combined
    by logical ORs. The groups are then ORed together in the order authors,
    categories, key phrases. Disabled filters, and enabled filters whose file holds
    no terms, contribute nothing, so the result never contains a dangling ``OR``.

    Parameters:
    -----------
    categories_filter: Bool, optional
        Whether to filter by categories (``cat:`` field).

    authors_filter: Bool, optional
        Whether to filter by authors (``au:`` field).

    key_phrases_filter: Bool, optional
        Whether to filter using key phrases. Key phrases are searched in all fields
        (``all:``); a phrase made of several words is wrapped in double quotes so it
        is matched as a phrase.

    Returns:
    --------
    search_query: string
        Arxiv API compatible search query string. Empty string when no filter is
        enabled or none of the enabled files contains a term.

    Raises:
    -------
    OSError
        If the text file of an enabled filter cannot be opened.
    """

    # All term files live in <parent of cwd>/Arxiv/query_information/.
    query_information_dir = os.path.join(
        os.path.normpath(os.getcwd() + os.sep + os.pardir), 'Arxiv', 'query_information'
    )

    def read_terms(filename):
        # One term per line; drop blank lines so no empty "au:" / "cat:" term is emitted.
        with open(os.path.join(query_information_dir, filename), 'r') as f:
            return [line.strip() for line in f.read().splitlines() if line.strip()]

    def quote_phrase(phrase):
        # A multi-word phrase must be quoted, otherwise only its first word would be
        # bound to the field prefix. Phrases that are already quoted are left alone.
        if len(phrase.split()) > 1 and not phrase.startswith('"'):
            return '"' + phrase + '"'
        return phrase

    def or_group(prefix, terms):
        # "(prefix:a OR prefix:b ...)", or "" when there is nothing to search for.
        if not terms:
            return ""
        return '(' + ' OR '.join(prefix + term for term in terms) + ')'

    groups = []

    if authors_filter:
        groups.append(or_group('au:', read_terms('authors.txt')))

    if categories_filter:
        groups.append(or_group('cat:', read_terms('categories.txt')))

    if key_phrases_filter:
        key_phrases = [quote_phrase(phrase) for phrase in read_terms('key_phrases.txt')]
        groups.append(or_group('all:', key_phrases))

    # Join only the non-empty groups so a disabled/empty filter leaves no stray " OR ".
    search_query_string = " OR ".join(group for group in groups if group)

    return search_query_string

# Build the query with the default arguments of search_query() (categories and authors
# enabled, key phrases disabled) and echo the resulting string as the cell output.
search_query_string = search_query()
search_query_string

'(au:Goodfellow OR au:McInnes) OR (cat:cond-mat.dis-nn OR cat:cs.AI OR cat:cs.CV OR cat:ds.CB OR cat:cs.DC OR cat:cs.GL OR cat:cs.IT OR cat:cs.LG OR cat:cs.NA OR cat:cs.PL OR cat:stat.AP OR cat:stat.CO OR cat:stat.ME OR cat:stat.ML OR cat:stat.OT OR cat:stat.TH OR cat:eess.AS)'

In [85]:
# Create the Arxiv object used by the following cells. The meaning of number_of_repeats
# and max_results is defined by the Arxiv class, which is not shown in this notebook.
Arxiv_example = Arxiv(number_of_repeats=2, max_results=2)

In [86]:
# Call the (underscore-prefixed, i.e. private by convention) _generate_papers method
# with the query string, then display queried_papers_unfiltered, which is presumably
# populated by that call -- TODO confirm against the Arxiv class.
Arxiv_example._generate_papers(search_query_string)
Arxiv_example.queried_papers_unfiltered 

[{'id': 'http://arxiv.org/abs/1905.09813v1',
  'guidislink': True,
  'updated': '2019-05-23T17:59:31Z',
  'updated_parsed': time.struct_time(tm_year=2019, tm_mon=5, tm_mday=23, tm_hour=17, tm_min=59, tm_sec=31, tm_wday=3, tm_yday=143, tm_isdst=0),
  'published': '2019-05-23T17:59:31Z',
  'published_parsed': time.struct_time(tm_year=2019, tm_mon=5, tm_mday=23, tm_hour=17, tm_min=59, tm_sec=31, tm_wday=3, tm_yday=143, tm_isdst=0),
  'title': 'A Condition Number for Hamiltonian Monte Carlo',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://export.arxiv.org/api/query?search_query=%28au%3AGoodfellow+OR+au%3AMcInnes%29+OR+%28cat%3Acond-mat.dis-nn+OR+cat%3Acs.AI+OR+cat%3Acs.CV+OR+cat%3Ads.CB+OR+cat%3Acs.DC+OR+cat%3Acs.GL+OR+cat%3Acs.IT+OR+cat%3Acs.LG+OR+cat%3Acs.NA+OR+cat%3Acs.PL+OR+cat%3Astat.AP+OR+cat%3Astat.CO+OR+cat%3Astat.ME+OR+cat%3Astat.ML+OR+cat%3Astat.OT+OR+cat%3Astat.TH+OR+cat%3Aeess.AS%29&id_list=&start=0&max_results=2&sortBy=submittedDate&sortOrder=

In [None]:
# Call _filter_papers_time with the query string and display queried_papers_unfiltered.
# NOTE(review): this shows the *unfiltered* attribute after a filtering call -- confirm
# whether a filtered attribute was meant to be displayed here.
Arxiv_example._filter_papers_time(search_query_string)
Arxiv_example.queried_papers_unfiltered 

In [None]:
# Call the public arxiv_papers method with the query string, then display fav_papers,
# which is presumably set by that call -- TODO confirm against the Arxiv class.
Arxiv_example.arxiv_papers(search_query_string)
Arxiv_example.fav_papers