In [76]:
import json
import urllib
import urllib.request
import warnings

class SemanticScholarMetaDataExtractor():
    """Downloads paper metadata from the Semantic Scholar v1 endpoint, keyed by arXiv id."""

    def __init__(self):
        # Base URL; the arXiv id is appended verbatim to build the request URL.
        self.API_ID = 'https://api.semanticscholar.org/v1/paper/arXiv:'

    def get_response(self, paper_id):
        """Open the endpoint URL for ``paper_id`` and return the open response.

        Args:
            paper_id: arXiv identifier as a string (e.g. ``"1806.07366"``).

        Returns:
            The object returned by ``urllib.request.urlopen``.  The caller
            owns it and is responsible for closing it.

        Raises:
            Whatever ``urllib.request.urlopen`` raises for the request.
        """
        paper_url = self.API_ID + paper_id
        return urllib.request.urlopen(paper_url)

    def get_data_json(self, paper_id):
        """Fetch the metadata for ``paper_id`` and return it decoded from JSON.

        The response is closed as soon as its body has been read, so no
        connection is left open behind the returned data.
        """
        with self.get_response(paper_id) as response:
            return json.loads(response.read())

class ArXivPaper():
    """Semantic Scholar metadata for one paper, narrowed to influential arXiv neighbours.

    ``paper`` is either an arXiv id string (the metadata is then downloaded
    through ``SemanticScholarMetaDataExtractor``) or an already-loaded
    metadata dict.  During construction the ``citations`` and ``references``
    lists are reduced to the entries whose ``isInfluential`` flag is ``True``
    and whose ``arxivId`` is not ``None``; the resulting lengths are stored
    under ``numCitations`` and ``numReferences``.

    A dict argument is shallow-copied first, so the filtering never modifies
    the dict owned by the caller.

    Raises:
        TypeError: if ``paper`` is neither a ``str`` nor a ``dict``.
        KeyError: if any key of ``essential_metadata_keys`` is missing.
    """

    # Fields kept by the get_top_k_*_information summaries.
    _INFO_KEYS = ('arxivId', 'authors', 'title', 'url', 'venue', 'year')

    def __init__(self, paper):
        self.paper = paper

        self.essential_metadata_keys = {'abstract', 'arxivId', 'authors', 'citations', 'influentialCitationCount',
                                        'doi', 'fieldsOfStudy', 'paperId', 'references',
                                        'title', 'topics', 'url', 'venue', 'year'}

        self.representational_info_keys = ['abstract', 'authors', 'url', 'year',
                                           'fieldsOfStudy', 'numCitations', 'venue', 'numReferences']

        self.check_paper()
        self.check_relevant_keys()
        self.discard_non_influential_citations()
        self.discard_non_influential_references()
        self.discard_none_arxiv_references()
        self.discard_none_arxiv_citations()
        self.set_num_references()
        self.set_num_citations()

    def check_paper(self):
        """Resolve ``self.paper`` to a metadata dict owned by this instance.

        A string is treated as an arXiv id and downloaded (with a warning);
        a dict is shallow-copied; anything else raises ``TypeError``.
        """
        if isinstance(self.paper, str):
            warnings.warn("Paper not present in memory. Extracting Paper MetaData from Semantic Scholar!")
            metadata_extractor = SemanticScholarMetaDataExtractor()
            self.paper = metadata_extractor.get_data_json(self.paper)
        elif isinstance(self.paper, dict):
            # Copy so that replacing 'citations'/'references' and adding the
            # num* keys below does not leak into the caller's dict.
            self.paper = dict(self.paper)
        else:
            raise TypeError("Paper must be a Dict or an Arxiv Id")

    def check_relevant_keys(self):
        """Raise ``KeyError`` naming every essential key absent from the paper."""
        missing_keys = self.essential_metadata_keys.difference(self.paper.keys())
        if missing_keys:
            # Sorted so the message is the same on every run.
            error_message = "The following essential keys are missing from the paper: " + \
                            ", ".join(sorted(missing_keys))
            raise KeyError(error_message)

    @staticmethod
    def _is_influential(entry):
        """True only when the entry's 'isInfluential' flag is exactly ``True``."""
        return entry.get('isInfluential') is True

    @staticmethod
    def _has_arxiv_id(entry):
        """True when the entry carries a non-``None`` 'arxivId'."""
        return entry.get('arxivId') is not None

    def discard_non_influential_citations(self):
        """Keep only the citations flagged as influential."""
        self.paper['citations'] = [c for c in self.paper['citations'] if self._is_influential(c)]

    def discard_none_arxiv_references(self):
        """Keep only the references that have an arXiv id."""
        self.paper['references'] = [r for r in self.paper['references'] if self._has_arxiv_id(r)]

    def discard_none_arxiv_citations(self):
        """Keep only the citations that have an arXiv id."""
        self.paper['citations'] = [c for c in self.paper['citations'] if self._has_arxiv_id(c)]

    def discard_non_influential_references(self):
        """Keep only the references flagged as influential."""
        self.paper['references'] = [r for r in self.paper['references'] if self._is_influential(r)]

    def set_num_references(self):
        """Store the current number of references under 'numReferences'."""
        self.paper['numReferences'] = len(self.paper['references'])

    def set_num_citations(self):
        """Store the current number of citations under 'numCitations'."""
        self.paper['numCitations'] = len(self.paper['citations'])

    def __getitem__(self, key):
        return self.paper[key]

    def __repr__(self):
        """Multi-line summary: title first, then each key of ``representational_info_keys``."""
        parts = [f"Paper Title: {self['title']} \n\n"]
        for idx, key in enumerate(self.representational_info_keys, start=1):
            if key == 'abstract':
                parts.append(f"{idx}) Abstract: \n{self[key]} \n\n")
            elif key == 'authors':
                parts.append(f"{idx}) Authors:\n")
                for i, author in enumerate(self[key], start=1):
                    # .get: an author entry without 'name'/'url' is shown as
                    # None instead of making repr() raise.
                    parts.append(f"\t{i}) Name: {author.get('name')}\n")
                    parts.append(f"\tURL: {author.get('url')}\n")
                    parts.append("\n")
            else:
                parts.append(f"{idx}) {key}: {self[key]} \n\n")
        return "".join(parts)

    def _top_k_information(self, list_key, count_key, label, k):
        """Summarise the first ``k`` entries of ``self[list_key]``.

        Warns and clamps ``k`` when it exceeds ``self[count_key]``.  Each
        summary keeps only the fields listed in ``_INFO_KEYS``.
        """
        total = self[count_key]
        if k > total:
            warnings.warn(f"Total {label} are {total}. Retrieving all {label}")
            k = total
        # max(k, 0): a negative k yields an empty list rather than a
        # slice counted from the end.
        selected = self[list_key][:max(k, 0)]
        return [{key: val for key, val in entry.items() if key in self._INFO_KEYS}
                for entry in selected]

    def get_top_k_citations_information(self, k:int):
        """Return summaries (arxivId, authors, title, url, venue, year) of the first ``k`` citations."""
        return self._top_k_information('citations', 'numCitations', 'citations', k)

    def get_top_k_references_information(self, k:int):
        """Return summaries (arxivId, authors, title, url, venue, year) of the first ``k`` references."""
        return self._top_k_information('references', 'numReferences', 'references', k)

    def get_top_k_references_metadata(self, k:int):
        """Build an ``ArXivPaper`` for each of the first ``k`` references.

        Each one is constructed from its arXiv id string, i.e. its metadata
        is downloaded separately.
        """
        return [ArXivPaper(reference['arxivId'])
                for reference in self.get_top_k_references_information(k)]

    def get_top_k_citations_metadata(self, k:int):
        """Build an ``ArXivPaper`` for each of the first ``k`` citations.

        Each one is constructed from its arXiv id string, i.e. its metadata
        is downloaded separately.
        """
        return [ArXivPaper(citation['arxivId'])
                for citation in self.get_top_k_citations_information(k)]
    

class GraphNode():
    """A paper in the graph together with its fan-out limits.

    ``citation_children`` and ``reference_children`` start as ``None`` and are
    only filled by ``get_citation_children`` / ``get_reference_children``.
    """

    def __init__(self, paper:ArXivPaper, num_citations:int=10, num_references:int=10):
        self.paper = paper
        self.num_citations, self.num_references = num_citations, num_references
        self.citation_children = self.reference_children = None

    def __repr__(self):
        # Delegate to the wrapped paper's own representation.
        return self.paper.__repr__()

    def is_reference_leaf(self):
        """True when the wrapped paper reports zero references."""
        reference_count = self.paper['numReferences']
        return reference_count == 0

    def is_citation_leaf(self):
        """True when the wrapped paper reports zero citations."""
        citation_count = self.paper['numCitations']
        return citation_count == 0

    def get_citation_children(self):
        """Store up to ``num_citations`` citation summaries in ``citation_children``.

        Nothing is assigned when the paper has no citations.  Returns ``None``.
        """
        if self.is_citation_leaf():
            return
        limit = self.num_citations
        self.citation_children = self.paper.get_top_k_citations_information(limit)

    def get_reference_children(self):
        """Store up to ``num_references`` reference summaries in ``reference_children``.

        Nothing is assigned when the paper has no references.  Returns ``None``.
        """
        if self.is_reference_leaf():
            return
        limit = self.num_references
        self.reference_children = self.paper.get_top_k_references_information(limit)

    def get_citation_children_metadata(self):
        """Return abstract/arxivId/numCitations/title dicts for the citing papers."""
        wanted = ['abstract', 'arxivId', 'numCitations', 'title']
        children = self.paper.get_top_k_citations_metadata(self.num_citations)

        # Swap each paper object, in place, for a dict holding only the wanted fields.
        for position in range(len(children)):
            full_record = children[position].paper
            children[position] = dict(pair for pair in full_record.items() if pair[0] in wanted)

        return children

    def get_reference_children_metadata(self):
        """Return abstract/arxivId/numReferences/title dicts for the referenced papers."""
        wanted = ['abstract', 'arxivId', 'numReferences', 'title']
        children = self.paper.get_top_k_references_metadata(self.num_references)

        # Swap each paper object, in place, for a dict holding only the wanted fields.
        for position in range(len(children)):
            full_record = children[position].paper
            children[position] = dict(pair for pair in full_record.items() if pair[0] in wanted)

        return children

class Graph():
    """Citation/reference graph grown on demand from a root ``GraphNode``.

    ``citation_branch`` and ``reference_branch`` record, in call order, the
    nodes created by ``build_citations_subtree`` and
    ``build_references_subtree`` respectively.
    """

    def __init__(self, root:GraphNode):
        self.root = root
        self.citation_branch = []
        self.reference_branch = []

    def get_root_citations(self):
        """Return the trimmed metadata of the root's citing papers."""
        return self.root.get_citation_children_metadata()

    def get_root_references(self):
        """Return the trimmed metadata of the root's referenced papers."""
        return self.root.get_reference_children_metadata()

    def build_citations_subtree(self, arxiv_idx:str):
        """Create a node for ``arxiv_idx``, record it in ``citation_branch``,
        and return the trimmed metadata of its citing papers."""
        node = GraphNode(ArXivPaper(arxiv_idx))
        self.citation_branch.append(node)
        return node.get_citation_children_metadata()

    def build_references_subtree(self, arxiv_idx:str):
        """Create a node for ``arxiv_idx``, record it in ``reference_branch``,
        and return the trimmed metadata of its referenced papers."""
        node = GraphNode(ArXivPaper(arxiv_idx))
        # Must match the attribute created in __init__ (``reference_branch``);
        # the previous spelling ``references_branch`` raised AttributeError.
        self.reference_branch.append(node)
        return node.get_reference_children_metadata()

    
    

In [77]:
# Root node for arXiv id 1806.07366. Because a string is passed, ArXivPaper
# downloads the metadata from Semantic Scholar (and emits a warning).
# Note that `paper` is a GraphNode here, not an ArXivPaper.
paper = GraphNode(ArXivPaper("1806.07366"))

In [78]:
# Graph rooted at the node above; both branch lists start out empty.
g = Graph(paper)

In [79]:
# Trimmed metadata (abstract, arxivId, numCitations, title) for up to
# root.num_citations citing papers; each one is built from its arXiv id,
# so every entry costs a separate download.
g.get_root_citations()

[{'abstract': 'Over the past few years, deep learning has risen to the foreground as a topic of massive interest, mainly as a result of successes obtained in solving large-scale image processing tasks. There are multiple challenging mathematical problems involved in applying deep learning: most deep learning methods require the solution of hard optimisation problems, and a good understanding of the tradeoff between computational effort, amount of data and model complexity is required to successfully design a deep learning approach for a given problem. A large amount of progress made in deep learning has been based on heuristic explorations, but there is a growing effort to mathematically understand the structure in existing deep learning methods and to systematically design new deep learning methods to preserve certain types of structure in deep learning. In this article, we review a number of these directions: some deep neural networks can be understood as discretisations of dynamical

In [80]:
# Build a node for 1902.09689, record it in g.citation_branch, and return
# the trimmed metadata of its citing papers.
g.build_citations_subtree('1902.09689')

[{'abstract': 'Random Recurrent Neural Networks (RRNN) are the simplest recurrent networks to model and extract features from sequential data. The simplicity however comes with a price; RRNN are known to be susceptible to diminishing/exploding gradient problem when trained with gradient-descent based optimization. To enhance robustness of RRNN, alternative training approaches have been proposed. Specifically, FORCE learning approach proposed a recursive least squares alternative to train RRNN and was shown to be applicable even for the challenging task of target-learning, where the network is tasked with generating dynamic patterns with no guiding input. While FORCE training indicates that solving target-learning is possible, it appears to be effective only in a specific regime of network dynamics (edge-of-chaos). We thereby investigate whether initialization of RRNN connectivity according to a tailored distribution can guarantee robust FORCE learning. We are able to generate such dist

In [81]:
# Nodes recorded so far by build_citations_subtree (displayed via GraphNode.__repr__).
g.citation_branch

[Paper Title: AntisymmetricRNN: A Dynamical System View on Recurrent Neural Networks 
 
 1) Abstract: 
 Recurrent neural networks have gained widespread use in modeling sequential data. Learning long-term dependencies using these models remains difficult though, due to exploding or vanishing gradients. In this paper, we draw connections between recurrent networks and ordinary differential equations. A special form of recurrent networks called the AntisymmetricRNN is proposed under this theoretical framework, which is able to capture long-term dependencies thanks to the stability property of its underlying differential equation. Existing approaches to improving RNN trainability often incur significant computation overhead. In comparison, AntisymmetricRNN achieves the same goal by design. We showcase the advantage of this new architecture through extensive simulations and experiments. AntisymmetricRNN exhibits much more predictable dynamics. It outperforms regular LSTM models on tasks re

In [82]:
# Same as above for 1909.13334: appends a second node to g.citation_branch.
g.build_citations_subtree('1909.13334')

[{'abstract': 'Physical phenomena in the real world are often described by energy-based modeling theories, such as Hamiltonian mechanics or the Landau theory, which yield various physical laws. Recent developments in neural networks have enabled the mimicking of the energy conservation law by learning the underlying continuous-time differential equations. However, this may not be possible in discrete time, which is often the case in practical learning and computation. Moreover, other physical laws have been overlooked in the previous neural network models. In this study, we propose a deep energy-based physical model that admits a specific differential geometric structure. From this structure, the conservation or dissipation law of energy and the mass conservation law follow naturally. To ensure the energetic behavior in discrete time, we also propose an automatic discrete differential algorithm that enables neural networks to employ the discrete gradient method.',
  'arxivId': '1905.08

In [83]:
# Re-inspect the branch after the second build_citations_subtree call.
g.citation_branch

[Paper Title: AntisymmetricRNN: A Dynamical System View on Recurrent Neural Networks 
 
 1) Abstract: 
 Recurrent neural networks have gained widespread use in modeling sequential data. Learning long-term dependencies using these models remains difficult though, due to exploding or vanishing gradients. In this paper, we draw connections between recurrent networks and ordinary differential equations. A special form of recurrent networks called the AntisymmetricRNN is proposed under this theoretical framework, which is able to capture long-term dependencies thanks to the stability property of its underlying differential equation. Existing approaches to improving RNN trainability often incur significant computation overhead. In comparison, AntisymmetricRNN achieves the same goal by design. We showcase the advantage of this new architecture through extensive simulations and experiments. AntisymmetricRNN exhibits much more predictable dynamics. It outperforms regular LSTM models on tasks re

In [48]:
# NOTE(review): read top to bottom, `paper` is the GraphNode created in the
# first cell, and GraphNode defines no get_top_k_references_metadata (only
# ArXivPaper does). The execution count suggests this cell ran against an
# earlier binding of `paper` - confirm, or call it on `paper.paper`.
references = paper.get_top_k_references_metadata(3)

In [49]:
# Check the element type of the list returned by get_top_k_references_metadata.
type(references[0])

__main__.ArXivPaper

In [50]:
# Node with custom limits: up to 20 citations and 2 references.
# NOTE(review): GraphNode is annotated to take an ArXivPaper; if `paper` is
# still the GraphNode from the first cell this wraps a node in a node -
# confirm which binding of `paper` is intended.
node = GraphNode(paper=paper, num_citations=20, num_references=2)

In [51]:
# Trimmed metadata (abstract, arxivId, numReferences, title) for up to
# node.num_references referenced papers.
ref = node.get_reference_children_metadata()


In [52]:
# Show the reference metadata gathered in the previous cell.
ref

[{'abstract': 'Existing sequence prediction methods are mostly concerned with time-independent sequences, in which the actual time span between events is irrelevant and the distance between events is simply the difference between their order positions in the sequence. While this time-independent view of sequences is applicable for data such as natural languages, e.g., dealing with words in a sentence, it is inappropriate and inefficient for many real world events that are observed and collected at unequally spaced points of time as they naturally arise, e.g., when a person goes to a grocery store or makes a phone call. The time span between events can carry important information about the sequence dependence of human behaviors. In this work, we propose a set of methods for using time in sequence prediction. Because neural sequence models such as RNN are more amenable for handling token-like input, we propose two methods for time-dependent event representation, based on the intuition on

In [54]:
# Trimmed metadata (abstract, arxivId, numCitations, title) for up to
# node.num_citations citing papers.
cit = node.get_citation_children_metadata()

In [55]:
# Show the citation metadata gathered in the previous cell.
cit

[{'abstract': 'Over the past few years, deep learning has risen to the foreground as a topic of massive interest, mainly as a result of successes obtained in solving large-scale image processing tasks. There are multiple challenging mathematical problems involved in applying deep learning: most deep learning methods require the solution of hard optimisation problems, and a good understanding of the tradeoff between computational effort, amount of data and model complexity is required to successfully design a deep learning approach for a given problem. A large amount of progress made in deep learning has been based on heuristic explorations, but there is a growing effort to mathematically understand the structure in existing deep learning methods and to systematically design new deep learning methods to preserve certain types of structure in deep learning. In this article, we review a number of these directions: some deep neural networks can be understood as discretisations of dynamical

In [6]:
# Fills node.reference_children in place and returns None (hence no cell
# output); nothing is assigned when the paper has no references.
node.get_reference_children()

In [7]:
# NOTE(review): citation_children is only filled by node.get_citation_children(),
# which no visible cell calls; on a fresh top-to-bottom run this would
# display None - confirm whether that call is missing.
node.citation_children

[{'arxivId': '2006.03364',
  'authors': [{'authorId': '2791391', 'name': 'E. Celledoni'},
   {'authorId': '48125860', 'name': 'Matthias Joachim Ehrhardt'},
   {'authorId': '11371884', 'name': 'Christian Etmann'},
   {'authorId': '47283982', 'name': 'R. McLachlan'},
   {'authorId': '1868160', 'name': 'B. Owren'},
   {'authorId': '1711104', 'name': 'C. Schönlieb'},
   {'authorId': '52585009', 'name': 'F. Sherry'}],
  'title': 'Structure preserving deep learning',
  'url': 'https://www.semanticscholar.org/paper/96efa5af47c75fe90909cbceafe7524714c9e5b9',
  'venue': 'ArXiv',
  'year': 2020},
 {'arxivId': '1902.09689',
  'authors': [{'authorId': '144757437', 'name': 'B. Chang'},
   {'authorId': '1743082', 'name': 'Minmin Chen'},
   {'authorId': '145761835', 'name': 'E. Haber'},
   {'authorId': '2226805', 'name': 'Ed Huai-hsin Chi'}],
  'title': 'AntisymmetricRNN: A Dynamical System View on Recurrent Neural Networks',
  'url': 'https://www.semanticscholar.org/paper/e2c8a6b49cd999b16ac4dcfdc37

In [76]:
# Reference summaries stored by node.get_reference_children().
# (Attribute name corrected: GraphNode defines `reference_children`.)
node.reference_children

[{'arxivId': '1708.00065',
  'authors': [{'authorId': None, 'name': 'Yang Li'},
   {'authorId': '145585757', 'name': 'Nan Du'},
   {'authorId': '1751569', 'name': 'S. Bengio'}],
  'title': 'Time-Dependent Representation for Neural Event Sequence Prediction',
  'url': 'https://www.semanticscholar.org/paper/ec7bab52b2220a6cad410dd82b3fbe140d2196f0',
  'venue': 'ICLR',
  'year': 2018},
 {'arxivId': '1606.04130',
  'authors': [{'authorId': '32219137', 'name': 'Zachary Chase Lipton'},
   {'authorId': '2107807', 'name': 'David C. Kale'},
   {'authorId': '144616817', 'name': 'R. Wetzel'}],
  'title': 'Directly Modeling Missing Data in Sequences with RNNs: Improved Classification of Clinical Time Series',
  'url': 'https://www.semanticscholar.org/paper/562f33611cdc0d8ed6609aa09f153e6238d5409e',
  'venue': 'MLHC',
  'year': 2016},
 {'arxivId': '1505.05770',
  'authors': [{'authorId': '1748523', 'name': 'Danilo Jimenez Rezende'},
   {'authorId': '14594344', 'name': 'S. Mohamed'}],
  'title': 'Va

In [52]:
# NOTE(review): the output recorded below is a list of plain dicts, whereas
# `references` was assigned ArXivPaper objects earlier in this notebook -
# this looks like stale output from a different binding; confirm.
references

[{'arxivId': '1708.00065',
  'authors': [{'authorId': None, 'name': 'Yang Li'},
   {'authorId': '145585757', 'name': 'Nan Du'},
   {'authorId': '1751569', 'name': 'S. Bengio'}],
  'title': 'Time-Dependent Representation for Neural Event Sequence Prediction',
  'url': 'https://www.semanticscholar.org/paper/ec7bab52b2220a6cad410dd82b3fbe140d2196f0',
  'venue': 'ICLR',
  'year': 2018},
 {'arxivId': '1606.04130',
  'authors': [{'authorId': '32219137', 'name': 'Zachary Chase Lipton'},
   {'authorId': '2107807', 'name': 'David C. Kale'},
   {'authorId': '144616817', 'name': 'R. Wetzel'}],
  'title': 'Directly Modeling Missing Data in Sequences with RNNs: Improved Classification of Clinical Time Series',
  'url': 'https://www.semanticscholar.org/paper/562f33611cdc0d8ed6609aa09f153e6238d5409e',
  'venue': 'MLHC',
  'year': 2016},
 {'arxivId': '1505.05770',
  'authors': [{'authorId': '1748523', 'name': 'Danilo Jimenez Rezende'},
   {'authorId': '14594344', 'name': 'S. Mohamed'}],
  'title': 'Va

In [53]:
# One ArXivPaper per reference (each built from its arXiv id, i.e. downloaded).
# NOTE(review): requires `paper` to be an ArXivPaper; the GraphNode bound to
# `paper` in the first cell has no get_top_k_references_metadata - confirm.
references_papers = paper.get_top_k_references_metadata(3)

In [54]:
# Show the fetched reference papers (rendered through ArXivPaper.__repr__).
references_papers

[Paper Title: Time-Dependent Representation for Neural Event Sequence Prediction 
 
 1) Abstract: 
 Existing sequence prediction methods are mostly concerned with time-independent sequences, in which the actual time span between events is irrelevant and the distance between events is simply the difference between their order positions in the sequence. While this time-independent view of sequences is applicable for data such as natural languages, e.g., dealing with words in a sentence, it is inappropriate and inefficient for many real world events that are observed and collected at unequally spaced points of time as they naturally arise, e.g., when a person goes to a grocery store or makes a phone call. The time span between events can carry important information about the sequence dependence of human behaviors. In this work, we propose a set of methods for using time in sequence prediction. Because neural sequence models such as RNN are more amenable for handling token-like input, we p

In [13]:
#

In [21]:
# Count the citation entries whose 'isInfluential' flag equals True.
# NOTE(review): subscripting `paper` needs an object with __getitem__
# (ArXivPaper or a raw dict), not the GraphNode bound in the first cell - confirm.
c = sum(1 for citation in paper['citations'] if citation['isInfluential'] == True)

In [22]:
# Number of influential citations counted in the previous cell.
c

186

In [35]:
# First entry of the citations list held by the object wrapped in `paper`.
# NOTE(review): the recorded output has arxivId None and isInfluential False,
# i.e. an entry the ArXivPaper constructor filters out - the output appears
# to predate that filtering; confirm.
paper.paper['citations'][0]

{'arxivId': None,
 'authors': [{'authorId': '104314859', 'name': 'Fred Daum'},
  {'authorId': '50535618', 'name': 'J. Huang'},
  {'authorId': '9130376', 'name': 'A. Noushin'}],
 'doi': '10.1117/12.2517980',
 'intent': ['background'],
 'isInfluential': False,
 'paperId': 'd13739de9b7e22eea9ff03c23d322817c14bdfd8',
 'title': "Extremely deep Bayesian learning with Gromov's method",
 'url': 'https://www.semanticscholar.org/paper/d13739de9b7e22eea9ff03c23d322817c14bdfd8',
 'venue': 'Defense + Commercial Sensing',
 'year': 2019}