# A Jupyter Widget To Interactively Explore Citations to a Paper

By [Jean Kossaifi](http://jeankossaifi.com)

[![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/JeanKossaifi.svg?style=social&label=Follow%20JeanKossaifi)](https://twitter.com/JeanKossaifi)


In [1]:
import requests
import pickle
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, HTML
from ipywidgets import Layout

I first wrote a convenience function to call the Semantic Scholar API: 

In [2]:
def get_article_citations(CorpusId=None, DOI=None, ARXIV=None, PMID=None, 
                          PMCID=None, URL=None, ACL=None, MAG=None,
                          filename_prefix='./article_citations', verbose=True):
    """Fetch the citations of one paper from the Semantic Scholar Graph API.

    Exactly one of the identifier arguments (``CorpusId``, ``DOI``, ``ARXIV``,
    ``PMID``, ``PMCID``, ``URL``, ``ACL``, ``MAG``) must be given.

    Parameters
    ----------
    CorpusId, DOI, ARXIV, PMID, PMCID, URL, ACL, MAG : optional
        The paper identifier; the argument name is used as the ID prefix in
        the request URL (``.../paper/{name}:{value}/citations``).
    filename_prefix : str or None, default './article_citations'
        If not None, results are cached in the pickle file
        ``filename_prefix + '{name}_{value}.pkl'`` and re-read from there on
        later calls. If None, the API is always queried and nothing is saved.
    verbose : bool, default True
        If True, print how many citations were obtained.

    Returns
    -------
    list
        The ``'data'`` field of the API response (one entry per citation).

    Raises
    ------
    ValueError
        If no identifier, or more than one identifier, is provided.
    requests.HTTPError
        If the API answers with an error status code.
    """
    candidate_ids = {
        'CorpusId': CorpusId, 'DOI': DOI, 'ARXIV': ARXIV, 'PMID': PMID,
        'PMCID': PMCID, 'URL': URL, 'ACL': ACL, 'MAG': MAG,
    }
    valid_ids = {k: v for k, v in candidate_ids.items() if v is not None}
    if not valid_ids:
        raise ValueError('At least one form of paper ID must be provided')
    elif len(valid_ids) > 1:
        raise ValueError(f'Only one form of ID should be given, but got {", ".join(valid_ids.keys())}')
    key, value = next(iter(valid_ids.items()))

    # NOTE(review): a single request with limit=999 is made; papers with more
    # citations than that are presumably truncated -- confirm against the API
    # documentation if complete lists are needed.
    query = (f'https://api.semanticscholar.org/graph/v1/paper/{key}:{value}/citations?'
             'fields=title,'
             'authors,'
             'isInfluential,'
             'url,'
             'abstract,'
             'venue,'
             'year,'
             'referenceCount,'
             'citationCount,'
             'influentialCitationCount,'
             'intents,'
             'contexts'
             '&limit=999')

    save_file = None
    if filename_prefix is not None:
        # DOIs and URLs contain path separators; replace them so the cache
        # file does not end up inside a non-existent sub-directory.
        save_suffix = f'{key}_{value}'.replace('/', '_').replace('\\', '_')
        save_file = Path(filename_prefix + save_suffix + '.pkl')

    if save_file is not None and save_file.exists():
        # The cache is a pickle written by this function; only load cache
        # files you created yourself (unpickling untrusted files is unsafe).
        with open(save_file, 'rb') as f:
            citations = pickle.load(f)
    else:
        response = requests.get(query)
        # Fail with a clear HTTP error instead of a KeyError on 'data'.
        response.raise_for_status()
        citations = response.json()['data']

        if save_file is not None:
            with open(save_file, 'wb') as f:
                pickle.dump(citations, f)

    if verbose:
        print(f'Fetched {len(citations)} citations for document {key}={value}.')

    return citations

## Visualising the result 

Let's see what a single citation looks like

In [3]:
# Example: let's look at the citations for the TensorLy paper
# (with the default filename_prefix the result is also cached in a pickle file)
citations = get_article_citations(CorpusId=1104773)

Fetched 188 citations for document CorpusId=1104773.


Now, rather than stare at the raw JSON data, let's present it in a readable form. 

In [4]:
def tag(name, content='', href=None, cls=None):
    """Build the HTML string ``<name ...>content</name>``.

    ``href`` and ``cls`` are optional; when given they are emitted (in that
    order) as the ``href`` and ``class`` attributes of the opening tag.
    ``content`` is inserted verbatim and must be a string.
    """
    parts = [f'<{name}']
    for attribute, attr_value in (('href', href), ('class', cls)):
        if attr_value is not None:
            parts.append(f' {attribute}="{attr_value}"')
    parts.append('>')
    parts.append(content)
    parts.append(f'</{name}>')

    return ''.join(parts)

def citation_to_html(citation):
    """Render one citation record as an HTML snippet.

    Parameters
    ----------
    citation : dict
        One entry of the list returned by ``get_article_citations``. The keys
        read here are ``'citingPaper'`` (with ``'title'``, ``'authors'``,
        ``'venue'``, ``'year'``, ``'citationCount'``, ``'url'``,
        ``'abstract'``), ``'isInfluential'``, ``'intents'`` and ``'contexts'``.

    Returns
    -------
    str
        HTML with the title (starred when influential), authors, venue, year
        and citation count, intents, a link, the citation contexts and the
        abstract -- each part only when the corresponding field is non-empty.
    """
    # Local import: the text comes from an external API, so it is escaped
    # before being embedded in HTML.
    from html import escape

    paper = citation['citingPaper']

#     res = '<head> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css"></head>'
    title = escape(paper['title'] or '')
    if citation['isInfluential']:
        title = tag('i', cls='fas fa-star') + '&nbsp;' + title
    res = tag(name='h3', content=title)

    if paper['authors']:
        author_names = ", ".join(escape(author['name']) for author in paper['authors'])
        res += tag('h4', 'By ' + author_names, cls='subtitle')

    if paper['venue']:
        res += tag('em', 'In ' + escape(paper['venue']))
    else:
        res += 'Unknown venue'

    # This used to be `res + ...`, which discarded the year/citation-count text.
    res += f", {paper['year']}. Got {paper['citationCount']} citations. "

    if citation['intents']:
        res += "&emsp; Intents=" + ", ".join(f'"{escape(c)}"' for c in citation['intents'])
    if paper['url']:
        res += '&emsp;' + tag('a', cls='button is-link', href=escape(paper['url']), content='[link]')
    if citation['contexts']:
        res += "<br/><br/> &emsp;" + tag('em', tag('b', 'Contexts of the citation: ')) + "<br/>"
        res += tag('ul', content="".join(tag('li', escape(s)) for s in citation['contexts']))
    if paper['abstract']:
        abstract_label = tag('em', tag('b', '&emsp; Abstract &emsp;'))
        res += tag('div', tag('p', abstract_label + escape(paper['abstract'])), cls='abstract')

    return res

Let's view one of the citations:

In [5]:
# Render one citation (index 17 of the list fetched above) as HTML
HTML(citation_to_html(citations[17]))

## A Jupyter Widget to interactively check the citations

Next, I wrote a quick widget to conveniently browse through the citations:

In [6]:
class CitationManager():
    """Track the currently selected citation and keep the widgets in sync.

    Parameters
    ----------
    data : list or None
        Citation records to browse. May be None, in which case the manager
        starts empty until ``set_data`` is called.
    start : int, default 0
        Index of the citation selected initially.
    text : optional
        Object whose ``value`` attribute receives the HTML of the selected
        citation (an ``ipywidgets.HTML`` in this notebook). Ignored if None.
    slider : optional
        Object whose ``value`` attribute receives the selected index
        (an ``ipywidgets.IntSlider`` in this notebook). Ignored if None.

    Attributes
    ----------
    value : int
        Index of the selected citation.
    min : int
        Smallest selectable index (inclusive).
    max : int
        Number of citations, i.e. the exclusive upper bound for ``value``.
    data : list
        The citation records.
    """

    def __init__(self, data, start=0, text=None, slider=None):
        self.value = start
        self.text = text
        self.slider = slider
        # Define the browsing state up front so the event handlers are safe
        # to call before any data has been loaded.
        self.data = []
        self.min = 0
        self.max = 0
        if data is not None:
            self.set_data(data, start)

    def _update_text(self):
        """Show the selected citation in the text widget (if there is one)."""
        if self.text is None:
            return
        if 0 <= self.value < len(self.data):
            self.text.value = citation_to_html(self.data[self.value])
        else:
            # Nothing to show (no data, or index out of range).
            self.text.value = ''

    def _update_slider(self):
        """Move the slider widget (if there is one) to the selected index."""
        if self.slider is not None:
            self.slider.value = int(self.value)

    def previous_fun(self, button):
        """Button callback: select the previous citation, if any."""
        if self.value > self.min:
            self.value -= 1
        self._update_text()
        self._update_slider()

    def next_fun(self, button):
        """Button callback: select the next citation, if any."""
        # `max` is exclusive, so the last valid index is max - 1.
        if self.value + 1 < self.max:
            self.value += 1
        self._update_text()
        self._update_slider()

    def set_value(self, change):
        """Observer callback: select index ``change['new']`` if it is valid."""
        if self.min <= change['new'] < self.max:
            self.value = change['new']
        self._update_text()

    def set_data(self, data, start=0):
        """Replace the citation list and select index ``start``."""
        self.value = start
        # The lower bound is the first citation, matching `set_value`;
        # `start` is only the initial position.
        self.min = 0
        self.max = len(data)
        self.data = data
        self._update_text()

# ---------------------------------------------------------------------------
# 1. Build the individual widgets
# ---------------------------------------------------------------------------
button_height = '30px'

# Search row: kind of identifier, the identifier itself, and a trigger button
paper_type = widgets.Dropdown(
    options=['CorpusId', 'DOI', 'ARXIV', 'PMID', 'PMCID', 'URL', 'ACL', 'MAG'],
    value='CorpusId',
    description='Identifier:',
)
paper_id = widgets.Text(
    value='1104773',
    placeholder='Paper ID',
    description='Paper ID:',
    disabled=False
)
search_button = widgets.Button(
    description='Explore',
    disabled=False,
    button_style='info', 
    tooltip='Explore paper citations',
    icon='arrow-right',
    layout=Layout(width='15%', height=button_height, margin='0 0 0 auto')
)

# Navigation row: play control, previous/next buttons and an index slider
play = widgets.Play(
    interval=150,
    value=0,
    min=0,
    step=1,
    description="Press play",
    disabled=False,
    layout=Layout(margin='0 auto 0 0')
)
previous_button = widgets.Button(
    description='Previous',
    disabled=False,
    tooltip='previous citation',
    icon='arrow-left',
    layout=Layout(width='10%', height=button_height)
)
slider = widgets.IntSlider(value=0, min=0,
                            layout=Layout(width='60%', height=button_height))
next_button = widgets.Button(
    description='Next',
    disabled=False,
    tooltip='next citation',
    icon='arrow-right',
    layout=Layout(width='10%', height=button_height)
)

# Area in which the selected citation is rendered
text = widgets.HTML()

# ---------------------------------------------------------------------------
# 2. Wire the widgets together through a CitationManager
# ---------------------------------------------------------------------------
# The manager starts without data; it is filled when "Explore" is pressed.
manager = CitationManager(start=0, data=None, text=text, slider=slider)

# Slider changes update the displayed text ...
slider.observe(manager.set_value, names='value')
# ... the play control drives the slider ...
widgets.jslink((play, 'value'), (slider, 'value'))
# ... and the buttons step backwards / forwards.
previous_button.on_click(manager.previous_fun)
next_button.on_click(manager.next_fun)

# ---------------------------------------------------------------------------
# 3. Lay everything out
# ---------------------------------------------------------------------------
search_line = widgets.HBox([paper_type, paper_id, search_button])
explore_line = widgets.HBox([play, previous_button, slider, next_button])
citation_explorer = widgets.VBox([search_line, explore_line, text], box_style='info')

def update_citations(button):
    """Button callback: load the citations of the requested paper.

    Reads the identifier type and value from the ``paper_type`` and
    ``paper_id`` widgets, fetches the citations with
    ``get_article_citations``, hands them to ``manager`` and resets the
    slider and play controls to cover exactly the valid indices.
    """
    # Strip whitespace that easily sneaks in when an ID is pasted.
    ref = {paper_type.value: paper_id.value.strip()}
    data = get_article_citations(**ref, verbose=False)
    manager.set_data(data)

    # Go back to the first citation so the controls agree with the manager.
    slider.value = 0
    play.value = 0

    # Valid indices are 0 .. len(data) - 1; never let max drop below min (0).
    last_index = max(len(data) - 1, 0)
    slider.max = last_index
    play.max = last_index

search_button.on_click(update_citations)

## The Citation Explorer Widget In Action

And here's the result: just enter the ID of the paper whose citations you want to browse and press Explore!

In [7]:
# Show the assembled explorer (search row, navigation row and citation text)
display(citation_explorer)

VBox(box_style='info', children=(HBox(children=(Dropdown(description='Identifier:', options=('CorpusId', 'DOI'…