<div align='left' style="width:29%;overflow:hidden;">
<a href='http://inria.fr'>
<img src='https://github.com/lmarti/jupyter_custom/raw/master/imgs/inr_logo_rouge.png' alt='Inria logo' title='Inria logo'/>
</a>
</div>

# RISOTTO

> A working document for testing a web GUI for RISOTTO

In [17]:
!pip install -q -r requirements.txt

In [2]:
import pickle
from collections import defaultdict
from pathlib import Path

import scispacy
import en_core_sci_sm
import networkx as nx
import numpy as np
from fastprogress.fastprogress import progress_bar
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

from risotto.lda import process_papers_file_contents, tokenizer
from risotto.references import load_papers_from_metadata_file, build_papers_reference_graph, paper_as_markdown

In [3]:
# Root folder of the CORD-19 dataset, given relative to the current working directory.
cord19_dataset_folder = Path('./datasets/CORD-19-research-challenge')

In [4]:
# Load the papers from the dataset folder. The loader returns a pair; only the
# first element (the papers) is kept, the second is deliberately discarded.
papers, _ = load_papers_from_metadata_file(cord19_dataset_folder)

In [16]:
# Sanity check: display the bibliography entries of the second loaded paper (index 1).
papers[1].bib_entries

{'BIBREF0': {'title': '',
  'authors': [],
  'year': None,
  'venue': 'FEBS Lett.',
  'volume': '301',
  'issn': '',
  'pages': '207-214',
  'other_ids': {'DOI': []}},
 'BIBREF1': {'title': '',
  'authors': [],
  'year': None,
  'venue': 'Acta Crystallogr. D',
  'volume': '50',
  'issn': '',
  'pages': '760-763',
  'other_ids': {'DOI': []}},
 'BIBREF2': {'title': '',
  'authors': [],
  'year': None,
  'venue': 'J. Virol.',
  'volume': '65',
  'issn': '',
  'pages': '6881-6891',
  'other_ids': {'DOI': []}},
 'BIBREF3': {'title': '',
  'authors': [],
  'year': None,
  'venue': 'J. Virol.',
  'volume': '67',
  'issn': '',
  'pages': '1-8',
  'other_ids': {'DOI': []}},
 'BIBREF4': {'title': '',
  'authors': [],
  'year': None,
  'venue': 'Proc. Natl Acad. Sci. USA',
  'volume': '90',
  'issn': '',
  'pages': '1716-1720',
  'other_ids': {'DOI': []}},
 'BIBREF5': {'title': '',
  'authors': [],
  'year': None,
  'venue': 'Biochemistry',
  'volume': '14',
  'issn': '',
  'pages': '4943-4952',


In [7]:
# Build the reference graph over the loaded papers; it is consumed by
# networkx (`nx.pagerank`) in the next step.
G = build_papers_reference_graph(papers)

In [8]:
# PageRank score of every node in the reference graph, as a {node: score} mapping.
pageranks = nx.pagerank(G)

In [13]:
# Peek at the first key to check what kind of object the graph nodes are.
# `next(iter(...))` reads just that key instead of copying every key into a
# temporary list only to index element 0.
next(iter(pageranks))

<risotto.references.PMCPaper at 0x11219ff40>

In [6]:
# Run every node of the PageRank result (the paper objects) through the project's
# content-processing step; iterating a dict yields its keys, in insertion order.
# NOTE(review): presumably yields one text document per paper — confirm in risotto.lda.
docs = process_papers_file_contents(list(pageranks))

In [7]:
# Bag-of-words vectorizer: text is lower-cased and split by the project's `tokenizer`.
count_vectorizer = CountVectorizer(tokenizer=tokenizer, lowercase=True)
# Learn the vocabulary from `docs` and build the document-term count matrix in one pass.
vectorized_docs = count_vectorizer.fit_transform(docs)

In [8]:
# Fit a 10-topic LDA model on the document-term matrix, using all CPU cores (n_jobs=-1).
# The fixed random_state makes the fitted topics reproducible across kernel restarts;
# without it, each run yields a different model.
lda = LatentDirichletAllocation(n_components=10, verbose=0, n_jobs=-1, random_state=42)
lda = lda.fit(vectorized_docs)

In [15]:
# Single consolidated import: the original imported Button and Layout twice.
from ipywidgets import Accordion, AppLayout, Button, IntSlider, IntText, Layout, Text, jslink

def create_expanded_button(description, button_style):
    """Return a Button whose layout height and width are both set to 'auto'.

    description: text shown on the button.
    button_style: forwarded unchanged to Button's `button_style` argument
        (the notebook uses 'success', 'info' and 'warning').
    """
    return Button(description=description, button_style=button_style, layout=Layout(height='auto', width='auto'))


In [None]:
# Persist the reference graph as a pickle. `nx.write_gpickle` was removed in
# NetworkX 3.0; the NetworkX migration guide recommends the standard `pickle`
# module instead, and the `with` block guarantees the file is closed.
# NOTE(review): the file holds a pickled graph, not GML, despite its '.gml'
# suffix. The path is left unchanged so existing readers still find it.
with open(cord19_dataset_folder/'pagerank.gml', 'wb') as graph_file:
    pickle.dump(G, graph_file, pickle.HIGHEST_PROTOCOL)

In [10]:
# Number of nodes that received a PageRank score.
len(pageranks)

44648

In [16]:
# Placeholder buttons for the five AppLayout regions, built in one pass from a
# (label, style) table and unpacked in the same order as the table rows.
header_button, left_button, center_button, right_button, footer_button = (
    create_expanded_button(label, style)
    for label, style in (
        ('Header', 'success'),
        ('Left', 'info'),
        ('Center', 'warning'),
        ('Right', 'info'),
        ('Footer', 'success'),
    )
)

In [17]:
# Accordion with two panels: an integer slider and a text box.
accordion = Accordion(children=[IntSlider(), Text()])
# Panel titles are assigned by child index, so they must follow the order of `children`.
accordion.set_title(0, 'Slider')
accordion.set_title(1, 'Text')

In [18]:
# Assemble and display the GUI mock-up. The accordion takes the left sidebar,
# so `left_button` is not used in this layout.
AppLayout(header=header_button,
          left_sidebar=accordion,
          center=center_button,
          right_sidebar=right_button,
          footer=footer_button)

AppLayout(children=(Button(button_style='success', description='Header', layout=Layout(grid_area='header', hei…

---

In [18]:
# tell nbdev to generate library from notebooks
# Import only the name that is used; a star import hides where names come from
# and can silently shadow variables defined earlier in the notebook.
from nbdev.export import notebook2script
notebook2script()

Converted 00_downloader.ipynb.
Converted 01_references.ipynb.
Converted 02_representations_and_lda.ipynb.
Converted 03_hierarchical_topic_modelling.ipynb.
Converted 99_risotto_gui.ipynb.
Converted index.ipynb.


In [19]:
# this code is here for cosmetic reasons
# HTML is imported from the public IPython.display module rather than the
# internal IPython.core.display path.
from IPython.display import HTML
from urllib.request import urlopen

# NOTE(review): this renders HTML downloaded at run time from a remote repository;
# vendor or pin the file if that is a concern.
# The `with` block closes the HTTP response once the body has been read.
with urlopen('https://raw.githubusercontent.com/lmarti/jupyter_custom/master/custom.include') as response:
    custom_style = response.read().decode('utf-8')
HTML(custom_style)

---