In [1]:
import dash
print(dash.__version__)
import dash_bootstrap_components as dbc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
print(dcc.__version__)
import dash_table
import warnings
warnings.filterwarnings("ignore")

1.20.0
1.16.0


In [2]:
from Bio import Entrez
from Bio import Medline
import re
import nltk
from nltk import word_tokenize
nltk.download('punkt')
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

import pandas as pd
import numpy as np


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\gharbi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\gharbi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\gharbi\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
def second_preprocessing(df_corpus):
    j=0
    for i in df_corpus['Abstract']:
        if i=="null":
        df_corpus = df_corpus.drop(labels=j)
    j=j+1
    return df_corpus


In [3]:

def preprocessing(text):
    
    # suppression de pontuation et caracteres numériques
    text = re.sub('[^a-zA-Z]',' ', text)
    # en lettres minuscules 
    text = text.lower()
    # tokenisation : prendre chaque mot a sa case
    text = word_tokenize(text)
    # suppression des stop words
    stop_words = stopwords.words('english')
    text = [word for word in text if word not in stop_words]
    # Lemmatiser les mots : exemple : Screening => screen , investigated => investigate
    lemma = WordNetLemmatizer()
    text = [lemma.lemmatize(word=w, pos='v') for w in text]
    # supprimer les mots de taille inférieur à 2
    text= [i for i in text if len(i) > 2]
    # reconvertir en String
    text = ' '.join(text)
    
    return text
def search(term, rmax):
    Entrez.email = ''
    handle = Entrez.esearch(db='pubmed', # DB
                            sort='relevance',  # tri par relevance
                            retmax=rmax, # combien d'articles
                            retmode='xml', 
                            term=term) # mot clé
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

    
def parse_authors(id_list):
    h = Entrez.efetch(db='pubmed', id=id_list, rettype='medline', retmode='text')
    records = Medline.parse(h)
    authors_list = []
    for record in records:
        au = record.get('AU', '?')
        for a in au: 
            if a not in authors_list:
                authors_list.append(a)
    return authors_list

def article_date(source):
    try :
        return source[0]['Year'] + '-' + source[0]['Month'] + '-' + source[0]['Day']
    except : return np.nan
    
def recup(num,terme):
    title_list = []
    abst_list = []
    date_list = []
    results = search(terme, num)
    #   IDs :
    id_list = results['IdList']

    papers = fetch_details(id_list)
    for i, paper in enumerate(papers['PubmedArticle']):

    #       titles : 
        title = paper['MedlineCitation']['Article']['ArticleTitle']
        print("{}) {}".format(i+1, title))
        title_list.append(title)

    #       Abstracts : 
        try:
            abst = paper['MedlineCitation']['Article']['Abstract']['AbstractText']
            abst = str(abst).strip("['']")
            abst_list.append(abst)
        except:
            abst = "null"
            abst_list.append(abst)

    #       Dates : 
        d = article_date(paper['MedlineCitation']['Article']['ArticleDate'])
        date_list.append(d)

    #   Authors:
    auth_list = []
    for id in id_list:
        auth = parse_authors(id)
        auth_list.append(auth)

    for i, abst in enumerate(abst_list):
        abst_list[i] = preprocessing(abst)

    dict_corpus = {'Article ID' : id_list, 'Titre' : title_list, 'Auteurs': auth_list, 'Date': date_list, 'Abstract' : abst_list}
    df_corpus = pd.DataFrame(dict_corpus)

    for i,a in enumerate(df_corpus['Auteurs']):
        df_corpus['Auteurs'][i] = ', '.join(a)
    df_corpus = second_preprocessing(df_corpus)
    return df_corpus


In [None]:
df = recup(1000,"FEVER")

In [4]:
def vectorisation_text(text,mindf,maxdf):    
    vectorizer = TfidfVectorizer(min_df=mindf,
                               max_df=maxdf,
                               max_features=None,
                               stop_words='english').fit(text)
    
    X = vectorizer.fit_transform(text)
    features = vectorizer.get_feature_names()
    return X, features

# Pages

In [5]:
app = dash.Dash(__name__,external_stylesheets=[dbc.themes.BOOTSTRAP],prevent_initial_callbacks=True)

In [6]:
search_bar = dbc.Row(
    [
        dbc.Col(
                dbc.Input(id="num_max",type="number", placeholder="articles numbre", className="mr-3"),className="col-4"),
        dbc.Col(dbc.Input(id ="word", type="search", placeholder="Covid-19", className="mr-3"),className="col-4"),
        dbc.Col(
            dbc.Button("recuperer",id="submitt_search", color="success", className="ml-2"),
            width="auto",
        ),
    ],
    no_gutters=True,
    className="mr-auto flex-nowrap mt-3 mt-md-0",
    align="center",
    id="navbar_out"
)

sidebar = html.Div(
    id = "links",
    className="sidebare",
    children =[
        dbc.Nav(
            [
                dbc.Button("details fetching", href="/page-1", active="exact", id='issam'),
                dbc.Button("Corpus exploitation",id="moh", href="/page-2", active="exact",n_clicks=0),
            ],
            vertical=True,
            pills=True,
        ),
    ]
)
graphe = html.Div(
    id='table',
     children=[
       
    ],

)
information_gen = dbc.Row(


    className="row",
)
visualisation = html.Div(
    className="content",
    children=[
        graphe,
        information_gen,
        
    ]

)


In [7]:
navbar = dbc.Navbar(
    [
        html.A(
            # Use row and col to control vertical alignment of logo / brand
            dbc.Row(
                [
                    dbc.Col(dbc.NavbarBrand("Corpus médical", className="ml-2")),
                ],
                align="center",
                no_gutters=True,
            ),
            href="https://plot.ly",
        ),
        dbc.NavbarToggler(id="navbar-toggler"),
        dbc.Collapse(search_bar, id="navbar-collapse", navbar=True),
        #no_gutters=True,
    ],
    color="dark",
    dark=True,
)

page_1_layout = html.Div(
    className="CONTENT_STYLE",
                  
                 children = [
                     visualisation
                 ]
                 
)
page_2_layout =html.Div(
    id="layout2",
    children =[dcc.Dropdown(
        id='demo-dropdown',
        options=[
            {'label': 'LDA', 'value': 'lda'},
            {'label': 'Co-clustering " with spherical k-means"', 'value': 'cokmeans'},
            {'label': 'Co-clustering "with specMod"', 'value': 'cospecmod'},
            {'label': 'Co-clustering "with Mod"', 'value': 'comod'}
         ]
    ),]
    
)

In [8]:
app.layout = html.Div([dcc.Store(id='storage'),dcc.Location(id="url"), navbar,sidebar,html.Div(id ="page-content")])

In [9]:
# add callback for toggling the collapse on small screens
@app.callback(
    Output("navbar-collapse", "is_open"),
    [Input("navbar-toggler", "n_clicks")],
    [State("navbar-collapse", "is_open")]
)
def toggle_navbar_collapse(n,is_open):
    if n:
        return not is_open
    return is_open

In [10]:
@app.callback(
    Output("layout2", "children"),
    [Input("moh","n_clicks"),
     Input('table','children')],
    [State('layout2','children')]
)
def fetch(n_clicks,old_output):
    return old_output + [dash_table.DataTable(
    style_cell={
        'whiteSpace': 'normal',
        'height': 'auto',
    },
    style_table={
            'width': 950,
            'overflowY': 'auto',
            'overflowX': 'auto',
            'height': 540},
    columns=[{"name": i, "id": i} for i in df.columns],
    data=df.to_dict('records'),
    export_format="csv",)]

In [11]:
@app.callback(
    Output("table", "children"),
    [Input("submitt_search","n_clicks")],
    [State('num_max', 'value'),
     State('word', 'value'),
     State('table','children')]
)
def fetch(n_clicks,num_max,word,old_output):
    global df 
    df= recup(num_max,word)
    return old_output + [dash_table.DataTable(
    style_cell={
        'whiteSpace': 'normal',
        'height': 'auto',
    },
    style_table={
            'width': 950,
            'overflowY': 'auto',
            'overflowX': 'auto',
            'height': 540},
    columns=[{"name": i, "id": i} for i in df.columns],
    data=df.to_dict('records'),
    export_format="csv",)]

In [12]:
@app.callback(Output('page-content', 'children'),
              [Input('url', 'pathname')])
def display_page(pathname):
    if pathname == '/page-1':
        return page_1_layout
    elif pathname == '/page-2':
        return page_2_layout
    else:
        return page_index
    # You could also return a 404 "URL not found" page here

In [None]:
if __name__ == "__main__":
    app.config.suppress_callback_exceptions = True
    app.run_server()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [16/May/2021 20:34:11] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:11] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:11] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\gharbi\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\gharbi\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\gharbi\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\gharbi\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\gharbi\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\gharbi\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\gharbi\anaconda3\lib\site-packages\dash\dash.py", line 1079, in dispatch
    respons

127.0.0.1 - - [16/May/2021 20:34:11] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [16/May/2021 20:34:13] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


1) Fever: Could A Cardinal Sign of COVID-19 Infection Reduce Mortality?
2) Chemotherapy-related fever or infection fever?


127.0.0.1 - - [16/May/2021 20:34:25] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:25] "[37mGET /_dash-component-suites/dash_table/async-highlight.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:25] "[37mGET /_dash-component-suites/dash_table/async-table.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:26] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:36] "[37mGET /_dash-component-suites/dash_html_components/dash_html_components.min.js.map HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:36] "[37mGET /_dash-component-suites/dash_core_components/dash_core_components-shared.js.map HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:36] "[37mGET /_dash-component-suites/dash_table/bundle.js.map HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:36] "[37mGET /_dash-component-suites/dash_table/async-highlight.js.map HTTP/1.1[0m" 200 -
127.0.0.1 - - [16/May/2021 20:34:36] "[37mGET

1) Fever: Could A Cardinal Sign of COVID-19 Infection Reduce Mortality?
2) Chemotherapy-related fever or infection fever?


127.0.0.1 - - [16/May/2021 20:38:37] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
