In [None]:
# Python3

In [None]:
# The following cell hides the Jupyter header content and controls, giving the appearance of a
# standalone application. Keep it enabled when running as a Binder notebook; comment it out
# otherwise. Note that if you run this cell manually in a regular notebook session then the Jupyter
# header content and controls will still be hidden. Run this cell again to toggle them back:

In [None]:
%%javascript
// Toggle the visibility of the element with id "header"; running the cell again toggles it back.
// NOTE(review): assumes the notebook front end exposes jQuery as `$` and has a `#header` element — confirm.
$('#header').toggle()

In [None]:
# For spaCy, we have the following useful attributes on the doc object and the tokens it contains.
# Note that other attributes are available, the following are just a selection:

# On doc:
#     lang_
#     cats
#     ents
#     noun_chunks
#     sentiment

# On tokens:
#     i
#     idx
#     text
#     lemma_
#     shape_
#     pos_
#     tag_
#     dep_
#     ent_iob_
#     ent_type_
#     head
#     children

#     n_lefts
#     lefts
#     left_edge
#     n_rights
#     rights
#     right_edge

#     lex
#     sentiment

#     is_ascii
#     is_alpha
#     is_digit
#     is_currency

#     is_lower
#     is_upper
#     is_title

#     is_punct
#     is_left_punct
#     is_right_punct
#     is_bracket
#     is_stop
#     is_space

#     is_sent_start
#     is_sent_end

#     is_quote
#     is_oov

#     like_num
#     like_url
#     like_email

In [None]:
import ipywidgets as widgets
# import neuralcoref
import pandas as pd
import qgrid
import spacy
from datetime import datetime
from IPython.display import display, HTML, IFrame
from pyvis.network import Network
from spacy import displacy

In [None]:
# Load the spaCy pipeline once at module level; `go` reuses `nlp` for every processing request.
nlp = spacy.load('en_core_web_lg')
# neuralcoref.add_to_pipe(nlp)

In [None]:
# Entity highlight colours passed to displaCy's 'ent' style, keyed by entity label.
# Labels are grouped by colour; the '' key is the colour `set_pyvis` looks up for tokens
# whose `ent_type_` is empty.
displacy_options_ent = {
    'colors': {
        label: colour
        for colour, labels in (
            ('#EBEBEB', ('',)),
            ('#49BAF2', ('PERSON',)),
            ('#F7B24B', ('ORG',)),
            ('#A7CC7B', ('GPE', 'LOC', 'FAC')),
            ('#D0F2AA', ('NORP',)),
            ('#D273FF', ('DATE', 'TIME', 'EVENT')),
            ('#FFCC33', ('CARDINAL', 'ORDINAL', 'QUANTITY', 'PERCENT', 'MONEY')),
            ('#2BD881', ('PRODUCT', 'WORK_OF_ART')),
            ('#FF974D', ('LANGUAGE',)),
            ('#469BB4', ('LAW',)),
        )
        for label in labels
    },
}
def set_html_ent(doc):
    """Render the named entities of `doc` as displaCy HTML markup.

    Args:
        doc: spaCy Doc to visualise.

    Returns:
        The minified HTML markup produced by displaCy's 'ent' style, coloured according to
        `displacy_options_ent`.
    """
    # Jupyter mode is switched off explicitly: when displaCy detects a notebook kernel it
    # displays the markup itself and returns None, whereas the caller needs the markup string
    # so that it can be shown inside its own output widget.
    return displacy.render(
        doc,
        style = 'ent',
        minify = True,
        jupyter = False,
        options = displacy_options_ent,
    )

In [None]:
# Rendering options passed to displaCy's 'dep' (dependency parse) style.
displacy_options_dep = dict(
    font = 'Gill Sans',
    color = '#515C99',
    compact = True,
    collapse_punct = False,
)
def set_html_dep(doc):
    """Render the dependency parse of `doc` as displaCy HTML markup.

    Args:
        doc: spaCy Doc to visualise.

    Returns:
        The minified HTML markup produced by displaCy's 'dep' style, configured by
        `displacy_options_dep`.
    """
    # Jupyter mode is switched off explicitly: when displaCy detects a notebook kernel it
    # displays the markup itself and returns None, whereas the caller needs the markup string
    # so that it can be shown inside its own output widget.
    return displacy.render(
        doc,
        style = 'dep',
        minify = True,
        jupyter = False,
        options = displacy_options_dep,
    )

In [None]:
def set_pyvis(doc):
    """Build a pyvis graph of `doc` and write it to 'images/graph.html'.

    Every token becomes a node labelled '<i>: <idx>: <pos_>: [<ent_type_>: ]<text>' and coloured
    by its entity type. Every head -> child relation becomes a directed edge labelled with the
    child's dependency label. When `dropdown_set_sentedges` has value 0 ('All sentence edges'),
    the token preceding each sentence after the first is also linked to that sentence's first
    token.

    Args:
        doc: spaCy Doc to visualise.
    """
    g_pyvis = Network(
        directed = True,
        notebook = True,
        width = '100%',
        height = '100%',
    )
    g_pyvis.prep_notebook()
    # NOTE(review): vis-network documents `damping` with an accepted range of 0..1 — confirm that
    # 2.0 is intentional.
    g_pyvis.force_atlas_2based(
        gravity = -100,
        central_gravity = 0.005,
        spring_length = 50,
        spring_strength = 0.1,
        damping = 2.0,
        overlap = 0.5,
    )

    colors = displacy_options_ent['colors']
    edge_font = '12px gillsans-italic #A9A9A9'

    for token in doc:
        label_parts = [str(token.i), str(token.idx), token.pos_]
        if token.ent_type_:
            label_parts.append(token.ent_type_)
        label_parts.append(token.text)
        label = ': '.join(label_parts)
        g_pyvis.add_node(
            token.i,
            label = label,
            title = label,
            font = '14px gillsans #424242',
            # Entity labels missing from the colour map (e.g. from a pipeline with a different
            # label set) fall back to the non-entity colour instead of raising KeyError.
            color = colors.get(token.ent_type_, colors['']),
        )

    for token in doc:
        for child in token.children:
            g_pyvis.add_edge(
                token.i,
                child.i,
                label = child.dep_,
                title = child.dep_,
                font = edge_font,
            )

    if (dropdown_set_sentedges.value == 0):
        # Skip the first sentence: it has no preceding token to link from.
        for sent in list(doc.sents)[1:]:
            g_pyvis.add_edge(
                sent.start - 1,
                sent.start,
                label = 'sentence',
                title = 'sentence',
                font = edge_font,
            )

#     if (dropdown_set_corefs.value == 0):
#         for coref_cluster in doc._.coref_clusters:
#             for span in coref_cluster[1:]:
#                 g_pyvis.add_edge(
#                     coref_cluster[0].root.i,
#                     span.root.i,
#                     label = 'coref',
#                     title = 'coref',
#                     font = '12px gillsans-italic #A9A9A9',
#                 )

    g_pyvis.show('images/graph.html')

In [None]:
def set_qgrid_widget(doc):
    """Tabulate per-token attributes of `doc` in a qgrid widget.

    Args:
        doc: spaCy Doc whose tokens become the rows of the table.

    Returns:
        The widget returned by `qgrid.show_grid` for the token table.
    """
    column_names = [
        'idx token',
        'idx char',
        'Text',
        'Lemma',
        'Shape',
        'POS coarse',
        'POS fine',
        'Dependency',
        'Ent IOB',
        'Ent type',
        'Head',
        'Children',
        'Is punct',
    ]

    rows = []
    for token in doc:
        # Children are summarised as (token index, character offset, text) triples.
        child_summaries = [(child.i, child.idx, child.text) for child in token.children]
        rows.append([
            token.i,
            token.idx,
            token.text,
            token.lemma_,
            token.shape_,
            token.pos_,
            token.tag_,
            token.dep_,
            token.ent_iob_,
            token.ent_type_,
            token.head,
            child_summaries,
            token.is_punct,
        ])

    token_table = pd.DataFrame(rows, columns = column_names)

    # Grid settings are left at their defaults (grid_options such as 'enableColumnReorder' are
    # not set). show_toolbar = True would give a full-screen button, but also add/remove row
    # buttons which we don't want.
    return qgrid.show_grid(token_table)

In [None]:
# Display names of the four output panels, in order; used as accordion titles and as the
# labels of the individual output modes.
output_names = ['NER', 'Dependency', 'Graph', 'Table']

In [None]:
# Layout styles
# Shared ipywidgets layout settings: full-size elements, fixed-width dropdowns and buttons.
layout_main = dict(width = '100%', height = '100%')
layout_dropdown = dict(width = '150px')
layout_button = dict(width = '50px')

In [None]:
# Image widgets
# The header image is read inside a context manager so that the file handle is closed as soon
# as the bytes have been loaded, instead of being left open until garbage collection.
with open('images/spaCyfaCey_Header.png', 'rb') as image_file:
    image_header = widgets.Image(
        value = image_file.read(),
        format = 'png',
        layout = layout_main,
    )

In [None]:
# Input widgets
# Text area holding the text to process; `go` reads its value and `clear_input_output` empties it.
input_text = widgets.Textarea(
    placeholder = 'Text to process ...',
    layout = layout_main,
)

In [None]:
# Control widgets
# Whether `set_pyvis` adds edges between consecutive sentences (value 0) or not (value 1).
dropdown_set_sentedges = widgets.Dropdown(
    options = [
        ('All sentence edges', 0),
        ('No sentence edges', 1),
    ],
    value = 0,
    layout = layout_dropdown,
)
# dropdown_set_corefs = widgets.Dropdown(
#     value = 0,
#     options = [
#         ('All corefs', 0),
#         ('No corefs', 1),
#     ],
#     layout = layout_dropdown,
# )

# Output mode: value 0 shows every output, values 1..4 select a single entry of `output_names`.
dropdown_set_mode = widgets.Dropdown(
    options = [('All outputs', 0)] + [
        (name, position)
        for position, name in enumerate(output_names, start = 1)
    ],
    value = 0,
    layout = layout_dropdown,
)

# Icon-only buttons; the tooltip describes each action.
button_toggle_output_open = widgets.Button(
    tooltip = 'Toggle all output displays',
    icon = 'list',
    layout = layout_button,
)
button_clear = widgets.Button(
    tooltip = 'Clear input and output',
    icon = 'refresh',
    layout = layout_button,
)
button_go = widgets.Button(
    tooltip = 'Go!',
    icon = 'play',
    layout = layout_button,
)

In [None]:
# Output widgets
# `output_message` shows status and error messages; `output_0` .. `output_3` hold the rendered
# results, in the same order as `output_names`.
output_message = widgets.Output()
output_0 = widgets.Output()
output_1 = widgets.Output()
output_2 = widgets.Output()
output_3 = widgets.Output()

In [None]:
# Container widgets
# Widgets stacked at the top of the application: header image, then the text input.
children_header = [image_header, input_text]

# Widgets of the control row, in display order.
children_control = [
    dropdown_set_sentedges,
#     dropdown_set_corefs,
    dropdown_set_mode,
    button_toggle_output_open,
    button_clear,
    button_go,
    output_message,
]

# One single-panel accordion per output widget, titled with the matching entry of `output_names`.
accordion_0, accordion_1, accordion_2, accordion_3 = [
    widgets.Accordion(children = [panel_output], layout = layout_main)
    for panel_output in (output_0, output_1, output_2, output_3)
]
for _accordion, _title in zip(
    (accordion_0, accordion_1, accordion_2, accordion_3),
    output_names,
):
    _accordion.set_title(0, _title)

# Accordions currently shown; rebuilt by `set_output` whenever the output mode changes.
children_output = []

HBox_control = widgets.HBox(children = children_control)

# Top-level container; its children are assigned by `set_output`.
VBox_main = widgets.VBox(
    layout = dict(
        width = '90%',
        min_height = '1500px',
        left = '10%',
        padding = '5px',
#         border = '1px solid black',
    ),
)

In [None]:
@output_message.capture(clear_output=True, wait=True)
def set_output_message(html):
    """Display `html` in the message output widget, replacing its previous content.

    Args:
        html: HTML markup (or plain text) of the status or error message.
    """
    display(HTML(html))

# -------------------------------------------------------------------------------------------------

@output_0.capture(clear_output=True, wait=True)
def set_output_0(html):
    """Open the first accordion and display `html` in its output widget, replacing old content.

    Args:
        html: HTML markup to display (the entity rendering when called from `go`).
    """
    # Selecting index 0 expands the accordion's single panel.
    accordion_0.selected_index = 0
    display(HTML(html))

# -------------------------------------------------------------------------------------------------

@output_1.capture(clear_output=True, wait=True)
def set_output_1(html):
    """Open the second accordion and display `html` in its output widget, replacing old content.

    Args:
        html: HTML markup to display (the dependency rendering when called from `go`).
    """
    # Selecting index 0 expands the accordion's single panel.
    accordion_1.selected_index = 0
    display(HTML(html))

# -------------------------------------------------------------------------------------------------

@output_2.capture(clear_output=True, wait=True)
def set_output_2():
    """Open the third accordion and embed 'images/graph.html' in its output widget.

    The file is the one written by `set_pyvis`, so that function must run first.
    """
    # Selecting index 0 expands the accordion's single panel.
    accordion_2.selected_index = 0
    display(
        IFrame(
            'images/graph.html',
            width = '100%',
            height = '1000px',
        ),
    )

# -------------------------------------------------------------------------------------------------

@output_3.capture(clear_output=True, wait=True)
def set_output_3(qgrid_widget):
    """Open the fourth accordion and display `qgrid_widget` in its output widget.

    Args:
        qgrid_widget: The table widget to display (built by `set_qgrid_widget` when called
            from `go`).
    """
    # Selecting index 0 expands the accordion's single panel.
    accordion_3.selected_index = 0
    display(qgrid_widget)

In [None]:
def set_output(dummy):
    """Rebuild the application layout for the currently selected output mode.

    Rebinds the global `children_output` to the accordions that match the mode ('All outputs'
    keeps all four, otherwise only the one whose name is selected) and assigns the header, the
    control row and those accordions to `VBox_main`.

    Args:
        dummy: Unused; present so the function can be registered as a widget callback.
    """
    global children_output
    selected_mode = dropdown_set_mode.label
    named_accordions = zip(
        output_names,
        (accordion_0, accordion_1, accordion_2, accordion_3),
    )
    children_output = [
        accordion
        for name, accordion in named_accordions
        if selected_mode in ('All outputs', name)
    ]
    VBox_main.children = children_header + [HBox_control] + children_output

# -------------------------------------------------------------------------------------------------

def clear_output(dummy):
    """Collapse all four accordions, then empty all four result output widgets.

    Args:
        dummy: Unused; present so the function can be registered as a widget callback.
    """
    # A selected_index of None leaves no accordion panel open.
    for accordion in (accordion_0, accordion_1, accordion_2, accordion_3):
        accordion.selected_index = None
    for result_output in (output_0, output_1, output_2, output_3):
        result_output.clear_output()

# -------------------------------------------------------------------------------------------------

def clear_input_output(dummy):
    """Reset the application: empty the text input, the status message and all result outputs.

    Args:
        dummy: Unused; present so the function can be registered as a button click handler.
    """
    input_text.value = ''
    output_message.clear_output()
    clear_output(None)

# -------------------------------------------------------------------------------------------------

def toggle_control(dummy):
    """Flip the `disabled` state of every widget in `children_control`.

    The new state is the inverse of the first control's current state and is applied to all
    controls, so they always end up in the same state.

    Args:
        dummy: Unused; present so the function can be registered as a widget callback.
    """
    new_state = not children_control[0].disabled
    for control in children_control:
        control.disabled = new_state

# -------------------------------------------------------------------------------------------------

def toggle_output_open(dummy):
    """Collapse all displayed accordions if any is open, otherwise open all of them.

    Only the accordions currently listed in `children_output` are affected.

    Args:
        dummy: Unused; present so the function can be registered as a button click handler.
    """
    any_open = any(panel.selected_index == 0 for panel in children_output)
    target_index = None if any_open else 0
    for panel in children_output:
        panel.selected_index = target_index

# -------------------------------------------------------------------------------------------------

def go(dummy):
    """Process the input text with spaCy and render the selected outputs.

    Shows an error message and does nothing else when the input is empty. Otherwise the controls
    are toggled off while the pipeline runs, the elapsed time is reported, and every output
    selected in `dropdown_set_mode` is rendered into its accordion.

    Args:
        dummy: Unused; present so the function can be registered as a button click handler.

    Raises:
        Exception: Whatever the spaCy pipeline raises is re-raised after the controls have been
            toggled back and an error message has been shown.
    """
    if (not input_text.value):
        set_output_message('ERROR: Enter text for processing')
        return

    toggle_control(None)
    set_output_message('Processing ...') # + '<i class="fa fa-gear fa-spin" style="font-size:18px"></i>')

    try:
        t1 = datetime.now()
        doc = nlp(input_text.value)
        t2 = datetime.now()
    except Exception:
        # Replace the stale 'Processing ...' message before propagating the failure.
        set_output_message('ERROR: Processing failed')
        raise
    finally:
        # Always toggle the controls back; otherwise a pipeline failure would leave the whole
        # control row disabled.
        toggle_control(None)

    # total_seconds() covers the whole timedelta, unlike seconds + microseconds which drops days.
    deltat = round((t2 - t1).total_seconds(), 6)

    clear_output(None)
    set_output_message('Done in: ' + str(deltat) + ' s')

    mode = dropdown_set_mode.label
    show_all = (mode == 'All outputs')

    if (show_all or mode == output_names[0]):
        html_ent = set_html_ent(doc)
        set_output_0(html_ent)

    if (show_all or mode == output_names[1]):
        html_dep = set_html_dep(doc)
        set_output_1(html_dep)

    if (show_all or mode == output_names[2]):
        # set_pyvis writes the graph file that set_output_2 then embeds.
        set_pyvis(doc)
        set_output_2()

    if (show_all or mode == output_names[3]):
        qgrid_widget = set_qgrid_widget(doc)
        set_output_3(qgrid_widget)

In [None]:
# Rebuild the layout only when the selected mode changes. Without `names`, the handler fires for
# every trait change of the dropdown, including the `disabled` flips made by `toggle_control`
# and several times per selection.
dropdown_set_mode.observe(set_output, names = 'value')
button_toggle_output_open.on_click(toggle_output_open)
button_clear.on_click(clear_input_output)
button_go.on_click(go)

In [None]:
# Build the layout for the initially selected mode, collapse and empty all result panels,
# then show the application.
set_output(None)
clear_output(None)
display(VBox_main)