In [1]:
# Python3

In [4]:
from pathlib import Path

import pandas as pd
import qgrid
import spacy
from IPython.display import display, HTML, IFrame
from pyvis.network import Network
from spacy import displacy

In [5]:
# Load the 'en_core_web_lg' spaCy pipeline once; every later cell reuses `nlp`.
nlp = spacy.load(name='en_core_web_lg')

In [6]:
# Sample input: two short sentences that are parsed and visualised below.
text = (
    'Bruce works for Iron Maiden as a pilot and sometimes as a singer. He lives in a castle in Coventry.'
)

In [7]:
# Run the loaded pipeline over the sample text; `doc` is the object the
# visualisation and table cells below iterate over.
doc = nlp(text)

In [8]:
# Attributes available on a spaCy Doc (here: `doc`):
#     lang_
#     cats
#     ents
#     noun_chunks
#     sentiment

# Attributes available on each spaCy Token (`for token in doc`):
#     i
#     idx
#     text
#     lemma_
#     shape_
#     pos_
#     tag_
#     dep_
#     ent_iob_
#     ent_type_
#     head
#     children

#     n_lefts
#     lefts
#     left_edge
#     n_rights
#     rights
#     right_edge

#     lex
#     sentiment

#     is_ascii
#     is_alpha
#     is_digit
#     is_currency

#     is_lower
#     is_upper
#     is_title

#     is_punct
#     is_left_punct
#     is_right_punct
#     is_bracket
#     is_stop
#     is_space

#     is_sent_start
#     is_sent_end

#     is_quote
#     is_oov

#     like_num
#     like_url
#     like_email

In [9]:
# Entity-label colours, grouped by colour so related labels visibly share one.
# The '' entry is the colour for "no entity type"; the graph cell further down
# indexes this map with token.ent_type_, which is '' for such tokens.
ent_color_groups = (
    ('#EBEBEB', ('',)),
    ('#49BAF2', ('PERSON',)),
    ('#F7B24B', ('ORG',)),
    ('#A7CC7B', ('GPE', 'LOC', 'FAC')),
    ('#D0F2AA', ('NORP',)),
    ('#D273FF', ('DATE', 'TIME', 'EVENT')),
    ('#FFCC33', ('CARDINAL', 'ORDINAL', 'QUANTITY', 'PERCENT', 'MONEY')),
    ('#2BD881', ('PRODUCT', 'WORK_OF_ART')),
    ('#FF974D', ('LANGUAGE',)),
    ('#469BB4', ('LAW',)),
)

# Flatten the groups into the {'colors': {label: colour}} mapping passed to displaCy.
displacy_options_ent = {
    'colors': {
        ent_label: ent_color
        for ent_color, ent_labels in ent_color_groups
        for ent_label in ent_labels
    },
}

# Render the named-entity view of `doc`.
# NOTE(review): inside Jupyter, displacy.render may display the markup itself
# and return None, in which case html_ents is None -- confirm before reusing it.
html_ents = displacy.render(doc, style='ent', minify=True, options=displacy_options_ent)

In [10]:
# Styling for the dependency-parse view: font, arc/text colour, compact arcs,
# and punctuation kept as separate tokens (collapse_punct disabled).
displacy_options_dep = dict(
    font='Gill Sans',
    color='#515C99',
    compact=True,
    collapse_punct=False,
)

# Render the dependency-parse view of `doc`.
# NOTE(review): inside Jupyter, displacy.render may display the markup itself
# and return None, in which case html_dep is None -- confirm before reusing it.
html_dep = displacy.render(doc, style='dep', minify=True, options=displacy_options_dep)

In [11]:
# Build an interactive dependency graph with pyvis: one node per token and one
# directed edge from every token to each of its syntactic children.

# Single source of truth for the generated page (written by pyvis, then embedded).
graph_html = 'images/graph.html'
# The page is written to disk, so make sure the target directory exists;
# otherwise this cell fails on a fresh checkout.
Path(graph_html).parent.mkdir(parents = True, exist_ok = True)

ent_colors = displacy_options_ent['colors']
# Colour for tokens without an entity type; also the fallback for any entity
# label that has no entry in the colour map.
default_color = ent_colors['']

g_pyvis = Network(
    directed = True,
    notebook = True,
    width = '100%',
    height = '100%',
)
g_pyvis.prep_notebook()
# NOTE(review): vis.js appears to document damping as a value in [0, 1];
# 2.0 is kept as in the original -- confirm it is intentional.
g_pyvis.force_atlas_2based(
    gravity = -100,
    central_gravity = 0.005,
    spring_length = 50,
    spring_strength = 0.1,
    damping = 2.0,
    overlap = 0.5,
)

# Nodes: keyed by token index, labelled
# "<token idx>: <char offset>: <coarse POS>: [<entity type>: ]<text>".
for token in doc:
    label_parts = [str(token.i), str(token.idx), token.pos_]
    if token.ent_type_:
        label_parts.append(token.ent_type_)
    label_parts.append(token.text)
    label = ': '.join(label_parts)
    g_pyvis.add_node(
        token.i,
        label = label,
        title = label,
        font = '14px gillsans #424242',
        # .get() instead of [] so an unmapped entity label cannot raise KeyError.
        color = ent_colors.get(token.ent_type_, default_color),
    )

# Edges: head -> child, labelled with the child's dependency relation.
# Added in a second pass so every node already exists.
for token in doc:
    for child in token.children:
        g_pyvis.add_edge(
            token.i,
            child.i,
            label = child.dep_,
            title = child.dep_,
            font = '12px gillsans-italic #A9A9A9',
        )

# Write the page, then embed it explicitly at a fixed pixel height.
g_pyvis.show(graph_html)
display(
    IFrame(
        graph_html,
        width = '100%',
        height = '1000px',
    ),
)

In [12]:
# Column title -> function that pulls that value off a single token.
# Insertion order defines the column order of the table.
# Note: 'Head' stores the head Token object itself, not its text.
token_columns = {
    'idx token': lambda tok: tok.i,
    'idx char': lambda tok: tok.idx,
    'Text': lambda tok: tok.text,
    'Lemma': lambda tok: tok.lemma_,
    'Shape': lambda tok: tok.shape_,
    'POS coarse': lambda tok: tok.pos_,
    'POS fine': lambda tok: tok.tag_,
    'Dependency': lambda tok: tok.dep_,
    'Ent IOB': lambda tok: tok.ent_iob_,
    'Ent type': lambda tok: tok.ent_type_,
    'Head': lambda tok: tok.head,
    'Children': lambda tok: [(kid.i, kid.idx, kid.text) for kid in tok.children],
    'is punct': lambda tok: tok.is_punct,
}

# One row per token, one cell per column extractor.
token_rows = [
    [extract(token) for extract in token_columns.values()]
    for token in doc
]
df = pd.DataFrame(token_rows, columns=list(token_columns))

# Show the table as an interactive grid with qgrid's default options.
# Deliberately left disabled: grid_options={'enableColumnReorder': True}, and
# show_toolbar=True (it gives a full-screen button, but also add/remove row
# buttons, which are not wanted here).
qgrid_widget = qgrid.show_grid(df)
display(qgrid_widget)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…