In [1]:
# Change the working directory to the parent of the notebook's folder.
# NOTE(review): presumably needed so the `litbank_entities` package can be
# imported from the repository root -- confirm.
# Caution: this is not idempotent; re-running the cell moves up one more level,
# so run it only once per kernel session.
%cd ..

/Users/brewer/Code/UU/CS6390/litbank-entities


# Demo: entity recognition on LitBank

In [2]:
import operator

import ipywidgets as widgets
import spacy
from IPython.display import display

from litbank_entities import extract, litbank, metrics
from litbank_entities.model import hmm_recognizer

2023-02-15 15:17:44.738029: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load data and language utility

In [3]:
# Fetch the LitBank tokens and labels, then pass them through
# litbank.flatten_texts to obtain the sentence-level lists used for training.
text_sentence_tokens, text_sentence_labels = (
    litbank.get_text_sentence_tokens_labels()
)
sentence_tokens, sentence_labels = litbank.flatten_texts(
    text_sentence_tokens,
    text_sentence_labels,
)
# The flattened token and label lists must have the same length.
assert len(sentence_labels) == len(sentence_tokens)
print('Train sentences: {:d}'.format(len(sentence_tokens)))

Train sentences: 8562


In [4]:
# Load spaCy's 'en_core_web_sm' pipeline; it is applied later in the notebook
# to split the user-entered sentences into tokens.
nlp = spacy.load('en_core_web_sm')

## Train

In [5]:
# Map each human-readable dropdown label to the short model identifier that
# is later handed to extract.create_model.
model_option_to_name = {
    'ZeroR (dummy baseline)': 'zero',
    'HMM (Hidden Markov Model using only tokens)': 'hmm',
}
model_options = list(model_option_to_name.keys())
model_names = list(model_option_to_name.values())

# Dropdown for choosing the model; the last option (the HMM) is preselected.
model_dropdown = widgets.Dropdown(
    description='Model:',
    options=model_options,
    value=model_options[-1],
    disabled=False,
)
display(model_dropdown)

Dropdown(description='Model:', index=1, options=('ZeroR (dummy baseline)', 'HMM (Hidden Markov Model using onl…

In [6]:
# Create the model selected in the dropdown and train it on the sentence
# tokens and labels loaded earlier.
# flush=True makes the progress message visible before the training call
# starts, even when stdout is buffered (the message has no trailing newline).
print('Training model... ', end='', flush=True)
model = extract.create_model(model_option_to_name[model_dropdown.value])
model.train(sentence_tokens, sentence_labels)
print('Done.')

Training model... Done.


## Infer

In [9]:
# Default demo input: two sentences separated by a newline character.
pearl_harbor = (
    'We have witnessed this morning the distant view a brief full battle of Pearl Harbor'
    ' and the severe bombing of Pearl Harbor by enemy planes, undoubtedly Japanese.\n'
    'The city of Honolulu has also been attacked and considerable damage done.'
)

# Editable text area, pre-filled with the default input above.
sentence_textarea = widgets.Textarea(
    description='Find entities:',
    placeholder='Sentences separated by newline characters.',
    value=pearl_harbor,
    layout=dict(height='16em'),
)
display(sentence_textarea)

Textarea(value='We have witnessed this morning the distant view a brief full battle of Pearl Harbor and the se…

In [10]:
# Tokenize every non-blank line of the text area as one sentence.
# The result is stored under its own name so that the training data held in
# `sentence_tokens` is not overwritten; otherwise re-running the training cell
# would pair these input tokens with the training labels.
# NOTE(review): nlp(...) yields spaCy Token objects, not plain strings. If the
# training tokens are strings, confirm the model treats both the same way;
# `token.text` would give the string form.
input_sentence_tokens = [
    list(nlp(line))
    for line in sentence_textarea.value.split('\n')
    if line.strip()  # skip empty lines, e.g. from a trailing newline
]
sentence_preds = model.predict(input_sentence_tokens)

# Print one "<label> <token>" row per token, with an <END> marker and a blank
# line after each sentence.
for tokens, preds in zip(input_sentence_tokens, sentence_preds):
    for pred, token in zip(preds, tokens):
        # Only the first element of each prediction is shown, right-aligned
        # in a five-character column.
        print('{:>5} {}'.format(pred[0], token))
    print('<END>')
    print()

    O We
B-ORG have
I-ORG witnessed
I-ORG this
I-ORG morning
I-ORG the
I-ORG distant
I-ORG view
I-ORG a
I-ORG brief
I-ORG full
I-ORG battle
I-ORG of
I-ORG Pearl
I-ORG Harbor
I-ORG and
I-ORG the
I-ORG severe
I-ORG bombing
I-ORG of
I-ORG Pearl
I-ORG Harbor
I-ORG by
I-ORG enemy
I-ORG planes
I-ORG ,
I-ORG undoubtedly
I-ORG Japanese
I-ORG .
<END>

    O The
B-ORG city
I-ORG of
I-ORG Honolulu
I-ORG has
I-ORG also
I-ORG been
I-ORG attacked
I-ORG and
I-ORG considerable
I-ORG damage
I-ORG done
I-ORG .
<END>

