## Load this section first

In [44]:
%load_ext autoreload
%autoreload 2

import numpy as np
from importlib import reload
import evaluation_helper
from loadutils import construct_embedding_matrix
from loadutils import conll2003Data, loadDevPredictionsData

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [45]:
# ---------------------------------------------------------------------------
# Configuration and data loading: builds the vocabulary from the training
# file, formats train/dev/test into fixed-size windows, loads the embedding
# matrix, and derives the decoder targets from the center token of each window.
# ---------------------------------------------------------------------------
TRAIN_FILE = "../data/conll2003/eng.train"
DEV_FILE = "../data/conll2003/eng.testa"
TEST_FILE = "../data/conll2003/eng.testb"

global_max_features = 20000   # passed as vocabSize and to construct_embedding_matrix
windowLength = 9              # number of tokens per input window
global_embed_dim = 50         # embedding dimension requested from construct_embedding_matrix
#testNumSents = 20000

# Column of the center token inside each window. Derived from windowLength so
# the decoder targets stay correct if the window size is changed; for
# windowLength = 9 this is 4, the value that used to be hard-coded.
# NOTE(review): assumes the labeled token sits in the middle of the window
# (i.e. windowLength is odd) -- confirm against formatWindowedData.
centerIdx = windowLength // 2

# Use training set to build vocab here
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab(vocabSize=global_max_features)

# Format training data
trainX, trainX_pos, trainX_capitals, trainY = vocabData.formatWindowedData(
    vocabData.train_sentences,
    windowLength=windowLength,
    verbose=False)

# read in dev data
devSents = vocabData.readFile(DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData(
    devSents,
    windowLength=windowLength,
    verbose=False)

# read in the test data
testSents = vocabData.readFile(TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData(
    testSents,
    windowLength=windowLength,
    verbose=False)

# load embeddings
embedding_matrix = construct_embedding_matrix(global_embed_dim,
                                               global_max_features, vocabData)


# Get decoder Y -- embedding row (global_embed_dim wide) of the center word
train_decoderY = embedding_matrix[trainX[:, centerIdx]]
dev_decoderY = embedding_matrix[devX[:, centerIdx]]
test_decoderY = embedding_matrix[testX[:, centerIdx]]

----------------------------------------------------
reading file from path ../data/conll2003/eng.train
'readFile'  1270.15 ms
----------------------------------------------------
building vocabulary from TRAINING data...
'buildVocab'  882.79 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  1437.07 ms
----------------------------------------------------
reading file from path ../data/conll2003/eng.testa
'readFile'  199.86 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  263.70 ms
----------------------------------------------------
reading file from path ../data/conll2003/eng.testb
'readFile'  181.21 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  435.70 ms
Loading vectors from data/glove/glove.6B.zip
Parsing file: data/glove/glove.6B.zip:glove.6B.50d.txt
Found 400,000 wor

## Demo  - Quick access

In [9]:
# Quick look at dev-set F1 scores using only saved model names.
reload(evaluation_helper)
from evaluation_helper import get_f1_by_modelName, compare_models_by_f1

# Single model used throughout this demo; the comparison list simply repeats it.
modelName1 = 'encoder_2e_withsaving_again'
modelName_list = [modelName1, modelName1]

# Ranked comparison of every model in the list against the gold dev labels.
compare_models_by_f1(modelName_list, y_true=devY, return_results=False)

print("------------------------------------------------------------")

# F1 for one model; left as the last expression so the notebook displays it.
get_f1_by_modelName(modelName1, y_true=devY)


rank 1
modelName: encoder_2e_withsaving_again
f1= 0.880151139449758
------------------------------------------------------------


0.880151139449758

In [47]:
# Sanity check: any() over a zero-length array evaluates to False.
np.any(np.empty(0))

False

## Demo  - report class object

In [28]:
# Re-execute the evaluation_helper module so its current on-disk version is
# used, then bind EvalDev_Report from the freshly reloaded module.
reload(evaluation_helper)
from evaluation_helper import EvalDev_Report

In [29]:
# Fetch the saved dev-set predictions for the chosen model
modelName = 'encoder_2e_withsaving_again'
(dev_raw_y_pred,
 dev_raw_y_pred_decoder_embeddings,
 dev_y_pred) = loadDevPredictionsData(modelName)

# Build the report object from gold labels, predictions and decoder outputs
report_obj = evaluation_helper.EvalDev_Report(
    modelName=modelName,
    y_true=devY,
    raw_y_pred=dev_raw_y_pred,
    y_pred=dev_y_pred,
    y_true_decoder=dev_decoderY,
    y_pred_decoder=dev_raw_y_pred_decoder_embeddings,
)

# Attach the vocabulary/data class and the windowed dev inputs to the report
report_obj.connect_to_dataClass(vocabData)
report_obj.connect_to_devData(devData=(devX, devX_pos, devX_capitals, devY))

In [35]:
# Size of the second axis of the decoder predictions
# (presumably the embedding dimension -- confirm against the model definition).
dev_raw_y_pred_decoder_embeddings.shape[1]

50

In [None]:
# TODO: work on decoder reconstruction plot space (t-SNE)

# Only proceed when the decoder predictions form a 2-D array.
if dev_raw_y_pred_decoder_embeddings.ndim == 2:
    # Placeholder body: the original `if` had no body, which made this cell a
    # syntax error and broke "Restart & Run All". Replace with the t-SNE plot.
    pass

In [30]:
# Print the full evaluation report for the model attached to report_obj.
report_obj.print_whole_report(k=2, print_window=False)

# How to save this report to a text file (run in a SEPARATE cell):
#
#     import os
#     with open(os.path.expanduser('~/xyzzy.txt'), 'w+') as f:
#         f.write(_14)
#
# `_14` is IPython's cached output of the 14th executed cell; replace 14 with
# the execution count of the cell that produced the report.
# `os.path.expanduser` is needed because open() does not expand '~' itself.
# These notes are comments rather than a bare triple-quoted string: a string
# left as the last expression of the cell is echoed as the cell's output and
# is what gets cached under `_N`.
# NOTE(review): `_N` only exists when that cell's last expression returned a
# value -- confirm that print_whole_report returns the report text and does
# not merely print it; otherwise capture stdout instead.


-------------------------BRIEF SUMMARY-------------------------

Model     encoder_2e_withsaving_again
Precision 0.8943011397720456
Recall    0.8664419388585377
f1 score  0.880151139449758

Gold NER label counts:
42759 : ['O'] (tag3)
2092 : ['I-ORG'] (tag5)
2094 : ['I-LOC'] (tag6)
1264 : ['I-MISC'] (tag7)
3149 : ['I-PER'] (tag4)
4 : ['B-MISC'] (tag8)

Predicted NER label counts:
43027 : ['O'] (tag3)
2226 : ['I-LOC'] (tag6)
1005 : ['I-MISC'] (tag7)
3241 : ['I-PER'] (tag4)
1863 : ['I-ORG'] (tag5)

----------------GOLD to PREDICTION LABELS COUNTS---------------


Gold label "['O']" (tag3), prediction label counts:
42601 (0.9963%): "['O']" (tag3)
40 (0.0009%): "['I-PER']" (tag4)
75 (0.0018%): "['I-ORG']" (tag5)
11 (0.0003%): "['I-LOC']" (tag6)
32 (0.0007%): "['I-MISC']" (tag7)
0 (0.0%): "['B-MISC']" (tag8)
0 (0.0%): "['B-ORG']" (tag9)
0 (0.0%): "['B-LOC']" (tag10)

Gold label "['I-PER']" (tag4), prediction label counts:
45 (0.0143%): "['O']" (tag3)
3021 (0.9594%): "['I-PER']" (tag4)
64 (0

top 2 results

ID 6691
KL divergence 6.303609848022461
FEATURES:   "<unk>", JJ, noinfo
Gold NER    ['I-MISC']
Pred NER    ['O']
Text window ['<s>', 'the', 'move', 'to', '<unk>', 'atalanta', '<unk>', '<unk>', ',']
PoS window  ['<s>', 'DT', 'NN', 'TO', 'JJ', 'NNP', 'VBZ', 'NNP', ',']
Caps window ['<s>', 'upperInitial', 'lowercase', 'lowercase', 'noinfo', 'upperInitial', 'lowercase', 'upperInitial', 'noinfo']

ID 43588
KL divergence 5.999802589416504
FEATURES:   "division", NN, lowercase
Gold NER    ['I-MISC']
Pred NER    ['O']
Text window ['<s>', '<s>', '"', 'this', 'division', 'would', 'guarantee', 'a', '<unk>']
PoS window  ['<s>', '<s>', '"', 'DT', 'NN', 'MD', 'VB', 'DT', 'NN']
Caps window ['<s>', '<s>', 'noinfo', 'upperInitial', 'lowercase', 'lowercase', 'lowercase', 'lowercase', 'lowercase']
----------------------------
Label B-MISC (tag8)
indices counts = 4
ranked by worst cross-entropy loss
top 2 results

ID 42100
KL divergence 4.277178764343262
FEATURES:   "<unk>", NNPS, upperInit

## Just FYI -- more attributes and methods

In [12]:
%%capture

# Reference tour of the arrays and report attributes available after the demo
# cells above have run. %%capture silences everything this cell would print or
# display; remove it to actually see the values.

# gold labels
print ("devY",devY.shape)
# raw predictions made by a trained model on the dev set
print ("dev_raw_y_pred", dev_raw_y_pred.shape)
# predicted labels on the dev set
print ("dev_y_pred", dev_y_pred.shape)
# decoder on: decoder predictions on the dev set
# decoder off: empty
print ("dev_raw_y_pred_decoder_embeddings",dev_raw_y_pred_decoder_embeddings.shape)

# overall scores
report_obj.recall
report_obj.precision
report_obj.f1
report_obj.gold_cts # gold label distribution
report_obj.pred_cts # predicted label distribution
# when gold is "O", but the model thinks there is a NER tag
report_obj.hallucination_idx 
# when gold is a NER tag, but the model thinks it is "O"
report_obj.missed_ner_idx
# when both model and gold indicate a NER tag, and the tags match
report_obj.match_ner_idx
# when both model and gold indicate a NER tag, and the tags mismatch
report_obj.mismatch_ner_idx 
# dictionary[gold_label][prediction_label] --> data indices
report_obj.gold_pred_idx_dict 
# dictionary[gold_label][prediction_label] --> count
report_obj.gold_pred_ct_dict 
# dictionary[gold_label][prediction_label] --> data indices
# (same attribute as two entries above; the access is repeated in this cell)
report_obj.gold_pred_idx_dict 

In [10]:
# Show the label names behind tag ids 9 and 10.
print("these two tags were never used")
for unusedTagId in (9, 10):
    print(report_obj.nerTags.ids_to_words([unusedTagId]))

these two tags were never used
['B-ORG']
['B-LOC']


## Ignore this section