## Load this section first

In [41]:
%load_ext autoreload
%autoreload 2

import numpy as np
from importlib import reload
import evaluation_helper

from loadutils import conll2003Data, loadDevPredictionsData

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
# Relative paths to the three conll2003 `eng.*` files used as the
# train / dev / test splits in this notebook.
TRAIN_FILE, DEV_FILE, TEST_FILE = (
    "../data/conll2003/eng.train",
    "../data/conll2003/eng.testa",
    "../data/conll2003/eng.testb",
)

In [42]:
# Tunable settings: vocabulary size cap and the width of each input window.
global_max_features = 20000
windowLength = 9

# The vocabulary is built from the training file only.
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab(vocabSize=global_max_features)

# Training split: window the sentences already held by vocabData.
trainX, trainX_pos, trainX_capitals, trainY = vocabData.formatWindowedData(
    vocabData.train_sentences, windowLength=windowLength, verbose=False
)

# Dev split: read the raw sentences, then window them with the same settings.
devSents = vocabData.readFile(DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData(
    devSents, windowLength=windowLength, verbose=False
)

# Test split: same two steps as for dev.
testSents = vocabData.readFile(TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData(
    testSents, windowLength=windowLength, verbose=False
)

----------------------------------------------------
reading file from path ../data/conll2003/eng.train
'readFile'  941.74 ms
----------------------------------------------------
building vocabulary from TRAINING data...
'buildVocab'  854.12 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  1469.17 ms
----------------------------------------------------
reading file from path ../data/conll2003/eng.testa
'readFile'  222.17 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  382.41 ms
----------------------------------------------------
reading file from path ../data/conll2003/eng.testb
'readFile'  196.22 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  338.70 ms


## Demo starts here

In [105]:
# Explicitly re-import evaluation_helper so the latest edits to the module are used.
# NOTE(review): the first cell already enables `%autoreload 2`, so this manual
# reload may be redundant — confirm before relying on either mechanism alone.
reload(evaluation_helper)

<module 'evaluation_helper' from '/home/vwaj/CapsNet_for_NER/code/evaluation_helper.py'>

In [107]:
# Fetch the dev-set predictions that were saved for an already-trained model.
modelName = 'encoder_2e_withsaving_again'
(dev_raw_y_pred,
 dev_raw_y_pred_decoder_embeddings,
 dev_y_pred) = loadDevPredictionsData(modelName)

# Shape check of every array involved in the evaluation.
# Gold dev labels:
print("devY", devY.shape)
# Raw predictions the trained model made on the dev set:
print("dev_raw_y_pred", dev_raw_y_pred.shape)
# Predicted dev labels:
print("dev_y_pred", dev_y_pred.shape)
# Decoder predictions on dev (empty when the decoder is switched off):
print("dev_raw_y_pred_decoder_embeddings", dev_raw_y_pred_decoder_embeddings.shape)

devY (51362,)
dev_raw_y_pred (51362, 8)
dev_y_pred (51362,)
dev_raw_y_pred_decoder_embeddings (51362, 50)


In [101]:
# Build the evaluation report from the dev gold labels, the raw model
# outputs, and the predicted labels.
report_obj = evaluation_helper.EvalDev_Report(
    y_true=devY,
    raw_y_pred=dev_raw_y_pred,
    y_pred=dev_y_pred,
)

In [10]:
# Show the recall value stored on the report (how it is aggregated is defined in evaluation_helper).
report_obj.recall

0.21017536644831536

In [11]:
# Show the precision value stored on the report (how it is aggregated is defined in evaluation_helper).
report_obj.precision

0.03513881181214119

In [12]:
# Show the F1 value stored on the report (how it is aggregated is defined in evaluation_helper).
report_obj.f1

0.060211054261478394

In [96]:
# Counter mapping label id -> count; presumably tallied over the gold labels (y_true) — confirm in evaluation_helper.
report_obj.gold_cts

Counter({3: 169578,
         4: 11128,
         5: 10001,
         6: 8286,
         7: 4556,
         8: 37,
         9: 24,
         10: 11})

In [97]:
# Counter mapping label id -> count; presumably tallied over the predicted labels (y_pred).
# NOTE(review): the saved output of this cell is identical to the gold_cts output —
# confirm that is expected for this model and not pred_cts being computed from y_true.
report_obj.pred_cts

Counter({3: 169578,
         4: 11128,
         5: 10001,
         6: 8286,
         7: 4556,
         8: 37,
         9: 24,
         10: 11})

In [101]:
# Indices of examples where the gold label is "O" but the model predicted an NER tag.
report_obj.hallucination_idx

array([     5,     30,     33, ..., 203602, 203605, 203620])

In [104]:
# Indices of examples where the gold label is an NER tag but the model predicted "O".
report_obj.missed_ner_idx

array([     0,      9,     10, ..., 203611, 203613, 203617])

In [102]:
# Indices of examples where both gold and model give an NER tag and the two tags match.
report_obj.match_ner_idx

array([  1007,   1008,   1145, ..., 203577, 203580, 203603])

In [103]:
# Indices of examples where both gold and model give an NER tag but the two tags differ.
report_obj.mismatch_ner_idx

array([     2,      6,    129, ..., 203578, 203587, 203619])

In [105]:
# Nested dictionary: gold_pred_ct_dict[gold_label][predicted_label] -> count.
report_obj.gold_pred_ct_dict

defaultdict(<function evaluation_helper.EvalDev_Report.get_gold_pred_idx_dict.<locals>.<lambda>>,
            {3: defaultdict(int,
                         {3: 141267,
                          4: 9232,
                          5: 8357,
                          6: 6896,
                          7: 3764,
                          8: 31,
                          9: 22,
                          10: 9}),
             4: defaultdict(int,
                         {3: 9218,
                          4: 631,
                          5: 544,
                          6: 466,
                          7: 265,
                          8: 1,
                          9: 1,
                          10: 2}),
             5: defaultdict(int,
                         {3: 8322,
                          4: 532,
                          5: 463,
                          6: 440,
                          7: 241,
                          8: 3,
                          9: 0,
                    

In [28]:
# Nested dictionary: gold_pred_idx_dict[gold_label][predicted_label] -> array of data indices.
report_obj.gold_pred_idx_dict

defaultdict(<function evaluation_helper.EvalDev_Report.get_gold_pred_idx_dict.<locals>.<lambda>()>,
            {3: defaultdict(list,
                         {3: array([], dtype=int64),
                          4: array([], dtype=int64),
                          5: array([], dtype=int64),
                          6: array([     1,      3,      4, ..., 203615, 203616, 203618]),
                          7: array([    40,     44,     55, ..., 203525, 203535, 203575]),
                          8: array([     5,     30,     33, ..., 203592, 203594, 203605]),
                          9: array([   125,    148,    150, ..., 203482, 203533, 203602]),
                          10: array([   121,    131,    136, ..., 203566, 203584, 203620])}),
             4: defaultdict(list,
                         {3: array([], dtype=int64),
                          4: array([], dtype=int64),
                          5: array([], dtype=int64),
                          6: array([     9,     10,     

## Ignore this section

In [10]:
# Decoder targets: the embedding vector of the center word of every window
# (described as 50-dimensional in the original note).
# NOTE(review): `embedding_matrix` is not defined in any visible cell — it must
# be created before this cell is run.

# Derive the center column from the window width instead of hard-coding 4,
# which is only correct for 9-wide windows.
center_idx = trainX.shape[1] // 2

train_decoderY = embedding_matrix[trainX[:, center_idx]]
dev_decoderY = embedding_matrix[devX[:, center_idx]]
test_decoderY = embedding_matrix[testX[:, center_idx]]

In [11]:
# One-hot encode the POS-tag ids of every window position.
# NOTE(review): `to_categorical` is not imported in any visible cell — it must
# be in scope before this cell is run.

# Encode train first: its class count fixes the one-hot width used for dev
# and test, so all three splits end up with the same number of columns.
trainX_pos_cat = to_categorical(trainX_pos.astype('float32'))
devX_pos_cat = to_categorical(devX_pos.astype('float32'), num_classes=trainX_pos_cat.shape[2])
testX_pos_cat = to_categorical(testX_pos.astype('float32'), num_classes=trainX_pos_cat.shape[2])

# Drop the first three one-hot columns and store the result as float64.
# NOTE(review): presumably ids 0-2 are reserved/special ids — confirm in loadutils.
# `np.float` was removed in NumPy 1.24 (it was an alias of the builtin float),
# so np.float64 is used; slicing the last axis replaces the per-window
# map/lambda and yields the same values.
trainX_pos_cat = trainX_pos_cat[:, :, 3:].astype(np.float64)
devX_pos_cat = devX_pos_cat[:, :, 3:].astype(np.float64)
testX_pos_cat = testX_pos_cat[:, :, 3:].astype(np.float64)

In [12]:
# One-hot encode the capitalization ids of every window position
# ("allCaps", "upperInitial", "lowercase", "mixedCaps", "noinfo").
# NOTE(review): `to_categorical` is not imported in any visible cell — it must
# be in scope before this cell is run.

# Encode train first: its class count fixes the one-hot width used for dev
# and test, so all three splits end up with the same number of columns.
trainX_capitals_cat = to_categorical(trainX_capitals.astype('float32'))
devX_capitals_cat = to_categorical(devX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])
testX_capitals_cat = to_categorical(testX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])

# Drop the first three one-hot columns and store the result as float64.
# NOTE(review): presumably ids 0-2 are reserved/special ids — confirm in loadutils.
# `np.float` was removed in NumPy 1.24 (it was an alias of the builtin float),
# so np.float64 is used; slicing the last axis replaces the per-window
# map/lambda and yields the same values.
trainX_capitals_cat = trainX_capitals_cat[:, :, 3:].astype(np.float64)
devX_capitals_cat = devX_capitals_cat[:, :, 3:].astype(np.float64)
testX_capitals_cat = testX_capitals_cat[:, :, 3:].astype(np.float64)