In [129]:
import json
import data
from eval_model import compute_accuracy
import pandas as pd

def load_preds(path, vocab):
    """Read a JSON predictions file and map every entry through ``vocab``.

    Args:
        path: Path to a JSON file holding a list of lists. Later cells index
            the result as ``preds[dialog_index][utterance_index]``.
        vocab: Object indexed with each raw prediction (``vocab[t]``); here
            the first value returned by ``data.load_vocab``.

    Returns:
        A list of lists with the same nesting as the file, where every entry
        ``t`` has been replaced by ``vocab[t]``.
    """
    # Explicit encoding so the read does not depend on the platform locale.
    with open(path, encoding='utf-8') as f:
        raw_preds = json.load(f)
    return [[vocab[t] for t in diag] for diag in raw_preds]


tag_vocab, _ = data.load_vocab('data/swda_tag_vocab.json')
test_data = data.load_data('data/swda_test.json', 'utts', 'tags')

# Both models' predictions go through the same loader so they are decoded
# identically before being compared against the gold tags.
preds_wva = load_preds('models/wordvec-avg.L/preds.E10.json', tag_vocab)
preds_bert = load_preds('models/bert.L/preds.E5.json', tag_vocab)

In [130]:
# Flatten the test set into one row per utterance, pairing the gold tag with
# each model's prediction at the same (dialog, utterance) position.
records = []
for i, diag in enumerate(test_data):
    # NOTE(review): each `diag` is unpacked as (utterances, tags) -- presumably
    # the 'utts' / 'tags' fields requested from data.load_data; confirm.
    for j, (utt, tag) in enumerate(zip(*diag)):
        records.append({
            'diag_id': i,
            'utt_id': j,
            'utt': utt,
            'tag': tag,
            'pred_wva': preds_wva[i][j],
            'pred_bert': preds_bert[i][j],
        })

# pandas is imported as `pd`; the bare name `pandas` is undefined on a fresh
# kernel and would raise NameError here.
df = pd.DataFrame(records)
df['correct_wva'] = df.pred_wva == df.tag
df['correct_bert'] = df.pred_bert == df.tag

In [131]:
# Overall accuracy per model: fraction of rows whose prediction equals the gold tag.
for template, column in (
    ("Wordvec-Avg accuracy {:.4f}", "correct_wva"),
    ("Bert accuracy        {:.4f}", "correct_bert"),
):
    print(template.format(sum(df[column]) / len(df)))

Wordvec-Avg accuracy 0.7017
Bert accuracy        0.7788


In [140]:
def scores(df, pred):
    """Compute per-tag precision, recall and F1 for one prediction column.

    Args:
        df: DataFrame with a ``tag`` column (the label compared against) and
            a column named by ``pred``.
        pred: Name of the column in ``df`` holding the predicted tags.

    Returns:
        Tuple ``(precision, recall, f1)`` of Series indexed by tag.
        ``precision`` covers the tags that occur in ``df[pred]``, ``recall``
        the tags that occur in ``df.tag``, and ``f1`` the union of both.
        Undefined ratios (0/0 or a tag missing on one side) are reported as 0.
    """
    # Rows where the prediction matches the tag, counted per tag.
    tp = df[df[pred] == df.tag].tag.value_counts()
    predicted = df[pred].value_counts()
    actual = df.tag.value_counts()

    # Division aligns on the tag index; tags with no true positives come out
    # as NaN and are mapped to 0.
    precision = (tp / predicted).fillna(0)
    recall = (tp / actual).fillna(0)

    # Use this call's own precision/recall (not globals from another cell).
    f1 = (2 * (precision * recall) / (precision + recall)).fillna(0)
    return precision, recall, f1

In [94]:
# Confusion table for the Wordvec-Avg predictions: rows are `tag`, columns are `pred_wva`.
pd.crosstab(index=df["tag"], columns=df["pred_wva"])

pred_wva,%,+,aa,b,bh,bk,fc,fe/ba,"fo/o/fw/""/by/bc",fx/sv,na,nn,ny,qo,qr/qy,qw,sd,x
tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
%,2191,165,45,247,0,0,6,5,0,86,0,1,10,0,1,1,346,8
+,82,2817,25,33,0,0,2,8,0,196,1,0,0,0,26,8,374,3
^2,15,22,10,14,1,0,1,1,0,26,0,0,0,0,2,0,39,5
^g,0,0,1,4,3,0,0,0,0,0,0,0,0,0,8,0,0,0
^h,16,2,3,1,0,0,2,4,0,7,1,3,3,0,0,0,28,2
^q,5,7,3,5,0,0,0,5,0,39,0,0,0,0,8,0,132,2
aa,49,56,677,1228,0,1,2,51,0,142,0,5,27,0,0,0,138,11
aap/am,0,1,4,0,0,0,1,0,0,5,0,0,0,0,0,0,2,0
ad,5,19,3,3,0,0,4,4,1,22,0,0,1,0,14,1,111,1
ar,4,1,44,0,0,0,0,2,0,2,0,2,0,0,0,0,9,4


In [95]:
# Confusion table for the Bert predictions: rows are `tag`, columns are `pred_bert`.
pd.crosstab(index=df["tag"], columns=df["pred_bert"])

pred_bert,%,+,^2,aa,b,bh,fe/ba,fx/sv,h,ny,qr/qy,qw,qy^d,sd,x
tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
%,1627,150,0,1,292,0,3,79,1,0,3,0,0,939,17
+,91,2386,0,11,109,0,10,246,1,0,26,1,0,684,10
^2,11,16,0,2,35,0,3,12,0,0,1,0,0,50,6
^g,0,0,0,0,5,0,0,0,0,0,10,0,0,1,0
^h,7,3,0,0,4,0,2,11,0,0,0,0,0,44,1
^q,4,9,0,1,10,0,2,18,0,0,4,1,0,156,1
aa,41,65,1,123,1494,0,65,100,0,1,5,0,0,482,10
aap/am,1,2,0,1,2,0,0,0,1,0,0,0,0,6,0
ad,6,13,0,2,18,0,2,19,0,0,5,0,0,123,1
ar,3,1,0,10,5,0,2,1,0,0,3,0,0,43,0
