# Cross-etropy and Kullback-Leibler

In [26]:
import sys
try:
    sys.path.insert(0, "/usr/lib/python3.7/site-packages")
except FileNotFoundError:
    pass
import torch as t
import pandas as pd
import math
from allennlp.predictors.predictor import Predictor

In [27]:
def softmax(tens: t.Tensor):
    tmp = []
    denom = float(t.sum(t.exp(tens)))
    for i in range(tens.size()[0]):
        tmp += [math.exp(tens[i])/denom]
    return t.Tensor(tmp)

In [28]:
print(softmax(t.Tensor([1, 2, 3, 2])))

tensor([0.0723, 0.1966, 0.5344, 0.1966])


## Defining metrics

In [40]:
def cross_entropy_softmax(gold_prob: t.Tensor, mod_prob: t.Tensor):
    gold_prob = softmax(gold_prob.float())
    mod_prob  = softmax(mod_prob.float())
    # sum(p*log(q))
    return float(-t.sum(t.mul( gold_prob, t.log2(mod_prob) )))

def KL_divergention_softmax(gold_prob: t.Tensor, mod_prob: t.Tensor):
    gold_prob = softmax(gold_prob.float())
    mod_prob  = softmax(mod_prob.float())
    # sum(p*log(q/p))
    return float(-t.sum(t.mul( gold_prob, t.log2(t.div(mod_prob, gold_prob)) )))

In [32]:
x1 = t.tensor([10, 1, 9])
x2 = t.tensor([88.7, 2, 5])
KL_divergention_softmax(x1, x2)

31.642784118652344

In [33]:
def str2list(x):
    return [y.strip() for y in x[2:-2].replace('"','').replace("'",'').split(',')]
def list2set(x):
    return set(x)

def uniq_events (test_df):
    test_df["Xintent"] = test_df["Xintent"].apply(str2list)
    test_df["Xemotion"] = test_df["Xemotion"].apply(str2list)
    test_df["Otheremotion"] = test_df["Otheremotion"].apply(str2list)
    test_df = test_df.groupby(["Event"])["Xintent", "Xemotion", "Otheremotion"].sum()
    
    test_df["Xintent"] = test_df["Xintent"].apply(list2set)
    test_df["Xemotion"] = test_df["Xemotion"].apply(list2set)
    test_df["Otheremotion"] = test_df["Otheremotion"].apply(list2set)
    return test_df

## returns KL and soft_max

In [41]:
def metrics_with_softmax(predictor, path_to_csv = 'event2mind/test.csv'):
    gold_ds = pd.read_csv(path_to_csv)[['Event','Xintent','Xemotion','Otheremotion']]
    gold_ds = uniq_events(gold_ds)
    connected = {"Xintent": "xintent", "Xemotion": "xreact", "Otheremotion": "oreact"}
    average = 0
    L = gold_ds.shape[0]
    counter = 0; n = 0
    
    gold_res = []
    mod_res = []
    for index, row in gold_ds.iterrows():
        if(n == (counter*100)//L):
            print("\rCalculating: " + str(n) + "%", end = "")
            n += 1
        for column_name in connected.keys():
            result = predictor.predict(source=index)
            model_out = [ (" ".join(react), prob) for react, prob in zip(result[connected[column_name] + "_top_k_predicted_tokens"], result[connected[column_name] + "_top_k_log_probabilities"])]
            gold_res += [len(row[column_name])]
            mod_res  += [len(set(model_out[:10]) & row[column_name])]    
        counter  += 1
    print("\r", end = "")
    return [KL_divergention_softmax(t.Tensor(gold_res), t.Tensor(mod_res)), cross_entropy_softmax(t.Tensor(gold_res), t.Tensor(mod_res))]

In [43]:
predictor = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/event2mind-2018.10.26.tar.gz")
KL_div, cross_entr = metrics_with_softmax(predictor)
print("Cross-Entropy:              ", str(float('{:.4f}'.format(cross_entr))))
print("Kullback-Leibler divergence:", str(float('{:.4f}'.format(KL_div))))

Cross-Entropy:               12.7264
Kullback-Leibler divergence: 1.8818
