In [2]:
import pandas as pd

df = pd.read_csv("confidence_all_models.csv")
df

Unnamed: 0,sample id,Sentence 1,Sentence 2,Gold label,Machine_RANDOM_Label,Machine_RANDOM_Confidence,Machine_TF-IDF_Label,Machine_TF-IDF_Confidence,Machine_LSTM_Label,Machine_LSTM_Confidence,Machine_ROBERTA_Label,Machine_ROBERTA_Confidence,Machine_DAVINCI_Label,Machine_DAVINCI_Confidence,Human_Label,Human_Confidence
0,0,A little boy is opening gifts surrounded by a ...,The boy is being punished,contradiction,neutral,0.33,neutral,0.48,neutral,0.52,contradiction,0.89,contradiction,0.93,contradiction,0.84
1,1,"People playing cricket in the park, pine trees...","People are playing sports in the park, near th...",entailment,contradiction,0.33,neutral,0.60,entailment,0.54,entailment,0.91,entailment,0.75,entailment,0.89
2,2,Some people hanging out on a large backyard deck.,people hanging out on deck,entailment,entailment,0.33,entailment,0.50,entailment,0.54,entailment,0.97,entailment,1.00,entailment,0.95
3,3,A group of dancers are performing.,The audience is silent.,neutral,entailment,0.33,contradiction,0.89,contradiction,0.57,neutral,0.98,neutral,1.00,neutral,0.86
4,4,A large brown and white dog is carrying a stic...,A puppy is playing fetch with a stick.,neutral,contradiction,0.33,neutral,0.79,neutral,0.57,neutral,1.00,neutral,1.00,neutral,0.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,85,"A group of asian women in sports attire, and o...",Men are playing with a dog,contradiction,entailment,0.33,entailment,0.52,contradiction,0.57,contradiction,1.00,contradiction,0.98,contradiction,0.64
86,86,A snowboarder is jumping over a snow slope.,A girl jumps her green snowboard over a slope.,neutral,neutral,0.33,contradiction,0.53,neutral,0.55,neutral,1.00,neutral,1.00,neutral,0.69
87,87,a group of people on a dock lowering into the ...,The group was getting ready to go fishing on t...,neutral,neutral,0.33,neutral,0.66,neutral,0.56,neutral,1.00,neutral,1.00,neutral,0.83
88,88,A young girl in a bathing suit drinking a beve...,The girl is drinking milk from a sippy cup.,contradiction,entailment,0.33,contradiction,0.59,neutral,0.47,contradiction,0.99,neutral,0.99,neutral,0.55


In [11]:
models = ['RANDOM', 'TF-IDF', 'LSTM', 'ROBERTA', 'DAVINCI']
models.sort()

models

['DAVINCI', 'LSTM', 'RANDOM', 'ROBERTA', 'TF-IDF']

In [43]:
dist_labels = ['KL', "JSD", 'MSE']
subsets = ['ALL', 'AGREE', 'DISAGREE']

In [48]:
from scipy.spatial import distance
from scipy.special import rel_entr
from sklearn.metrics import mean_squared_error

def calc_dist(p, q):
    kl = sum(rel_entr(p, q))
    jsd = distance.jensenshannon(p, q)
    sqe = mean_squared_error(p, q)
    
    return {
        "KL": kl, 
        "JSD": jsd, 
        "MSE": sqe
    }

In [53]:
results = {
    "ALL": dict(),
    "AGREE": dict(),
    "DISAGREE": dict()
}

model_1 = []
model_2 = []


for i in range(len(models)):
    for j in range(len(models)):
        if i != j:
            print(models[i], models[j])
            
            model_1.append(models[i])
            model_2.append(models[j])
            
            model1_col_label = f"Machine_{models[i]}_Label"
            model2_col_label = f"Machine_{models[j]}_Label"

            model1_col_conf = f"Machine_{models[i]}_Confidence"
            model2_col_conf = f"Machine_{models[j]}_Confidence"

            dist = calc_dist(df[model1_col_conf].tolist(), df[model2_col_conf].tolist())
            results['ALL'][f"{models[i]}/{models[j]}"] = dist
            
            tmp_df = df[df[model1_col_label] == df[model2_col_label]]            
            dist = calc_dist(tmp_df[model1_col].tolist(), tmp_df[model2_col].tolist())
            results['AGREE'][f"{models[i]}/{models[j]}"] = dist
            
            tmp_df = df[df[model1_col_label] != df[model2_col_label]]            
            dist = calc_dist(tmp_df[model1_col].tolist(), tmp_df[model2_col].tolist())
            results['DISAGREE'][f"{models[i]}/{models[j]}"] = dist

DAVINCI LSTM
DAVINCI RANDOM
DAVINCI ROBERTA
DAVINCI TF-IDF
LSTM DAVINCI
LSTM RANDOM
LSTM ROBERTA
LSTM TF-IDF
RANDOM DAVINCI
RANDOM LSTM
RANDOM ROBERTA
RANDOM TF-IDF
ROBERTA DAVINCI
ROBERTA LSTM
ROBERTA RANDOM
ROBERTA TF-IDF
TF-IDF DAVINCI
TF-IDF LSTM
TF-IDF RANDOM
TF-IDF ROBERTA


In [57]:
result_df = pd.DataFrame()

result_df['model_1'] = model_1
result_df['model_2'] = model_2

result_df    

Unnamed: 0,model_1,model_2
0,DAVINCI,LSTM
1,DAVINCI,RANDOM
2,DAVINCI,ROBERTA
3,DAVINCI,TF-IDF
4,LSTM,DAVINCI
5,LSTM,RANDOM
6,LSTM,ROBERTA
7,LSTM,TF-IDF
8,RANDOM,DAVINCI
9,RANDOM,LSTM


In [58]:
for subset in subsets:
    for metric in dist_labels:
        col_data = []
        for pair in results[subset].keys():
            col_data.append(results[subset][pair][metric])

        result_df[f"{subset}_{metric}"] = col_data

result_df

Unnamed: 0,model_1,model_2,ALL_KL,ALL_JSD,ALL_MSE,AGREE_KL,AGREE_JSD,AGREE_MSE,DISAGREE_KL,DISAGREE_JSD,DISAGREE_MSE
0,DAVINCI,LSTM,52.789413,0.0695,0.201329,-10.907184,0.092497,0.123388,-9.212083,0.105192,0.136373
1,DAVINCI,RANDOM,88.927204,0.048762,0.383641,-7.394468,0.089379,0.126422,-12.7248,0.103163,0.130893
2,DAVINCI,ROBERTA,3.416663,0.07005,0.029712,-12.534024,0.082745,0.119698,-7.585243,0.118814,0.143711
3,DAVINCI,TF-IDF,39.738924,0.10663,0.148882,-9.984601,0.088105,0.131286,-10.134666,0.106648,0.127569
4,LSTM,DAVINCI,-26.994548,0.0695,0.201329,-10.907184,0.092497,0.123388,-9.212083,0.105192,0.136373
5,LSTM,RANDOM,20.5536,0.046243,0.037139,-7.262403,0.103703,0.13685,-12.856864,0.095559,0.12514
6,LSTM,ROBERTA,-26.593697,0.043572,0.174332,-17.250085,0.095396,0.139607,-2.869182,0.105812,0.088089
7,LSTM,TF-IDF,-6.713881,0.093565,0.036676,-11.713376,0.09657,0.124642,-8.405891,0.100812,0.135981
8,RANDOM,DAVINCI,-30.719782,0.048762,0.383641,-7.394468,0.089379,0.126422,-12.7248,0.103163,0.130893
9,RANDOM,LSTM,-12.752766,0.046243,0.037139,-7.262403,0.103703,0.13685,-12.856864,0.095559,0.12514


In [60]:
result_df.to_csv("SNLI_model_dist.csv", header=True, index=False, sep=',')