In [24]:
import csv
import numpy as np
import pandas as pd
import json
import pickle

from main import train_setup
import sklearn.metrics as skmetric

In [25]:
with open('/raid/xiaoyuz1/goemotions/goemotions/data/emotions.txt', 'r') as f:
    emotion_list = f.readlines()
    emotion_list = [s.strip() for s in emotion_list]

emotion_text_to_label = dict(zip(emotion_list, range(len(emotion_list))))
label_to_emotion_text = dict(zip(range(len(emotion_list)), emotion_list))

ekman_to_go = json.load(open('/raid/xiaoyuz1/goemotions/goemotions/data/ekman_mapping.json'))
go_to_ekman = {'neutral' : 'neutral'}
for k,v in ekman_to_go.items():
    for vi in v:
        go_to_ekman[vi] = k
ekman_texts = list(ekman_to_go.keys())
ekman_to_idx = dict(zip(ekman_texts, range(len(ekman_texts))))
ekman_to_idx["neutral"] = len(ekman_texts)


sentiment_to_go = json.load(open('/raid/xiaoyuz1/goemotions/goemotions/data/sentiment_mapping.json'))
go_to_sentiment = {'neutral' : 'neutral'}
for k,v in sentiment_to_go.items():
    for vi in v:
        go_to_sentiment[vi] = k
sentiment_texts = list(sentiment_to_go.keys())
sentiment_to_idx = dict(zip(sentiment_texts, range(len(sentiment_texts))))
sentiment_to_idx["neutral"] = len(sentiment_texts)

In [26]:
label_to_emotion_text[25]

'sadness'

In [27]:
SEEDS = [
    79719,30010,46921,25577,52538,56440,41228,66558,48642,69556
]

In [28]:
meta_fname_template = "/raid/xiaoyuz1/goemotions/save_path/bert_seed-{}-meta.pkl"
pkl_fname_template = "/raid/xiaoyuz1/goemotions/pred_result/baseline/test_pred_seed-{}_label.pkl"

In [29]:
def individual_emotion_result(gt, pred, emotion_list, label2idx=None, text2idx=None):
    precs = []
    recalls = []
    f1s = []

    for emotion_label, emotion_text in enumerate(emotion_list):
        if label2idx is not None:
            emotion_idx = label2idx[emotion_label]
        else:
            assert text2idx is not None
            emotion_idx = text2idx[emotion_text]
            
        y_true = np.asarray(gt)
        y_pred = np.asarray(pred)

        y_true = y_true[:,emotion_idx]
        y_pred = y_pred[:,emotion_idx]

        prec = skmetric.precision_score(y_true, y_pred, average="binary")
        recall = skmetric.recall_score(y_true, y_pred, average="binary")
        f1 = skmetric.f1_score(y_true, y_pred, average="binary")

        precs += [prec]
        recalls += [recall]
        f1s += [f1]

    df_table_4 = pd.DataFrame({'Emotion': emotion_list,
                       'Precision': precs,
                       'Recall': recalls,
                      'F1' : f1s})
    
    return df_table_4

In [30]:
def all_individual_emotion_results(emotion_list, pkl_fname_template, meta_fname_template, seeds):
    df_tables = []
    for seed in seeds: 
        fh = open(pkl_fname_template.format(seed), 'rb')
        gt, pred, _ = pickle.load(fh)
        
        fh2 = open(meta_fname_template.format(seed), "rb")
        meta = pickle.load(fh2)
        
        label2idx = meta['labels2idxes'][0][0]
        label2idx = {int(key) : int(value) for key, value in label2idx.items()}
        idx2label = {value: key for key, value in label2idx.items()}
        
        df_table = individual_emotion_result(gt, pred, emotion_list, label2idx=label2idx)
        df_tables.append(df_table)
    
    emo_precs = []
    emo_recalls = []
    emo_f1s = []
    for i in range(len(emotion_list)):
        precs = []
        recalls = []
        f1s = []
        
        for df_table in df_tables:
            precs += [df_table.iloc[i]['Precision']]
            recalls += [df_table.iloc[i]['Recall']]
            f1s += [df_table.iloc[i]['F1']]
        
        prec_avg, prec_std = np.mean(precs), np.std(precs)
        recall_avg, recall_std = np.mean(recalls), np.std(recalls)
        f1_avg, f1_std = np.mean(f1s), np.std(f1s)

        emo_precs += [prec_avg]
        emo_recalls += [recall_avg]
        emo_f1s += [f1_avg]
        
#     df_emo = pd.DataFrame({'Emotion': emotion_list,
#                        'Precision': emo_precs,
#                        'Recall': emo_recalls,
#                       'F1' : emo_f1s})
    
    return {'Emotion': emotion_list,
                       'Precision': emo_precs,
                       'Recall': emo_recalls,
                      'F1' : emo_f1s}

In [31]:
emo_dict = all_individual_emotion_results(
    emotion_list, pkl_fname_template, meta_fname_template, SEEDS)

In [32]:
emo_ekman = []
emo_sentiment = []
for emo in emotion_list:
    emo_ekman += [go_to_ekman[emo]]
    emo_sentiment += [go_to_sentiment[emo]]

emo_dict['Ekman'] = emo_ekman
emo_dict['Sentiment'] = emo_sentiment

df_emo = pd.DataFrame(emo_dict)

In [33]:
df_emo.sort_values(by=['F1'], ascending=False)

Unnamed: 0,Emotion,Precision,Recall,F1,Ekman,Sentiment
15,gratitude,0.910913,0.904545,0.907614,joy,positive
1,amusement,0.764272,0.8625,0.810303,joy,positive
18,love,0.736025,0.82563,0.778093,joy,positive
0,admiration,0.650005,0.681548,0.664809,joy,positive
14,fear,0.629763,0.666667,0.647096,fear,negative
24,remorse,0.553345,0.741071,0.632717,sadness,negative
27,neutral,0.646696,0.61136,0.628417,neutral,neutral
17,joy,0.579339,0.601863,0.589668,joy,positive
25,sadness,0.568826,0.533974,0.550376,sadness,negative
20,optimism,0.577842,0.522043,0.548263,joy,positive


In [34]:
df_emo[df_emo['Sentiment'] == 'negative'].sort_values(by=["F1"], ascending=False)

Unnamed: 0,Emotion,Precision,Recall,F1,Ekman,Sentiment
14,fear,0.629763,0.666667,0.647096,fear,negative
24,remorse,0.553345,0.741071,0.632717,sadness,negative
25,sadness,0.568826,0.533974,0.550376,sadness,negative
2,anger,0.534629,0.449495,0.487735,anger,negative
11,disgust,0.507208,0.433333,0.466085,disgust,negative
12,embarrassment,0.498073,0.381081,0.428977,sadness,negative
10,disapproval,0.389205,0.346067,0.365863,anger,negative
19,nervousness,0.339894,0.365217,0.350755,fear,negative
3,annoyance,0.343905,0.320937,0.331786,anger,negative
16,grief,0.386667,0.25,0.296313,sadness,negative


In [44]:
df_emo[df_emo['Sentiment'] == 'negative'].sort_values(by=["F1"], ascending=False)

Unnamed: 0,Emotion,Precision,Recall,F1,Ekman,Sentiment
14,fear,0.629763,0.666667,0.647096,fear,negative
24,remorse,0.553345,0.741071,0.632717,sadness,negative
25,sadness,0.568826,0.533974,0.550376,sadness,negative
2,anger,0.534629,0.449495,0.487735,anger,negative
11,disgust,0.507208,0.433333,0.466085,disgust,negative
12,embarrassment,0.498073,0.381081,0.428977,sadness,negative
10,disapproval,0.389205,0.346067,0.365863,anger,negative
19,nervousness,0.339894,0.365217,0.350755,fear,negative
3,annoyance,0.343905,0.320937,0.331786,anger,negative
16,grief,0.386667,0.25,0.296313,sadness,negative


In [45]:
df_emo.mean()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



Precision    0.519789
Recall       0.489546
F1           0.498134
dtype: float64

In [43]:
df_emo[df_emo['Sentiment'] == 'negative'].mean()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



Precision    0.458828
Recall       0.429479
F1           0.438089
dtype: float64

In [37]:
myseed = 48642
meta_fname = meta_fname_template.format(myseed)
pkl_fname = pkl_fname_template.format(myseed)

with open(meta_fname, "rb") as f:
    meta = pickle.load(f)

label2idx = meta['labels2idxes'][0][0]
label2idx = {int(key) : int(value) for key, value in label2idx.items()}
idx2label = {value: key for key, value in label2idx.items()}

df_test = pd.read_csv("/raid/xiaoyuz1/goemotions/goemotions/data/test.csv")

fh = open(pkl_fname, 'rb')
gt, pred, _ = pickle.load(fh)
gt = np.asarray(gt)
y_pred = np.asarray(pred)

In [51]:
def probe(gt, pred, row_i, study_emo):
#     print(df_test.iloc[row_i].text)
    gt_idxs = np.where(gt[row_i])[0]
    pred_idxs = np.where(pred[row_i])[0]
    gts = []
    
    for i in gt_idxs:
        l1 = label_to_emotion_text[idx2label[i]]
        gts += [l1]
#     print("gt: ", ' '.join(gts))
    preds = []
    for i in pred_idxs:
        preds += [label_to_emotion_text[idx2label[i]]]
#     print("pred: ", ' '.join(preds))
    
    preds_filtered = []
    if(len(preds) == 0):
        return ["null"]
    for i in preds:
        if not(i == study_emo):
             preds_filtered.append(i)
        
    return preds_filtered

for study_emo in list(df_emo[df_emo['Sentiment'] == 'negative']['Emotion']):
    study_idx = label2idx[emotion_text_to_label[study_emo]]
    mask = gt[:, study_idx] > 0
    got_wrong_mask = (gt[:, study_idx] != y_pred[:, study_idx]) * mask
    wrong_row_idxs = np.where(got_wrong_mask)[0]  
    preds = []
    for row_idx in wrong_row_idxs:
        preds += probe(gt, y_pred, row_idx, study_emo)
        
    wrong_labels, wrong_counts = np.unique(preds, return_counts=True)
    wrong_labels2 = []
    wrong_counts2 = []
    for k in np.argsort(wrong_counts)[::-1]:
        wrong_labels2 += [wrong_labels[k]]
        wrong_counts2 += [wrong_counts[k]]
    #print("{}: ".format(study_emo), list(zip(wrong_labels2[:5], np.asarray(wrong_counts2[:5]) / np.sum(mask) )))
    #print("\n")#  
    print("{} & {} ({:.2f})".format(study_emo, wrong_labels2[0], wrong_counts2[0] / np.sum(mask) ))
    

anger & neutral (0.16)
annoyance & neutral (0.29)
disappointment & neutral (0.24)
disapproval & neutral (0.29)
disgust & annoyance (0.15)
embarrassment & neutral (0.14)
fear & neutral (0.09)
grief & neutral (0.33)
nervousness & fear (0.17)
remorse & sadness (0.11)
sadness & neutral (0.12)


In [14]:
# import copy

# neutral_idx = label2idx[emotion_text_to_label['neutral']]

# pred_ideal = copy.deepcopy(pred)
# for row_idx,(gt_row, pred_row) in enumerate(zip(gt, pred)):
#     gt_idxs = list(np.where(gt_row)[0])
    
#     if(neutral_idx in gt_idxs):
#         continue
    
#     if(pred_row[neutral_idx] == 1):
#         for j in gt_idxs:
#             pred_ideal[row_idx, j] = 1
#         pred_ideal[row_idx, neutral_idx] = 0

In [15]:
# df_ideal = individual_emotion_result(gt, pred_ideal, emotion_list, label2idx=label2idx)

In [16]:
# df_ideal.sort_values(by=['F1'], ascending=False)

In [17]:
# df_ideal[df_ideal.Emotion.isin([
#     'fear',
#   'nervousness',
#   'remorse',
#   'embarrassment',
#   'disappointment',
#   'sadness',
#   'grief',
#   'disgust',
#   'anger',
#   'annoyance',
#   'disapproval',
#     'neutral',
# ])].sort_values(by=["F1"], ascending=False)

In [18]:
def probe(gt, pred, row_i, study_emo):
    print(df_test.iloc[row_i].text)
    gt_idxs = np.where(gt[row_i])[0]
    pred_idxs = np.where(pred[row_i])[0]
    gts = []
    
    for i in gt_idxs:
        l1 = label_to_emotion_text[idx2label[i]]
        gts += [l1]
    print("gt: ", ' '.join(gts))
    preds = []
    for i in pred_idxs:
        preds += [label_to_emotion_text[idx2label[i]]]
    print("pred: ", ' '.join(preds))
    
    preds_filtered = []
    if(len(preds) == 0):
        return ["null"]
    for i in preds:
        if not(i == study_emo):
             preds_filtered.append(i)
        
    return preds_filtered

In [19]:
study_emo = "sadness"
print("label: ", emotion_text_to_label[study_emo])
study_idx = label2idx[emotion_text_to_label[study_emo]]
print("idx: ", study_idx)


mask = gt[:, study_idx] > 0
print("total: ", np.sum(mask))


got_wrong_mask = (gt[:, study_idx] != pred[:, study_idx]) * mask
print("got wrong total: ", np.sum(got_wrong_mask))
print("\n")
wrong_row_idxs = np.where(got_wrong_mask)[0]  

preds = []
for row_idx in wrong_row_idxs:
    print(row_idx)
    preds += probe(gt, pred, row_idx, study_emo)
    print("\n")

label:  25
idx:  17
total:  156
got wrong total:  72


0
I’m really sorry about your situation :( Although I love the names Sapphira, Cirilla, and Scarlett!
gt:  sadness
pred:  love remorse


152
Must be why I'm alive. That sucks.
gt:  annoyance sadness
pred:  annoyance


182
And [NAME], would again like to apologize for any misunderstanding.
gt:  sadness remorse
pred:  neutral


249
I did twist my ankle trying to play that on DDR.
gt:  sadness
pred:  neutral


353
Losing my identity, wondering have I gone insane...
gt:  surprise sadness
pred:  surprise


369
The UN said the election was a sham. The man is causing death destruction and starvation. Your morals are completely and utterly fucked.
gt:  disappointment sadness
pred:  anger


378
Oh god another injury
gt:  sadness
pred:  


550
Wow, you folks sure are salty about losing the midterms! Pathetic?
gt:  sadness
pred:  surprise


563
I'll pass the "I'm not crying you're crying!" part. Yeah I'm crying. 
gt:  sadness
pred:  approval


In [20]:
wrong_labels, wrong_counts = np.unique(preds, return_counts=True)
wrong_labels2 = []
wrong_counts2 = []
for k in np.argsort(wrong_counts)[::-1]:
    wrong_labels2 += [wrong_labels[k]]
    wrong_counts2 += [wrong_counts[k]]

In [21]:
ss = []
for l,c in zip(wrong_labels2, wrong_counts2):
    print(l, c)
    ss += ["{} ".format(l)]
   #  ss += ["{} ({}) ".format(l,c)]


wrong_freq_row = "\textbf{}  & "
wrong_freq_row += ", ".join(ss[:5])
wrong_freq_row + " \\ "

neutral 19
remorse 12
disappointment 7
annoyance 6
approval 5
surprise 4
anger 4
admiration 4
null 4
disapproval 3
confusion 3
gratitude 2
grief 2
love 2
desire 2
fear 1
curiosity 1
caring 1
nervousness 1
optimism 1
excitement 1


'\textbf{}  & neutral , remorse , disappointment , annoyance , approval  \\ '

In [22]:
# y_true_study_idx = np.ones(np.sum(got_wrong_mask)) * study_idx
# y_pred_study_idx = []
# for row in pred[got_wrong_mask]:
#     row_wrong_idxs = list(np.where(row)[0])
#     row_wrongs = []
#     for j in row_wrong_idxs:
#         row_wrongs += [idx2label[j] ]
    
        
#     y_pred_study_idx += []

# wrong_labels, wrong_counts = np.unique(y_pred_study_idx, return_counts=True)
# for k in np.argsort(wrong_counts)[::-1]:
# #     print(label)
#     print(label_to_emotion_text[wrong_labels[k]], wrong_counts[k])

In [23]:
# df_test.iloc[1294]