In [1]:
import csv
import numpy as np
import pandas as pd
import json
import pickle
import torch

from main import train_setup
import sklearn.metrics as skmetric

In [2]:
# Emotion names, one per line of emotions.txt; a name's position in the list is
# used as its integer label below.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/emotions.txt', 'r') as f:
    emotion_list = [s.strip() for s in f]

emotion_text_to_label = dict(zip(emotion_list, range(len(emotion_list))))
label_to_emotion_text = dict(zip(range(len(emotion_list)), emotion_list))

# Ekman grouping: the JSON maps each group name to a list of emotion names.
# Opened with a context manager so the handle is closed deterministically.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/ekman_mapping.json') as f:
    ekman_to_go = json.load(f)
# Reverse lookup (emotion name -> group name); 'neutral' maps to itself.
go_to_ekman = {'neutral' : 'neutral'}
for k,v in ekman_to_go.items():
    for vi in v:
        go_to_ekman[vi] = k
ekman_texts = list(ekman_to_go.keys())
ekman_to_idx = dict(zip(ekman_texts, range(len(ekman_texts))))
# 'neutral' is not a key of the JSON mapping, so it gets the next free index.
ekman_to_idx["neutral"] = len(ekman_texts)


# Sentiment grouping, built exactly like the Ekman grouping above.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/sentiment_mapping.json') as f:
    sentiment_to_go = json.load(f)
go_to_sentiment = {'neutral' : 'neutral'}
for k,v in sentiment_to_go.items():
    for vi in v:
        go_to_sentiment[vi] = k
sentiment_texts = list(sentiment_to_go.keys())
sentiment_to_idx = dict(zip(sentiment_texts, range(len(sentiment_texts))))
sentiment_to_idx["neutral"] = len(sentiment_texts)

In [3]:
# Build the multi-hot ground-truth matrix: one row per test example, one column
# per label id (28 columns). The 'label' column holds comma-separated label ids.
df_test = pd.read_csv("/raid/xiaoyuz1/goemotions/goemotions/data/test.csv")
gt = np.zeros((len(df_test), 28)).astype(int)
for row_pos, label_field in enumerate(df_test['label']):
    active_labels = [int(token) for token in label_field.split(",")]
    gt[row_pos, active_labels] = 1

In [4]:
def get_y_pred(fname):
    """Read logits from a text file and return thresholded 0/1 predictions.

    Each non-blank line of ``fname`` is split on whitespace and parsed as
    floats. The parsed values are reshaped to 28 columns, passed through a
    sigmoid, and thresholded at 0.5.

    Args:
        fname: Path of the logits text file.

    Returns:
        An integer ``torch`` tensor with 28 columns containing 0/1 entries.

    Raises:
        ValueError: If a token cannot be parsed as a float.
    """
    logits = []
    # Context manager closes the file even if parsing fails part-way through.
    with open(fname, "r") as fh:
        for line in fh:
            # split() without arguments tolerates repeated spaces / tabs.
            tokens = line.split()
            if not tokens:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            logits.append([float(token) for token in tokens])

    y_pred = (torch.tensor(np.asarray(logits).reshape(-1, 28)).sigmoid() > 0.5) * 1
    return y_pred

In [5]:
def individual_emotion_result(gt, pred, emotion_list, label2idx=None, text2idx=None):
    """Compute per-emotion binary precision, recall and F1.

    Args:
        gt: 2-D array-like of ground-truth 0/1 indicators (rows are examples).
        pred: 2-D array-like of predicted 0/1 indicators, same layout as ``gt``.
        emotion_list: Emotion names; the position of a name is its label id.
        label2idx: Optional mapping from label id to column index. Used when
            given.
        text2idx: Optional mapping from emotion name to column index. Used
            only when ``label2idx`` is None.

    Returns:
        A DataFrame with columns 'Emotion', 'Precision', 'Recall' and 'F1',
        one row per entry of ``emotion_list``.

    Raises:
        ValueError: If both ``label2idx`` and ``text2idx`` are None.
    """
    if label2idx is None and text2idx is None:
        raise ValueError("either label2idx or text2idx must be provided")

    # Convert once; the original re-converted on every loop iteration.
    y_true_all = np.asarray(gt)
    y_pred_all = np.asarray(pred)

    precs = []
    recalls = []
    f1s = []

    for emotion_label, emotion_text in enumerate(emotion_list):
        if label2idx is not None:
            emotion_idx = label2idx[emotion_label]
        else:
            emotion_idx = text2idx[emotion_text]

        y_true = y_true_all[:, emotion_idx]
        y_pred = y_pred_all[:, emotion_idx]

        # zero_division=0 yields the same 0.0 score as the default behaviour
        # but without emitting an "ill-defined" warning for emotions that are
        # never predicted.
        precs.append(skmetric.precision_score(y_true, y_pred, average="binary", zero_division=0))
        recalls.append(skmetric.recall_score(y_true, y_pred, average="binary", zero_division=0))
        f1s.append(skmetric.f1_score(y_true, y_pred, average="binary", zero_division=0))

    return pd.DataFrame({'Emotion': emotion_list,
                         'Precision': precs,
                         'Recall': recalls,
                         'F1': f1s})

In [6]:
# Identity mappings over the 28 label ids: label id -> column index and back.
label2idx = {label: label for label in range(28)}
idx2label = {column: column for column in range(28)}

In [7]:
def get_df_emo(fname):
    """Return the per-emotion metrics table for ``fname`` with group columns.

    The metrics come from ``individual_emotion_result`` applied to the
    notebook-level ``gt`` and the predictions read from ``fname``. Two extra
    columns, 'Ekman' and 'Sentiment', hold the group each emotion maps to in
    ``go_to_ekman`` and ``go_to_sentiment``.
    """
    metrics = individual_emotion_result(gt, get_y_pred(fname), emotion_list, label2idx=label2idx)

    ekman_groups = [go_to_ekman[name] for name in emotion_list]
    sentiment_groups = [go_to_sentiment[name] for name in emotion_list]

    # Copy every metrics column as a plain list, then append the group columns.
    columns = {col: list(metrics[col].to_numpy()) for col in metrics}
    columns['Ekman'] = ekman_groups
    columns['Sentiment'] = sentiment_groups

    return pd.DataFrame(columns)

In [8]:
# Default logits file; this is the same path assigned to fname_merge_AFS in a later cell.
fname = "/raid/xiaoyuz1/goemotions_result/both-merge-epoch-10/p1-i0/eval_logits.txt"

# Logits files for the two runs referred to as LARGE and AFS in this notebook.
fname_LARGE = "/raid/xiaoyuz1/goemotions_result/6-separate/p1-i0/eval_logits.txt"
fname_AFS = \
    "/raid/xiaoyuz1/goemotions_result/goemotion-prompt-ekman_neg-small-batch-epoch-5/p1-i0/eval_logits.txt"

# df_table_LARGE = individual_emotion_result(gt, get_y_pred(fname_LARGE), emotion_list, label2idx=label2idx)
# df_table_AFS = individual_emotion_result(gt, get_y_pred(fname_AFS), emotion_list, label2idx=label2idx)

# Per-emotion metric tables (with Ekman / Sentiment columns) for each run.
df_emo_LARGE = get_df_emo(fname_LARGE)
df_emo_AFS = get_df_emo(fname_AFS)


Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.



In [9]:
# Logits files for the two "merge" runs.
fname_merge_AFS = "/raid/xiaoyuz1/goemotions_result/both-merge-epoch-10/p1-i0/eval_logits.txt"
fname_merge_LARGE = "/raid/xiaoyuz1/goemotions_result/both_merge_6-epoch-6/p1-i0/eval_logits.txt"

# Per-emotion metric tables (with Ekman / Sentiment columns) for each merge run.
df_emo_merge_LARGE = get_df_emo(fname_merge_LARGE)
df_emo_merge_AFS = get_df_emo(fname_merge_AFS)


Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.



In [10]:
# Column means of Precision / Recall / F1 over all emotions for each merge run.
# numeric_only=True restricts the reduction to the numeric columns explicitly;
# relying on pandas to silently drop the string columns (Emotion, Ekman,
# Sentiment) is deprecated and raises TypeError in newer pandas versions.
print(df_emo_merge_LARGE.mean(numeric_only=True))
print(df_emo_merge_AFS.mean(numeric_only=True))

Precision    0.502704
Recall       0.443017
F1           0.457436
dtype: float64
Precision    0.547643
Recall       0.454240
F1           0.476004
dtype: float64



Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



In [11]:
# df_table.sort_values(by=['F1'], ascending=False)

In [12]:
def _show_sentiment(df_emo, sentiment):
    """Print numeric column means for one sentiment group and return its rows sorted by F1."""
    subset = df_emo[df_emo['Sentiment'] == sentiment]
    # numeric_only=True: the frame also carries string columns, and averaging
    # those is deprecated / raises TypeError in newer pandas versions.
    print(subset.mean(numeric_only=True))
    return subset.sort_values(by=["F1"], ascending=False)


def show_neg(df_emo):
    """Print mean metrics of the rows whose 'Sentiment' is 'negative'.

    Args:
        df_emo: DataFrame with at least the columns 'Sentiment' and 'F1'.

    Returns:
        The 'negative' rows of ``df_emo`` sorted by 'F1' in descending order.
    """
    return _show_sentiment(df_emo, 'negative')


def show_pos(df_emo):
    """Print mean metrics of the rows whose 'Sentiment' is 'positive'.

    Args:
        df_emo: DataFrame with at least the columns 'Sentiment' and 'F1'.

    Returns:
        The 'positive' rows of ``df_emo`` sorted by 'F1' in descending order.
    """
    return _show_sentiment(df_emo, 'positive')


# df_emo.sort_values(by=['F1'], ascending=False)
# 
# 

In [None]:
# Negative-sentiment rows of the LARGE table, sorted by F1 (show_neg also prints their column means).
show_neg(df_emo_LARGE)

In [None]:
# Negative-sentiment rows of the AFS table, sorted by F1 (show_neg also prints their column means).
show_neg(df_emo_AFS)

In [None]:
# Negative-sentiment rows of the merge-AFS table, sorted by F1 (show_neg also prints their column means).
show_neg(df_emo_merge_AFS)

In [None]:
# Negative-sentiment rows of the merge-LARGE table, sorted by F1 (show_neg also prints their column means).
show_neg(df_emo_merge_LARGE)

In [None]:
def probe(gt, pred, row_i, study_emo):
    """Print one test example with its true and predicted emotions.

    Verbose variant: prints the text of row ``row_i`` of ``df_test`` followed
    by the ground-truth and predicted emotion names. Note that a later cell in
    this notebook defines another ``probe`` that returns a 2-tuple; whichever
    definition ran last is the one in effect.

    Returns:
        ``["null"]`` when nothing is predicted for the row, otherwise the
        predicted emotion names with ``study_emo`` removed.
    """
    print(df_test.iloc[row_i].text)
    true_columns = np.where(gt[row_i])[0]
    predicted_columns = np.where(pred[row_i])[0]

    true_names = [label_to_emotion_text[idx2label[col]] for col in true_columns]
    print("gt: ", ' '.join(true_names))

    predicted_names = [label_to_emotion_text[idx2label[col]] for col in predicted_columns]
    print("pred: ", ' '.join(predicted_names))

    if not predicted_names:
        return ["null"]
    return [name for name in predicted_names if name != study_emo]

In [14]:
def probe(gt, pred, row_i, study_emo):
    """Return the emotions predicted for one row, with and without ``study_emo``.

    Args:
        gt: Ground-truth indicator matrix. Unused here; kept so the signature
            stays the same for existing callers.
        pred: Prediction indicator matrix; row ``row_i`` is inspected.
        row_i: Row index to inspect.
        study_emo: Emotion name to drop from the first returned list.

    Returns:
        A pair ``(filtered, all_predicted)`` of lists of emotion names.
        ``filtered`` is ``all_predicted`` without ``study_emo``. When nothing
        is predicted for the row, both lists are ``["null"]``.
    """
    predicted_names = [
        label_to_emotion_text[idx2label[col]] for col in np.where(pred[row_i])[0]
    ]
    if not predicted_names:
        # Placeholder so rows with no prediction still count in the tallies.
        return ["null"], ["null"]

    filtered = [name for name in predicted_names if name != study_emo]
    return filtered, predicted_names

def print_individual(fname, df_emo, study_emotions=None):
    """Print, per studied emotion, the label most often predicted when it was missed.

    For each emotion, the rows where the ground truth contains the emotion but
    the prediction does not are collected. The labels predicted on those rows
    are tallied ("null" stands for rows with no prediction at all), and the
    most frequent one is printed as ``"<emotion> & <label> (<ratio>)"``, where
    the ratio is its count divided by the number of rows whose ground truth
    contains the emotion.

    Args:
        fname: Path of the logits file, read through ``get_y_pred``.
        df_emo: Unused; kept for backward compatibility with existing calls.
        study_emotions: Optional iterable of emotion names to analyse.
            Defaults to the fixed list of negative emotions used originally.
    """
    if study_emotions is None:
        study_emotions = ["fear", "remorse", "sadness", "anger", "disgust", "embarrassment",
                          "disapproval", "nervousness", "annoyance", "grief", "disappointment"]

    y_pred = get_y_pred(fname).numpy()
    for study_emo in study_emotions:
        study_idx = label2idx[emotion_text_to_label[study_emo]]
        mask = gt[:, study_idx] > 0
        got_wrong_mask = (gt[:, study_idx] != y_pred[:, study_idx]) * mask
        wrong_row_idxs = np.where(got_wrong_mask)[0]

        preds = []
        for row_idx in wrong_row_idxs:
            preds_i, _ = probe(gt, y_pred, row_idx, study_emo)
            preds += preds_i

        if not preds:
            # No missed rows for this emotion: report an empty row rather than
            # raising IndexError on the empty tally.
            print("{} & {} ({:.2f})".format(study_emo, "none", 0.0))
            continue

        wrong_labels, wrong_counts = np.unique(preds, return_counts=True)
        # Last element of the ascending argsort == first element of the
        # reversed order used originally, so tie-breaking is unchanged.
        top = np.argsort(wrong_counts)[-1]
        print("{} & {} ({:.2f})".format(study_emo, wrong_labels[top], wrong_counts[top] / np.sum(mask)))

In [18]:
# Most frequent wrong prediction per studied negative emotion, merge-LARGE logits.
print_individual(fname_merge_LARGE, df_emo_merge_LARGE)

fear & null (0.10)
remorse & sadness (0.05)
sadness & null (0.10)
anger & annoyance (0.18)
disgust & annoyance (0.13)
embarrassment & null (0.16)
disapproval & neutral (0.26)
nervousness & fear (0.30)
annoyance & neutral (0.21)
grief & sadness (0.33)
disappointment & neutral (0.15)


In [17]:
# Most frequent wrong prediction per studied negative emotion, LARGE logits.
print_individual(fname_LARGE, df_emo_LARGE)

fear & neutral (0.15)
remorse & sadness (0.14)
sadness & neutral (0.09)
anger & annoyance (0.21)
disgust & annoyance (0.18)
embarrassment & neutral (0.19)
disapproval & neutral (0.28)
nervousness & fear (0.17)
annoyance & neutral (0.25)
grief & anger (0.33)
disappointment & neutral (0.23)


In [15]:
# Most frequent wrong prediction per studied negative emotion, AFS logits.
print_individual(fname_AFS, df_emo_AFS)

fear & null (0.10)
remorse & null (0.09)
sadness & null (0.14)
anger & null (0.16)
disgust & null (0.23)
embarrassment & null (0.24)
disapproval & null (0.30)
nervousness & null (0.35)
annoyance & neutral (0.22)
grief & neutral (0.33)
disappointment & null (0.28)


In [16]:
# Most frequent wrong prediction per studied negative emotion, merge-AFS logits.
print_individual(fname_merge_AFS, df_emo_merge_AFS)

fear & neutral (0.13)
remorse & sadness (0.12)
sadness & remorse (0.08)
anger & annoyance (0.22)
disgust & annoyance (0.17)
embarrassment & null (0.08)
disapproval & neutral (0.25)
nervousness & fear (0.35)
annoyance & neutral (0.21)
grief & surprise (0.17)
disappointment & neutral (0.17)


In [None]:
# Manual error analysis for a single emotion: list the rows where it was
# missed and tally what was predicted instead.
study_emo = "anger"
print("label: ", emotion_text_to_label[study_emo])
study_idx = label2idx[emotion_text_to_label[study_emo]]
print("idx: ", study_idx)

# y_pred is not assigned anywhere else at notebook scope, so this cell raised
# NameError on a fresh kernel. NOTE(review): `fname` is assumed to be the run
# this analysis targets -- point it at another logits file if needed.
y_pred = get_y_pred(fname).numpy()

mask = gt[:, study_idx] > 0
print("total: ", np.sum(mask))


got_wrong_mask = (gt[:, study_idx] != y_pred[:, study_idx]) * mask
print("got wrong total: ", np.sum(got_wrong_mask))
print("\n")
wrong_row_idxs = np.where(got_wrong_mask)[0]

preds = []
for row_idx in wrong_row_idxs:
    print(row_idx)
    probe_result = probe(gt, y_pred, row_idx, study_emo)
    # probe is defined twice in this notebook: one version returns a list, the
    # other a (filtered, all) tuple. Accept both so only label names are tallied.
    preds += probe_result[0] if isinstance(probe_result, tuple) else probe_result
    print("\n")

wrong_labels, wrong_counts = np.unique(preds, return_counts=True)
order = np.argsort(wrong_counts)[::-1]
wrong_labels2 = [wrong_labels[k] for k in order]
wrong_counts2 = [wrong_counts[k] for k in order]

ss = []
for l,c in zip(wrong_labels2, wrong_counts2):
    print(l, c, c/np.sum(wrong_counts2))
    ss += ["{} ".format(l)]


# LaTeX table row. Raw strings keep the backslashes literal: in a normal string
# "\t" is a TAB, so "\textbf" only looked right in the cell's repr output.
# Printing emits the row exactly as it should be pasted into LaTeX.
# NOTE(review): the \textbf{} braces are empty in the original -- presumably
# the emotion name was meant to go inside; confirm before changing.
wrong_freq_row = r"\textbf{}  & "
wrong_freq_row += ", ".join(ss[:5])
print(wrong_freq_row + r" \\ ")

In [None]:
# Tally the labels predicted on rows where the studied emotion was missed.
# Relies on y_pred, got_wrong_mask and study_idx from the previous cell.
y_true_study_idx = np.ones(np.sum(got_wrong_mask)) * study_idx
y_pred_study_idx = []
for row in y_pred[got_wrong_mask]:
    # The original appended an empty list here, so the tally was always empty;
    # add the labels actually predicted for this row.
    y_pred_study_idx += [idx2label[j] for j in np.where(row)[0]]

wrong_labels, wrong_counts = np.unique(y_pred_study_idx, return_counts=True)
# Most frequently predicted label first.
for k in np.argsort(wrong_counts)[::-1]:
    print(label_to_emotion_text[wrong_labels[k]], wrong_counts[k])

In [None]:
# Inspect a single test example by positional row index.
df_test.iloc[1294]