In [8]:
import csv
import numpy as np
import pandas as pd
import json
import pickle

from main import train_setup
import sklearn.metrics as skmetric


In [2]:
# --- Fine-grained emotion vocabulary -----------------------------------------
# emotions.txt is read one name per line; the line position becomes the
# integer label id of that emotion.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/emotions.txt', 'r') as f:
    emotion_list = [line.strip() for line in f]

emotion_text_to_label = {text: label for label, text in enumerate(emotion_list)}
label_to_emotion_text = dict(enumerate(emotion_list))


# --- Ekman grouping ----------------------------------------------------------
# The JSON maps each Ekman category to a list of fine-grained emotion names.
# The file is opened in a `with` block so the handle is closed deterministically.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/ekman_mapping.json') as ekman_file:
    ekman_to_go = json.load(ekman_file)
# "neutral" is seeded by hand and maps to itself.
go_to_ekman = {'neutral' : 'neutral'}
for ekman_name, go_names in ekman_to_go.items():
    for go_name in go_names:
        go_to_ekman[go_name] = ekman_name
ekman_texts = list(ekman_to_go.keys())
ekman_to_idx = {text: idx for idx, text in enumerate(ekman_texts)}
# "neutral" takes the index right after the categories listed in the JSON.
ekman_to_idx["neutral"] = len(ekman_texts)


# --- Sentiment grouping (same construction as the Ekman grouping) ------------
with open('/raid/xiaoyuz1/goemotions/goemotions/data/sentiment_mapping.json') as sentiment_file:
    sentiment_to_go = json.load(sentiment_file)
go_to_sentiment = {'neutral' : 'neutral'}
for sentiment_name, go_names in sentiment_to_go.items():
    for go_name in go_names:
        go_to_sentiment[go_name] = sentiment_name
sentiment_texts = list(sentiment_to_go.keys())
sentiment_to_idx = {text: idx for idx, text in enumerate(sentiment_texts)}
sentiment_to_idx["neutral"] = len(sentiment_texts)

In [3]:
# Seeds substituted into the "...seed-{}..." filename templates; every
# evaluation below is run once per seed.
SEEDS = [
    79719,
    30010,
    46921,
    25577,
    52538,
    56440,
    41228,
    66558,
    48642,
    69556,
]

In [4]:
# Filename templates; "{}" is filled with a seed via str.format.
# *-meta.pkl   : pickled metadata; the cells below read meta['labels2idxes'] from it.
# *_label.pkl  : pickled 3-tuple whose first two items are used as (gt, pred).
meta_fname_template = "/raid/xiaoyuz1/goemotions/save_path/bert_seed-{}-meta.pkl"
pkl_fname_template = "/raid/xiaoyuz1/goemotions/pred_result/baseline/test_pred_seed-{}_label.pkl"

# Same two templates, pointing at the "ekman" sub-directories.
meta_fname_template_ekman = "/raid/xiaoyuz1/goemotions/save_path/ekman/bert_seed-{}-meta.pkl"
pkl_fname_template_ekman = "/raid/xiaoyuz1/goemotions/pred_result/baseline/ekman/test_pred_seed-{}_label.pkl"

In [21]:

def report_all_metrics(pkl_fname_template, seeds):
    """Compute macro precision/recall/F1 per seed and print their mean and std.

    Args:
        pkl_fname_template: Path template with one ``{}`` placeholder that is
            filled with each seed. Each file must unpickle to a 3-tuple whose
            first two items are the ground truth and the predictions.
        seeds: Iterable of seeds to evaluate.

    Returns:
        DataFrame with one row per seed and the columns
        ``Seeds``, ``Precision``, ``Recall`` and ``F1``.

    Side effects:
        Prints two ``&``-separated rows (mean, then std across seeds).
    """
    precs = []
    recalls = []
    f1s = []

    for seed in seeds:
        # Close the handle as soon as the payload is loaded (it used to leak).
        # NOTE: pickle.load can execute arbitrary code; only use trusted files.
        with open(pkl_fname_template.format(seed), 'rb') as fh:
            gt, pred, _ = pickle.load(fh)

        precs.append(skmetric.precision_score(gt, pred, average="macro"))
        recalls.append(skmetric.recall_score(gt, pred, average="macro"))
        f1s.append(skmetric.f1_score(gt, pred, average="macro"))

    df_table = pd.DataFrame({'Seeds': seeds,
                             'Precision': precs,
                             'Recall': recalls,
                             'F1': f1s})

    # np.std with default arguments is the population standard deviation (ddof=0).
    prec_avg, prec_std = np.mean(precs), np.std(precs)
    recall_avg, recall_std = np.mean(recalls), np.std(recalls)
    f1_avg, f1_std = np.mean(f1s), np.std(f1s)

    # macro-average 0.40 0.63 0.46
    # The mean F1 is deliberately kept at 4 decimals; every other cell uses 2.
    print(
        "textbf{macro-average}",
        " & ",
        "{:.2f}".format(prec_avg),
        " & ",
        "{:.2f}".format(recall_avg),
        " & ",
        "{:.4f}".format(f1_avg),
    )
    print(
        "textbf{std}",
        " & ",
        "{:.2f}".format(prec_std),
        " & ",
        "{:.2f}".format(recall_std),
        " & ",
        "{:.2f}".format(f1_std),
    )

    return df_table
    
# Evaluate the prediction files under the "ekman" directory for every seed in
# SEEDS; the returned per-seed table is the cell's displayed value.
report_all_metrics(pkl_fname_template_ekman, SEEDS)

textbf{macro-average}  &  0.68  &  0.58  &  0.6201
textbf{std}  &  0.02  &  0.02  &  0.00


Unnamed: 0,Seeds,Precision,Recall,F1
0,79719,0.694714,0.565405,0.619259
1,30010,0.70723,0.563227,0.617371
2,46921,0.66196,0.602425,0.62451
3,25577,0.701746,0.573445,0.615358
4,52538,0.699632,0.566913,0.618604
5,56440,0.648198,0.597001,0.620539
6,41228,0.658231,0.594505,0.621407
7,66558,0.691155,0.577166,0.623894
8,48642,0.700901,0.566802,0.610979
9,69556,0.681875,0.605428,0.629473


In [5]:
def individual_emotion_result(gt, pred, emotion_list, label2idx=None, text2idx=None):
    """Compute binary precision/recall/F1 separately for each emotion column.

    Args:
        gt: 2-D array-like of ground-truth indicators, indexed ``[row, column]``.
        pred: 2-D array-like of predictions, indexed the same way as ``gt``.
        emotion_list: Emotion names. The position of a name in this list is
            used as its label when ``label2idx`` is given.
        label2idx: Optional mapping from label (position in ``emotion_list``)
            to the column of ``gt``/``pred``. Takes precedence over
            ``text2idx``.
        text2idx: Optional mapping from emotion name to column. Required when
            ``label2idx`` is None.

    Returns:
        DataFrame with the columns ``Emotion``, ``Precision``, ``Recall`` and
        ``F1``, one row per entry of ``emotion_list``.
    """
    # Convert once up front; the conversion does not depend on the emotion.
    y_true_all = np.asarray(gt)
    y_pred_all = np.asarray(pred)

    precs = []
    recalls = []
    f1s = []

    for emotion_label, emotion_text in enumerate(emotion_list):
        if label2idx is not None:
            emotion_idx = label2idx[emotion_label]
        else:
            assert text2idx is not None, "either label2idx or text2idx must be given"
            emotion_idx = text2idx[emotion_text]

        # One call yields all three scores for this column.
        prec, recall, f1, _ = skmetric.precision_recall_fscore_support(
            y_true_all[:, emotion_idx], y_pred_all[:, emotion_idx], average="binary")

        precs.append(prec)
        recalls.append(recall)
        f1s.append(f1)

    df_table_4 = pd.DataFrame({'Emotion': emotion_list,
                               'Precision': precs,
                               'Recall': recalls,
                               'F1': f1s})

    return df_table_4

In [18]:
def all_individual_emotion_results(emotion_list, pkl_fname_template, meta_fname_template, seeds):
    """Print per-emotion precision/recall/F1, averaged over seeds, as table rows.

    Args:
        emotion_list: Sequence of emotion names; one output row per name.
        pkl_fname_template: Path template (one ``{}`` for the seed) of the
            pickled ``(gt, pred, _)`` tuples.
        meta_fname_template: Path template (one ``{}`` for the seed) of the
            pickled metadata; ``meta['labels2idxes'][0][0]`` is used as the
            label -> column mapping for that seed.
        seeds: Iterable of seeds to average over.

    Returns:
        None. One ``&``-separated row per emotion is printed, terminated by
        two backslashes.
    """
    df_tables = []
    for seed in seeds:
        # Both files are closed right after loading (the handles used to leak).
        # NOTE: pickle.load can execute arbitrary code; only use trusted files.
        with open(pkl_fname_template.format(seed), 'rb') as fh:
            gt, pred, _ = pickle.load(fh)
        with open(meta_fname_template.format(seed), "rb") as fh2:
            meta = pickle.load(fh2)

        # Keys and values are coerced to int before use as label / column ids.
        label2idx = {int(key): int(value)
                     for key, value in meta['labels2idxes'][0][0].items()}

        df_tables.append(
            individual_emotion_result(gt, pred, emotion_list, label2idx=label2idx))

    for i in range(len(emotion_list)):
        # Row i of every per-seed table belongs to emotion_list[i].
        prec_avg = np.mean([df_table.iloc[i]['Precision'] for df_table in df_tables])
        recall_avg = np.mean([df_table.iloc[i]['Recall'] for df_table in df_tables])
        f1_avg = np.mean([df_table.iloc[i]['F1'] for df_table in df_tables])

        print(
            emotion_list[i],
            " & ",
            "{:.2f}".format(prec_avg),
            " & ",
            "{:.2f}".format(recall_avg),
            " & ",
            "{:.2f}".format(f1_avg),
            # Same text as before (space, two backslashes, space), now written
            # with valid escapes instead of the invalid "\ " sequence.
            " \\\\ ",
        )

In [19]:
# Per-category rows for the names in ekman_to_idx, averaged over SEEDS.
all_individual_emotion_results(
    emotion_list=list(ekman_to_idx),
    pkl_fname_template=pkl_fname_template_ekman,
    meta_fname_template=meta_fname_template_ekman,
    seeds=SEEDS,
)

anger  &  0.60  &  0.50  &  0.54  \\ 
disgust  &  0.68  &  0.35  &  0.46  \\ 
fear  &  0.67  &  0.66  &  0.67  \\ 
joy  &  0.83  &  0.82  &  0.83  \\ 
sadness  &  0.69  &  0.54  &  0.60  \\ 
surprise  &  0.62  &  0.59  &  0.60  \\ 
neutral  &  0.70  &  0.59  &  0.64  \\ 


In [None]:

# NOTE(review): `meta_fname` is an empty placeholder, so open() fails until a
# real "*-meta.pkl" path is filled in. Later cells rely on the `label2idx` and
# `idx2label` built here.
meta_fname = ""
with open(meta_fname, "rb") as f:
    meta = pickle.load(f)

# Coerce the stored mapping to int -> int, then derive its inverse.
label2idx = {int(key): int(value) for key, value in meta['labels2idxes'][0][0].items()}
idx2label = dict((value, key) for key, value in label2idx.items())


In [16]:
# Restored from a commented-out definition: the FSJ, sentiment and Ekman cells
# below call remap_vector, so on a fresh kernel they would raise NameError.
def remap_vector(mat, idx_to_go_label, label_to_go_text, go_to_dict, new_label_to_idx, only_consider_cols=None):
    """Project a multi-hot matrix onto another label set.

    Args:
        mat: 2-D array-like; a non-zero entry at ``[row, col]`` marks column
            ``col`` as active for that row.
        idx_to_go_label: Maps a column index of ``mat`` to a label id.
        label_to_go_text: Maps that label id to its emotion name.
        go_to_dict: Maps the emotion name to a name in the new label set.
        new_label_to_idx: Maps a new-label name to its output column.
        only_consider_cols: Optional collection of input column indices;
            active columns outside it are ignored.

    Returns:
        Float array of shape ``(len(mat), len(new_label_to_idx))`` holding 1.0
        wherever at least one active input column maps to that output column,
        and 0.0 elsewhere.
    """
    new_mat = np.zeros((len(mat), len(new_label_to_idx)))

    for row_idx, row in enumerate(mat):
        for idx in np.where(row)[0]:
            # Plain int keeps dict lookups and membership tests predictable.
            idx = int(idx)
            if only_consider_cols is not None and idx not in only_consider_cols:
                continue
            go_text = label_to_go_text[idx_to_go_label[idx]]
            new_idx = new_label_to_idx[go_to_dict[go_text]]
            new_mat[row_idx, new_idx] = 1.0
    return new_mat

## Related to FSJ Analysis

> we subsample the existing “all”
dataset to select only data points originally labeled
with fear, joy, or sadness, for a final set of 4,136
data points (3,342 of which are the train set).

In [17]:
# Label ids of fear / sadness / joy, converted to strings.
FEAR_LABEL_STR, SADNESS_LABEL_STR, JOY_LABEL_STR = (
    str(emotion_text_to_label[name]) for name in ('fear', 'sadness', 'joy')
)

print("fear: ", FEAR_LABEL_STR)
print("sadness: ", SADNESS_LABEL_STR)
print("joy: ", JOY_LABEL_STR)

fear:  14
sadness:  25
joy:  17


In [18]:
# Count train rows carrying at least one of the fear / sadness / joy labels.
# Labels are compared as whole digit tokens: a plain substring test would also
# match a label id that is only part of a longer number (e.g. "14" in "114").
# astype(str) keeps this working if pandas parses the column as numbers.
fsj_label_strs = {FEAR_LABEL_STR, SADNESS_LABEL_STR, JOY_LABEL_STR}
train_label_col = pd.read_csv("/raid/xiaoyuz1/goemotions/goemotions/data/train.csv")['label'].astype(str)
acc_train = sum(
    not fsj_label_strs.isdisjoint(tokens)
    for tokens in train_label_col.str.findall(r"\d+")
)
print("In train, FSJ count: ", acc_train)

In train, FSJ count:  3342


In [19]:
# Count dev rows carrying at least one of the fear / sadness / joy labels.
# Labels are compared as whole digit tokens: a plain substring test would also
# match a label id that is only part of a longer number (e.g. "14" in "114").
# astype(str) keeps this working if pandas parses the column as numbers.
fsj_label_strs = {FEAR_LABEL_STR, SADNESS_LABEL_STR, JOY_LABEL_STR}
dev_label_col = pd.read_csv("/raid/xiaoyuz1/goemotions/goemotions/data/dev.csv")['label'].astype(str)
acc_dev = sum(
    not fsj_label_strs.isdisjoint(tokens)
    for tokens in dev_label_col.str.findall(r"\d+")
)
print("In dev, FSJ count: ", acc_dev)

In dev, FSJ count:  401


In [20]:
# Count test rows carrying at least one of the fear / sadness / joy labels.
# Labels are compared as whole digit tokens: a plain substring test would also
# match a label id that is only part of a longer number (e.g. "14" in "114").
# astype(str) keeps this working if pandas parses the column as numbers.
fsj_label_strs = {FEAR_LABEL_STR, SADNESS_LABEL_STR, JOY_LABEL_STR}
test_label_col = pd.read_csv("/raid/xiaoyuz1/goemotions/goemotions/data/test.csv")['label'].astype(str)
acc_test = sum(
    not fsj_label_strs.isdisjoint(tokens)
    for tokens in test_label_col.str.findall(r"\d+")
)
print("In test, FSJ count: ", acc_test)

In test, FSJ count:  393


In [21]:
# Sum of the three per-split FSJ counts.
print("Total, FSJ count: ", sum((acc_train, acc_dev, acc_test)))

Total, FSJ count:  4136


In [22]:
# Matrix columns of the three FSJ emotions, in the order fear, sadness, joy:
# emotion name -> label id -> column index.
idxs = [label2idx[emotion_text_to_label[name]] for name in ('fear', 'sadness', 'joy')]


def fsj_mask(gt):
    """Return a boolean row mask that is True where any FSJ column is positive.

    The columns tested are those listed in the module-level `idxs`.

    gt: (total_samples, total_class) = (5427, 28)
    """
    gt = np.asarray(gt)
    # Select only the FSJ columns and reduce across them per row.
    return (gt[:, idxs] > 0.0).any(axis=1)

In [23]:
# Show the selected column indices (order: fear, sadness, joy).
idxs

[2, 17, 19]

In [24]:
# Within the FSJ subset every emotion maps to itself ...
go_to_fsj = {name: name for name in ('fear', 'sadness', 'joy')}
# ... and is assigned an output column in that same order.
fsj_to_idx = {name: position for position, name in enumerate(('fear', 'sadness', 'joy'))}

In [26]:

# NOTE(review): `all_gt`, `all_pred` and `report_all_metric` are not defined in
# the cells visible above; they must exist in the kernel before this cell runs.

# Keep only rows whose ground truth carries an FSJ label. The mask is computed
# once and applied to both matrices so they stay row-aligned.
fsj_rows = fsj_mask(all_gt)
all_gt_fsj = np.asarray(all_gt)[fsj_rows]
all_pred_fsj = np.asarray(all_pred)[fsj_rows]

# Reduce both matrices to the three FSJ columns; other active columns are ignored.
all_gt_fsj = remap_vector(all_gt_fsj, idx2label, label_to_emotion_text, go_to_fsj, fsj_to_idx, only_consider_cols=idxs)
all_pred_fsj = remap_vector(all_pred_fsj, idx2label, label_to_emotion_text, go_to_fsj, fsj_to_idx, only_consider_cols=idxs)

report_all_metric(all_gt_fsj, all_pred_fsj)

Precision:  0.978266888150609
Recall:  0.6068774220948134
F1:  0.7468686090082116


In [37]:
# Remap ground truth and predictions to the sentiment label set with identical
# arguments, then score the remapped pair.
all_gt_sent, all_pred_sent = (
    remap_vector(matrix, idx2label, label_to_emotion_text, go_to_sentiment, sentiment_to_idx)
    for matrix in (all_gt, all_pred)
)

report_all_metric(all_gt_sent, all_pred_sent)

Precision:  0.6492570639502065
Recall:  0.6839896221121848
F1:  0.6656686806539363


In [38]:
# Remap ground truth and predictions to the Ekman label set with identical
# arguments, then score the remapped pair.
all_gt_ek, all_pred_ek = (
    remap_vector(matrix, idx2label, label_to_emotion_text, go_to_ekman, ekman_to_idx)
    for matrix in (all_gt, all_pred)
)

report_all_metric(all_gt_ek, all_pred_ek)

Precision:  0.5876198382694658
Recall:  0.6222255936727717
F1:  0.6033296321126239


In [40]:
# Per-category scores on the Ekman-remapped matrices; columns are looked up by
# category name through ekman_to_idx.
individual_emotion_result(
    all_gt_ek,
    all_pred_ek,
    list(ekman_to_idx),
    text2idx=ekman_to_idx,
)

Unnamed: 0,Emotion,Precision,Recall,F1
0,anger,0.497468,0.541322,0.51847
1,disgust,0.522936,0.463415,0.491379
2,fear,0.587719,0.683673,0.632075
3,joy,0.78458,0.822243,0.802971
4,sadness,0.551313,0.609499,0.578947
5,surprise,0.535302,0.615953,0.572802
6,neutral,0.634021,0.619474,0.626663


In [41]:
# Per-emotion scores on the original matrices; columns are looked up by label
# id through label2idx.
individual_emotion_result(
    gt=all_gt,
    pred=all_pred,
    emotion_list=emotion_list,
    label2idx=label2idx,
)

Unnamed: 0,Emotion,Precision,Recall,F1
0,admiration,0.636697,0.688492,0.661582
1,amusement,0.757576,0.852273,0.802139
2,anger,0.449074,0.489899,0.468599
3,annoyance,0.293629,0.33125,0.311307
4,approval,0.315789,0.324786,0.320225
5,caring,0.356688,0.414815,0.383562
6,confusion,0.385787,0.496732,0.434286
7,curiosity,0.448549,0.598592,0.512821
8,desire,0.553571,0.373494,0.446043
9,disappointment,0.270588,0.304636,0.286604


In [None]:
# Path to table_4.csv in the baseline results directory.
# NOTE(review): `fname` is not read by any cell visible here — confirm it is still needed.
fname = "/raid/xiaoyuz1/goemotions/pred_result/baseline/table_4.csv"


In [9]:
from absl import flags


# absl command-line flag definitions for a file-based evaluation: input files,
# output file, emotion list, and the binarisation threshold.
# NOTE(review): every use of FLAGS in the next cell is commented out, so these
# definitions currently have no effect on the results computed there.
FLAGS = flags.FLAGS
flags.DEFINE_string("test_data", None, "Test tsv file with true labels.")
flags.DEFINE_string("predictions", None, "Predictions tsv file.")
flags.DEFINE_string("output", "results.json", "Output json file.")
flags.DEFINE_string("emotion_file", "data/emotions.txt",
                    "File containing list of emotions.")
flags.DEFINE_boolean("add_neutral", True, "Whether to add neutral as emotion.")
# Last expression of the cell: its return value is what the notebook displays.
flags.DEFINE_float("threshold", 0.3, "Threshold for binarizing predictions.")



<absl.flags._flagvalues.FlagHolder at 0x7fc4a05a4b80>

In [26]:

# Overall and per-emotion metrics on the in-memory matrices.
# The earlier flag-driven variant of this cell (reading predictions / labels
# from TSV files and binarising with a threshold) was commented out and has
# been dropped; `all_gt` / `all_pred` are used directly instead.
# NOTE(review): `all_gt` and `all_pred` are not defined in the cells visible
# above; they must exist in the kernel before this cell runs.
true_mat = np.asarray(all_gt)
preds_mat = np.asarray(all_pred)

# One metric group per column of the matrices (previously hard-coded as 28).
num_emotions = true_mat.shape[1]

# No thresholding is applied: the predictions are scored exactly as given.
pred_ind = preds_mat.copy()
results = {}

results["accuracy"] = skmetric.accuracy_score(true_mat, pred_ind)

# Same call for each averaging mode; the keys become "<mode>_precision",
# "<mode>_recall" and "<mode>_f1", inserted in that order.
for average in ("macro", "micro", "weighted"):
    avg_precision, avg_recall, avg_f1, _ = skmetric.precision_recall_fscore_support(
        true_mat, pred_ind, average=average)
    results[average + "_precision"] = avg_precision
    results[average + "_recall"] = avg_recall
    results[average + "_f1"] = avg_f1

for i in range(num_emotions):
    # Column index -> label id -> emotion name.
    emotion = emotion_list[int(idx2label[i])]
    emotion_true = true_mat[:, i]
    emotion_pred = pred_ind[:, i]
    results[emotion + "_accuracy"] = skmetric.accuracy_score(emotion_true, emotion_pred)
    results[emotion + "_precision"], results[emotion + "_recall"], results[
        emotion + "_f1"], _ = skmetric.precision_recall_fscore_support(
            emotion_true, emotion_pred, average="binary")


In [27]:
# Display the metrics dict: overall scores first, then four entries per emotion.
results

{'accuracy': 0.4203058780173208,
 'macro_precision': 0.49868664451221195,
 'macro_recall': 0.48911210794194065,
 'macro_f1': 0.4888277227746614,
 'micro_precision': 0.5620967741935484,
 'micro_recall': 0.5506399115184073,
 'micro_f1': 0.5563093622795116,
 'weighted_precision': 0.5605414951345004,
 'weighted_recall': 0.5506399115184073,
 'weighted_f1': 0.5536519396216166,
 'neutral_accuracy': 0.7626681407775935,
 'neutral_precision': 0.6537276648182379,
 'neutral_recall': 0.5937325125909345,
 'neutral_f1': 0.6222873900293254,
 'anger_accuracy': 0.9618573797678275,
 'anger_precision': 0.47593582887700536,
 'anger_recall': 0.4494949494949495,
 'anger_f1': 0.4623376623376624,
 'fear_accuracy': 0.9896812235120693,
 'fear_precision': 0.627906976744186,
 'fear_recall': 0.6923076923076923,
 'fear_f1': 0.6585365853658537,
 'annoyance_accuracy': 0.9198452183526811,
 'annoyance_precision': 0.31511254019292606,
 'annoyance_recall': 0.30625,
 'annoyance_f1': 0.31061806656101426,
 'surprise_accuracy