In [1]:
import csv
import numpy as np
import pandas as pd
import json
import pickle

from main import train_setup
import sklearn.metrics as skmetric


In [2]:
# Read the emotion names, one per line, keeping the file's order.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/emotions.txt', 'r') as f:
    emotion_list = [line.strip() for line in f.readlines()]

# Two-way lookup between an emotion name and its position in the file.
emotion_text_to_label = {text: label for label, text in enumerate(emotion_list)}
label_to_emotion_text = dict(enumerate(emotion_list))


# Ekman-level grouping. The JSON is iterated below as {ekman_name: [go_emotion_name, ...]}.
# Read it inside a `with` block so the file handle is closed deterministically.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/ekman_mapping.json') as f:
    ekman_to_go = json.load(f)

# Inverse lookup (fine-grained name -> Ekman name); 'neutral' is seeded by hand
# and maps to itself.
go_to_ekman = {'neutral' : 'neutral'}
for k,v in ekman_to_go.items():
    for vi in v:
        go_to_ekman[vi] = k

# Index the Ekman names in JSON key order, then assign 'neutral' the next index.
ekman_texts = list(ekman_to_go.keys())
ekman_to_idx = dict(zip(ekman_texts, range(len(ekman_texts))))
ekman_to_idx["neutral"] = len(ekman_texts)


# Sentiment-level grouping. The JSON is iterated below as {sentiment_name: [go_emotion_name, ...]}.
# Read it inside a `with` block so the file handle is closed deterministically.
with open('/raid/xiaoyuz1/goemotions/goemotions/data/sentiment_mapping.json') as f:
    sentiment_to_go = json.load(f)

# Inverse lookup (fine-grained name -> sentiment name); 'neutral' is seeded by
# hand and maps to itself.
go_to_sentiment = {'neutral' : 'neutral'}
for k,v in sentiment_to_go.items():
    for vi in v:
        go_to_sentiment[vi] = k

# Index the sentiment names in JSON key order, then assign 'neutral' the next index.
sentiment_texts = list(sentiment_to_go.keys())
sentiment_to_idx = dict(zip(sentiment_texts, range(len(sentiment_texts))))
sentiment_to_idx["neutral"] = len(sentiment_texts)

In [3]:
# Seeds substituted into the per-seed result file templates used below.
SEEDS = [
    79719, 30010, 46921, 25577, 52538,
    56440, 41228, 66558, 48642, 69556,
]

In [4]:
def report_all_metrics(pkl_fname_template, seeds):
    """Score each seed's pickled predictions and summarise them across seeds.

    For every seed the prediction file is unpickled and macro-averaged
    precision, recall and F1 are computed with ``sklearn.metrics``. Two
    ``&``-separated rows are printed: the mean of each metric over the seeds
    and the (population) standard deviation of each metric.

    Args:
        pkl_fname_template: Path template containing one ``{}`` placeholder,
            filled with each seed via ``str.format``. Each file must unpickle
            to a 3-item sequence ``(gt, pred, _)``; the third item is ignored.
        seeds: Sequence of seeds to evaluate. It is also used as the ``Seeds``
            column of the returned table.

    Returns:
        pandas.DataFrame with columns ``Seeds``, ``Precision``, ``Recall`` and
        ``F1``, one row per seed.
    """
    precs = []
    recalls = []
    f1s = []

    for seed in seeds:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at prediction files you produced yourself.
        # The `with` block closes each file as soon as it has been read.
        with open(pkl_fname_template.format(seed), 'rb') as fh:
            gt, pred, _ = pickle.load(fh)

        precs.append(skmetric.precision_score(gt, pred, average="macro"))
        recalls.append(skmetric.recall_score(gt, pred, average="macro"))
        f1s.append(skmetric.f1_score(gt, pred, average="macro"))

    df_table = pd.DataFrame({'Seeds': seeds,
                             'Precision': precs,
                             'Recall': recalls,
                             'F1': f1s})

    prec_avg, prec_std = np.mean(precs), np.std(precs)
    recall_avg, recall_std = np.mean(recalls), np.std(recalls)
    f1_avg, f1_std = np.mean(f1s), np.std(f1s)

    # macro-average 0.40 0.63 0.46
    # The F1 mean is deliberately printed with four decimals; everything else
    # uses two.
    print(
        "textbf{macro-average}",
        " & ",
        "{:.2f}".format(prec_avg),
        " & ",
        "{:.2f}".format(recall_avg),
        " & ",
        "{:.4f}".format(f1_avg),
    )
    print(
        "textbf{std}",
        " & ",
        "{:.2f}".format(prec_std),
        " & ",
        "{:.2f}".format(recall_std),
        " & ",
        "{:.2f}".format(f1_std),
    )

    return df_table

In [6]:
# Result locations for the "multi_alt_semeval_oc" run; "{}" is filled with the seed.
# NOTE(review): the baseline cell below rebinds both of these names, so any cell
# that reads them later depends on which of the two cells ran last.
meta_fname_template = "/raid/xiaoyuz1/goemotions/save_path/multi_alt_semeval_oc/bert_seed-{}-meta.pkl"
pkl_fname_template = "/raid/xiaoyuz1/goemotions/pred_result/multi_alt_semeval_oc/test_pred_seed-{}_label.pkl"

# Only a single seed (12345) is evaluated for this run.
report_all_metrics(pkl_fname_template, seeds=[12345])

textbf{macro-average}  &  0.50  &  0.45  &  0.4632
textbf{std}  &  0.00  &  0.00  &  0.00



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Unnamed: 0,Seeds,Precision,Recall,F1
0,12345,0.504586,0.445829,0.46317


In [5]:
# Result locations for the baseline run; "{}" is filled with the seed.
# NOTE(review): these two names are also assigned by the "multi_alt_semeval_oc"
# cell, so later readers of them depend on which cell ran last.
meta_fname_template = "/raid/xiaoyuz1/goemotions/save_path/bert_seed-{}-meta.pkl"
pkl_fname_template = "/raid/xiaoyuz1/goemotions/pred_result/baseline/test_pred_seed-{}_label.pkl"

# Macro metrics for every seed in SEEDS.
report_all_metrics(pkl_fname_template, seeds=SEEDS)

textbf{macro-average}  &  0.52  &  0.49  &  0.4981
textbf{std}  &  0.01  &  0.01  &  0.01


Unnamed: 0,Seeds,Precision,Recall,F1
0,79719,0.501296,0.47288,0.480138
1,30010,0.518331,0.501488,0.505755
2,46921,0.533289,0.4934,0.50416
3,25577,0.526374,0.484424,0.498074
4,52538,0.516118,0.479313,0.489924
5,56440,0.534865,0.488637,0.503867
6,41228,0.521395,0.496942,0.503264
7,66558,0.513581,0.490751,0.497233
8,48642,0.533952,0.49851,0.510104
9,69556,0.498687,0.489112,0.488828


In [6]:
# Result locations for the Ekman-level baseline; "{}" is filled with the seed.
meta_fname_template_ekman = "/raid/xiaoyuz1/goemotions/save_path/ekman/bert_seed-{}-meta.pkl"
pkl_fname_template_ekman = "/raid/xiaoyuz1/goemotions/pred_result/baseline/ekman/test_pred_seed-{}_label.pkl"

# Macro metrics for every seed in SEEDS.
report_all_metrics(pkl_fname_template_ekman, seeds=SEEDS)

textbf{macro-average}  &  0.68  &  0.58  &  0.6201
textbf{std}  &  0.02  &  0.02  &  0.00


Unnamed: 0,Seeds,Precision,Recall,F1
0,79719,0.694714,0.565405,0.619259
1,30010,0.70723,0.563227,0.617371
2,46921,0.66196,0.602425,0.62451
3,25577,0.701746,0.573445,0.615358
4,52538,0.699632,0.566913,0.618604
5,56440,0.648198,0.597001,0.620539
6,41228,0.658231,0.594505,0.621407
7,66558,0.691155,0.577166,0.623894
8,48642,0.700901,0.566802,0.610979
9,69556,0.681875,0.605428,0.629473


In [9]:
# Result locations for the sentiment-level baseline; "{}" is filled with the seed.
meta_fname_template_sentiment = "/raid/xiaoyuz1/goemotions/save_path/sentiment/bert_seed-{}-meta.pkl"
pkl_fname_template_sentiment = "/raid/xiaoyuz1/goemotions/pred_result/baseline/sentiment/test_pred_seed-{}_label.pkl"

# Score the sentiment predictions defined in this cell (not the Ekman ones).
report_all_metrics(pkl_fname_template_sentiment, SEEDS)

textbf{macro-average}  &  0.71  &  0.68  &  0.6918
textbf{std}  &  0.01  &  0.01  &  0.01


Unnamed: 0,Seeds,Precision,Recall,F1
0,79719,0.726623,0.680632,0.702349
1,30010,0.718041,0.671241,0.690375
2,46921,0.712833,0.68145,0.69384
3,25577,0.707923,0.670894,0.685594
4,52538,0.717477,0.673625,0.694396
5,56440,0.689067,0.665779,0.676989
6,41228,0.711155,0.687335,0.696573
7,66558,0.732678,0.664688,0.696262
8,48642,0.71207,0.677816,0.69211
9,69556,0.709424,0.68218,0.690009


In [23]:
# Prediction files of the "fsj" baseline; "{}" is filled with the seed.
pkl_fname_template_fsj = "/raid/xiaoyuz1/goemotions/pred_result/baseline/fsj/test_pred_seed-{}_label.pkl"

# Macro metrics for every seed in SEEDS.
report_all_metrics(pkl_fname_template_fsj, seeds=SEEDS)

textbf{macro-average}  &  0.92  &  0.93  &  0.9238
textbf{std}  &  0.01  &  0.01  &  0.01


Unnamed: 0,Seeds,Precision,Recall,F1
0,79719,0.913743,0.923342,0.918515
1,30010,0.916631,0.923541,0.919971
2,46921,0.928016,0.936097,0.931988
3,25577,0.915522,0.919202,0.916991
4,52538,0.926877,0.919135,0.922981
5,56440,0.927808,0.929885,0.928744
6,41228,0.916604,0.914928,0.915422
7,66558,0.931454,0.927682,0.929404
8,48642,0.921413,0.929952,0.925472
9,69556,0.926714,0.929885,0.928189


In [None]:
# Macro metrics of the sentiment-level predictions for every seed in SEEDS.
report_all_metrics(pkl_fname_template_sentiment, seeds=SEEDS)

In [7]:
def individual_emotion_result(gt, pred, emotion_list, label2idx=None, text2idx=None):
    """Compute binary precision, recall and F1 for each emotion column.

    Args:
        gt: Array-like of ground-truth indicators, indexed as ``[:, column]``
            (so it must be 2-D). Presumably rows are examples -- confirm
            against the code that writes the prediction files.
        pred: Array-like of predicted indicators with the same layout as ``gt``.
        emotion_list: Emotion names; the position of a name in this list is
            its label.
        label2idx: Optional mapping from label (position in ``emotion_list``)
            to column index. Takes precedence over ``text2idx`` when given.
        text2idx: Mapping from emotion name to column index. Required when
            ``label2idx`` is None.

    Returns:
        pandas.DataFrame with columns ``Emotion``, ``Precision``, ``Recall``
        and ``F1``, one row per entry of ``emotion_list`` in the same order.

    Raises:
        AssertionError: if both ``label2idx`` and ``text2idx`` are None and
            ``emotion_list`` is not empty.
    """
    precs = []
    recalls = []
    f1s = []

    # Convert once up front; the conversion does not depend on the emotion.
    y_true_all = np.asarray(gt)
    y_pred_all = np.asarray(pred)

    for emotion_label, emotion_text in enumerate(emotion_list):
        if label2idx is not None:
            emotion_idx = label2idx[emotion_label]
        else:
            assert text2idx is not None
            emotion_idx = text2idx[emotion_text]

        # Score this emotion's column on its own as a binary problem.
        y_true = y_true_all[:, emotion_idx]
        y_pred = y_pred_all[:, emotion_idx]

        precs.append(skmetric.precision_score(y_true, y_pred, average="binary"))
        recalls.append(skmetric.recall_score(y_true, y_pred, average="binary"))
        f1s.append(skmetric.f1_score(y_true, y_pred, average="binary"))

    df_table_4 = pd.DataFrame({'Emotion': emotion_list,
                               'Precision': precs,
                               'Recall': recalls,
                               'F1': f1s})

    return df_table_4

In [8]:
def all_individual_emotion_results(emotion_list, pkl_fname_template, meta_fname_template, seeds):
    """Print per-emotion precision/recall/F1 averaged over seeds.

    For each seed the prediction pickle and the matching meta pickle are
    loaded, the label-to-column mapping is read from the meta data, and
    ``individual_emotion_result`` is called. One ``&``-separated row per
    emotion is then printed with each metric's mean over the seeds, formatted
    to two decimals and terminated by ``\\\\``.

    Args:
        emotion_list: Emotion names, in label order.
        pkl_fname_template: Path template with one ``{}`` placeholder for the
            seed. Each file must unpickle to a 3-item sequence
            ``(gt, pred, _)``; the third item is ignored.
        meta_fname_template: Path template with one ``{}`` placeholder for the
            seed. Each file must unpickle to an object for which
            ``meta['labels2idxes'][0][0]`` is a mapping whose keys and values
            are convertible with ``int``.
        seeds: Iterable of seeds to aggregate over.

    Returns:
        None. The rows are written to standard output.
    """
    df_tables = []
    for seed in seeds:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at files you produced yourself.
        # The `with` blocks close both files as soon as they have been read.
        with open(pkl_fname_template.format(seed), 'rb') as fh:
            gt, pred, _ = pickle.load(fh)

        with open(meta_fname_template.format(seed), "rb") as fh2:
            meta = pickle.load(fh2)

        # Normalise the stored mapping to int -> int before using it as a
        # column lookup.
        label2idx = meta['labels2idxes'][0][0]
        label2idx = {int(key) : int(value) for key, value in label2idx.items()}

        df_table = individual_emotion_result(gt, pred, emotion_list, label2idx=label2idx)
        df_tables.append(df_table)

    for i in range(len(emotion_list)):
        # Row i of every per-seed table refers to the same emotion.
        precs = [df_table.iloc[i]['Precision'] for df_table in df_tables]
        recalls = [df_table.iloc[i]['Recall'] for df_table in df_tables]
        f1s = [df_table.iloc[i]['F1'] for df_table in df_tables]

        prec_avg = np.mean(precs)
        recall_avg = np.mean(recalls)
        f1_avg = np.mean(f1s)

        print(
            emotion_list[i],
            " & ",
            "{:.2f}".format(prec_avg),
            " & ",
            "{:.2f}".format(recall_avg),
            " & ",
            "{:.2f}".format(f1_avg),
            # Space, two literal backslashes, space -- spelled with valid
            # escapes only.
            " \\\\ ",
        )

In [9]:
# Per-emotion rows for seed 12345 only, using whichever templates are currently
# bound to pkl_fname_template / meta_fname_template.
all_individual_emotion_results(
    emotion_list,
    pkl_fname_template,
    meta_fname_template,
    seeds=[12345],
)


Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.



admiration  &  0.66  &  0.66  &  0.66  \\ 
amusement  &  0.79  &  0.83  &  0.81  \\ 
anger  &  0.50  &  0.38  &  0.44  \\ 
annoyance  &  0.35  &  0.25  &  0.29  \\ 
approval  &  0.39  &  0.37  &  0.38  \\ 
caring  &  0.40  &  0.42  &  0.41  \\ 
confusion  &  0.37  &  0.47  &  0.41  \\ 
curiosity  &  0.48  &  0.48  &  0.48  \\ 
desire  &  0.50  &  0.33  &  0.39  \\ 
disappointment  &  0.27  &  0.24  &  0.25  \\ 
disapproval  &  0.34  &  0.37  &  0.35  \\ 
disgust  &  0.57  &  0.37  &  0.45  \\ 
embarrassment  &  0.56  &  0.38  &  0.45  \\ 
excitement  &  0.39  &  0.42  &  0.40  \\ 
fear  &  0.68  &  0.64  &  0.66  \\ 
gratitude  &  0.94  &  0.89  &  0.92  \\ 
grief  &  0.00  &  0.00  &  0.00  \\ 
joy  &  0.62  &  0.57  &  0.59  \\ 
love  &  0.73  &  0.82  &  0.77  \\ 
nervousness  &  0.21  &  0.13  &  0.16  \\ 
optimism  &  0.54  &  0.56  &  0.55  \\ 
pride  &  0.71  &  0.31  &  0.43  \\ 
realization  &  0.25  &  0.19  &  0.21  \\ 
relief  &  0.50  &  0.09  &  0.15  \\ 
remorse  &  0.59

In [10]:
# Per-emotion rows averaged over SEEDS, using whichever templates are currently
# bound to pkl_fname_template / meta_fname_template.
all_individual_emotion_results(
    emotion_list,
    pkl_fname_template,
    meta_fname_template,
    seeds=SEEDS,
)

admiration  &  0.65  &  0.68  &  0.66  \\ 
amusement  &  0.76  &  0.86  &  0.81  \\ 
anger  &  0.53  &  0.45  &  0.49  \\ 
annoyance  &  0.34  &  0.32  &  0.33  \\ 
approval  &  0.36  &  0.34  &  0.35  \\ 
caring  &  0.41  &  0.37  &  0.39  \\ 
confusion  &  0.39  &  0.46  &  0.42  \\ 
curiosity  &  0.47  &  0.54  &  0.50  \\ 
desire  &  0.55  &  0.42  &  0.48  \\ 
disappointment  &  0.30  &  0.24  &  0.26  \\ 
disapproval  &  0.39  &  0.35  &  0.37  \\ 
disgust  &  0.51  &  0.43  &  0.47  \\ 
embarrassment  &  0.50  &  0.38  &  0.43  \\ 
excitement  &  0.45  &  0.40  &  0.42  \\ 
fear  &  0.63  &  0.67  &  0.65  \\ 
gratitude  &  0.91  &  0.90  &  0.91  \\ 
grief  &  0.39  &  0.25  &  0.30  \\ 
joy  &  0.58  &  0.60  &  0.59  \\ 
love  &  0.74  &  0.83  &  0.78  \\ 
nervousness  &  0.34  &  0.37  &  0.35  \\ 
optimism  &  0.58  &  0.52  &  0.55  \\ 
pride  &  0.71  &  0.37  &  0.48  \\ 
realization  &  0.28  &  0.19  &  0.23  \\ 
relief  &  0.44  &  0.35  &  0.39  \\ 
remorse  &  0.55

In [19]:
# Per-category rows for the Ekman-level predictions; iterating the dict yields
# its keys in insertion order, so 'neutral' comes last.
all_individual_emotion_results(
    list(ekman_to_idx),
    pkl_fname_template_ekman,
    meta_fname_template_ekman,
    seeds=SEEDS,
)

anger  &  0.60  &  0.50  &  0.54  \\ 
disgust  &  0.68  &  0.35  &  0.46  \\ 
fear  &  0.67  &  0.66  &  0.67  \\ 
joy  &  0.83  &  0.82  &  0.83  \\ 
sadness  &  0.69  &  0.54  &  0.60  \\ 
surprise  &  0.62  &  0.59  &  0.60  \\ 
neutral  &  0.70  &  0.59  &  0.64  \\ 


In [10]:
# Per-category rows for the sentiment-level predictions; iterating the dict
# yields its keys in insertion order, so 'neutral' comes last.
all_individual_emotion_results(
    list(sentiment_to_idx),
    pkl_fname_template_sentiment,
    meta_fname_template_sentiment,
    seeds=SEEDS,
)

positive  &  0.84  &  0.82  &  0.83  \\ 
negative  &  0.70  &  0.68  &  0.69  \\ 
ambiguous  &  0.61  &  0.61  &  0.61  \\ 
neutral  &  0.70  &  0.60  &  0.64  \\ 


In [16]:
# Display the sentiment -> fine-grained emotion mapping loaded from sentiment_mapping.json.
sentiment_to_go

{'positive': ['amusement',
  'excitement',
  'joy',
  'love',
  'desire',
  'optimism',
  'caring',
  'pride',
  'admiration',
  'gratitude',
  'relief',
  'approval'],
 'negative': ['fear',
  'nervousness',
  'remorse',
  'embarrassment',
  'disappointment',
  'sadness',
  'grief',
  'disgust',
  'anger',
  'annoyance',
  'disapproval'],
 'ambiguous': ['realization', 'surprise', 'curiosity', 'confusion']}

In [16]:
# NOTE(review): disabled helper -- the whole definition below is commented out
# and nothing in the visible cells calls it. As written it would build a
# (len(mat), len(new_label_to_idx)) zero matrix and set a 1.0 in the remapped
# column for every non-zero entry of each input row.
# def remap_vector(mat, idx_to_go_label, label_to_go_text, go_to_dict, new_label_to_idx, only_consider_cols=None):
#     new_mat = np.zeros((len(mat), len(new_label_to_idx)))
    
#     for row_idx, row in enumerate(mat):
#         labels = np.where(row)[0]
#         for idx in labels:
#             if only_consider_cols is not None:
#                 if idx not in only_consider_cols:
#                     continue
#             go_text = label_to_go_text[idx_to_go_label[idx]]
#             new_label = go_to_dict[go_text]
#             new_idx = new_label_to_idx[new_label]
            
#             new_mat[row_idx, new_idx] = 1.0
#     return new_mat