Final Thesis Coding

!pip install emoji

!pip install contractions

!pip install transformers

!pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116

!pip install wordcloud

In [1]:
# Data manipulation libraries
import sys, os
import pandas as pd
import numpy as np
import json

import emoji
import contractions
import re

# Scikit-learn packages
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

# Packages to define a BERT model
from transformers import BertTokenizerFast as BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
from transformers import AutoConfig, AutoModel, BertConfig, BertTokenizerFast, TFBertModel

# from tqdm.auto import tqdm
import tqdm
import torch
from torch.autograd import Variable
import string
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from torch.optim import Adadelta
import torch.nn.functional as F

# packages for visualization
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

from wordcloud import WordCloud
from IPython.display import Image
from IPython import display


%matplotlib inline
%config InlineBackend.figure_format='retina'

import itertools
import math

RANDOM_SEED = 42

In [2]:
import random
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONSOUJANYA'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Some cudnn methods can be random even after fixing the seed 
    # unless you tell it to be deterministic
    torch.backends.cudnn.deterministic = True

seed_everything()

## 1.2 - Loading datasets and lists of emotions

First, let's load our clean data.

In [3]:
# Importing train, validation and test datasets with preprocessed texts and labels
train_GE = pd.read_csv("train_clean.csv")
val_GE = pd.read_csv("val_clean.csv")
test_GE = pd.read_csv("test_clean.csv")

# Shape validation
print(train_GE.shape)
print(val_GE.shape)
print(test_GE.shape)


(43410, 29)
(5426, 29)
(5427, 29)


In [4]:
train_GE_Sampled = train_GE
val_GE_Sampled = val_GE
test_GE_Sampled = test_GE

# Shape validation
print(train_GE_Sampled.shape)
print(val_GE_Sampled.shape)
print(test_GE_Sampled.shape)

(43410, 29)
(5426, 29)
(5427, 29)


In [5]:
target_cols = [
 'admiration', 
    'amusement', 
    'anger', 
    'annoyance', 
    'approval', 
    'caring', 
    'confusion', 
    'curiosity', 
    'desire', 
    'disappointment', 
    'disapproval', 
    'disgust', 
    'embarrassment', 
    'excitement', 
    'fear', 
    'gratitude', 
    'grief', 
    'joy', 
    'love', 
    'nervousness', 
    'optimism', 
    'pride', 
    'realization', 
    'relief', 
    'remorse', 
    'sadness', 
    'surprise',
    'neutral'
]

In [6]:
train_GE_Sampled = train_GE_Sampled.rename(columns={'Clean_text': 'text'})
val_GE_Sampled = val_GE_Sampled.rename(columns={'Clean_text': 'text'})
test_GE_Sampled = test_GE_Sampled.rename(columns={'Clean_text': 'text'})


In [7]:
# Loading emotion labels for GoEmotions taxonomy
with open("emotions.txt", "r") as file:
    GE_taxonomy = file.read().split("\n")

print("Emotions on GoEmotions taxonomy are : \n{}".format(GE_taxonomy))

print()

# Loading emotion labels for Ekman taxonomy
with open("ekman_labels.txt", "r") as file:
    Ekman_taxonomy = file.read().split("\n")

print("Emotions on Ekman taxonomy are : \n{}".format(Ekman_taxonomy))

Emotions on GoEmotions taxonomy are : 
['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']

Emotions on Ekman taxonomy are : 
['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']


In [8]:
GE_taxonomy

['admiration',
 'amusement',
 'anger',
 'annoyance',
 'approval',
 'caring',
 'confusion',
 'curiosity',
 'desire',
 'disappointment',
 'disapproval',
 'disgust',
 'embarrassment',
 'excitement',
 'fear',
 'gratitude',
 'grief',
 'joy',
 'love',
 'nervousness',
 'optimism',
 'pride',
 'realization',
 'relief',
 'remorse',
 'sadness',
 'surprise',
 'neutral']

In [9]:
GE_taxonomy_no_neu = GE_taxonomy.copy()
GE_taxonomy_no_neu.remove('neutral')

In [10]:
GE_taxonomy_no_neu

['admiration',
 'amusement',
 'anger',
 'annoyance',
 'approval',
 'caring',
 'confusion',
 'curiosity',
 'desire',
 'disappointment',
 'disapproval',
 'disgust',
 'embarrassment',
 'excitement',
 'fear',
 'gratitude',
 'grief',
 'joy',
 'love',
 'nervousness',
 'optimism',
 'pride',
 'realization',
 'relief',
 'remorse',
 'sadness',
 'surprise']

sampled_train_df = train_GE_Sampled.sample(frac=1)

len(train_GE_Sampled), len(sampled_train_df)

sampled_train_df.head()

print(sampled_train_df.shape)
print(test_GE.shape)

In [11]:
sampled_train_df = train_GE_Sampled[['text', *target_cols]]

In [12]:
sampled_train_df

Unnamed: 0,text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,my favourite food is anything i did not have t...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,now if he does off himself everyone will think...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,why the fuck is bayless isoing,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,to make her feel threatened,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,dirty southern wankers,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43405,added you mate well i have just got the bow an...,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
43406,always thought that was funny but is it a refe...,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
43407,what are you talking about ? anything bad that...,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43408,more like a baptism with sexy results !,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Creating GoEmotion excluding neutral emotion

In [13]:
df_train_GE_no_neu = sampled_train_df.copy()
df_val_GE_no_neu = val_GE_Sampled.copy()
df_test_GE_no_neu = test_GE_Sampled.copy()

df_train_GE_no_neu = df_train_GE_no_neu.drop(columns=['neutral'])
df_val_GE_no_neu = df_val_GE_no_neu.drop(columns=['neutral'])
df_test_GE_no_neu = df_test_GE_no_neu.drop(columns=['neutral'])

Then, we need remove all the samples that have been left without a label.

In [14]:
# Removing samples with only 0 in their labels
df_train_GE_no_neu = df_train_GE_no_neu.loc[ df_train_GE_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]
df_val_GE_no_neu = df_val_GE_no_neu.loc[ df_val_GE_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]
df_test_GE_no_neu = df_test_GE_no_neu.loc[ df_test_GE_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]

# Shape validation
print(df_train_GE_no_neu.shape)
print(df_val_GE_no_neu.shape)
print(df_test_GE_no_neu.shape)

(30587, 28)
(3834, 28)
(3821, 28)


In [15]:
# Creating train, validation and test variables
X_train = train_GE_Sampled['text']
y_train = train_GE_Sampled.loc[:, GE_taxonomy].values.astype(float)
X_train_no_neu = df_train_GE_no_neu['text']
y_train_no_neu = df_train_GE_no_neu.loc[:, GE_taxonomy_no_neu].values.astype(float)

X_val = val_GE_Sampled['text']
y_val = val_GE_Sampled.loc[:, GE_taxonomy].values.astype(float)
X_val_no_neu = df_val_GE_no_neu['text']
y_val_no_neu = df_val_GE_no_neu.loc[:, GE_taxonomy_no_neu].values.astype(float)

X_test = test_GE_Sampled['text']
y_test = test_GE_Sampled.loc[:, GE_taxonomy].values.astype(float)
X_test_no_neu = df_test_GE_no_neu['text']
y_test_no_neu = df_test_GE_no_neu.loc[:, GE_taxonomy_no_neu].values.astype(float)

### Creating Ekman lables

In [16]:
df_train = pd.read_csv('https://github.com/google-research/google-research/raw/master/goemotions/data/train.tsv', sep='\t', header=None, names=['Text', 'Class', 'ID']).drop('ID', axis=1)
df_val = pd.read_csv('https://github.com/google-research/google-research/raw/master/goemotions/data/dev.tsv', sep='\t', header=None, names=['Text', 'Class', 'ID']).drop('ID', axis=1)
df_test = pd.read_csv('https://github.com/google-research/google-research/raw/master/goemotions/data/test.tsv', sep='\t', header=None, names=['Text', 'Class', 'ID']).drop('ID', axis=1)


In [17]:
df_train

Unnamed: 0,Text,Class
0,My favourite food is anything I didn't have to...,27
1,"Now if he does off himself, everyone will thin...",27
2,WHY THE FUCK IS BAYLESS ISOING,2
3,To make her feel threatened,14
4,Dirty Southern Wankers,3
...,...,...
43405,Added you mate well I’ve just got the bow and ...,18
43406,Always thought that was funny but is it a refe...,6
43407,What are you talking about? Anything bad that ...,3
43408,"More like a baptism, with sexy results!",13


In [18]:
df_train['List of classes'] = df_train['Class'].apply(lambda x: x.split(','))
df_train['Len of classes'] = df_train['List of classes'].apply(lambda x: len(x))
df_val['List of classes'] = df_val['Class'].apply(lambda x: x.split(','))
df_val['Len of classes'] = df_val['List of classes'].apply(lambda x: len(x))
df_test['List of classes'] = df_test['Class'].apply(lambda x: x.split(','))
df_test['Len of classes'] = df_test['List of classes'].apply(lambda x: len(x))

In [19]:
df_train

Unnamed: 0,Text,Class,List of classes,Len of classes
0,My favourite food is anything I didn't have to...,27,[27],1
1,"Now if he does off himself, everyone will thin...",27,[27],1
2,WHY THE FUCK IS BAYLESS ISOING,2,[2],1
3,To make her feel threatened,14,[14],1
4,Dirty Southern Wankers,3,[3],1
...,...,...,...,...
43405,Added you mate well I’ve just got the bow and ...,18,[18],1
43406,Always thought that was funny but is it a refe...,6,[6],1
43407,What are you talking about? Anything bad that ...,3,[3],1
43408,"More like a baptism, with sexy results!",13,[13],1


In [20]:
with open('ekman_mapping.json') as file:
    ekman_mapping = json.load(file)

In [21]:
emotion_file = open("emotions.txt", "r")
emotion_list = emotion_file.read()
emotion_list = emotion_list.split("\n")
print(emotion_list)

['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']


In [22]:
def idx2class(idx_list):
    arr = []
    for i in idx_list:
        arr.append(emotion_list[int(i)])
    return arr

In [23]:
df_train['Emotions'] = df_train['List of classes'].apply(idx2class)
df_val['Emotions'] = df_val['List of classes'].apply(idx2class)
df_test['Emotions'] = df_test['List of classes'].apply(idx2class)

In [24]:
ekman_mapping.update({'neutral':['neutral']})

In [25]:
ekman_mapping

{'anger': ['anger', 'annoyance', 'disapproval'],
 'disgust': ['disgust'],
 'fear': ['fear', 'nervousness'],
 'joy': ['joy',
  'amusement',
  'approval',
  'excitement',
  'gratitude',
  'love',
  'optimism',
  'relief',
  'pride',
  'admiration',
  'desire',
  'caring'],
 'sadness': ['sadness', 'disappointment', 'embarrassment', 'grief', 'remorse'],
 'surprise': ['surprise', 'realization', 'confusion', 'curiosity'],
 'neutral': ['neutral']}

In [26]:
def EmotionMapping(emotion_list):
    map_list = []
    
    for i in emotion_list:
        if i in ekman_mapping['anger']:
            map_list.append('anger')
        if i in ekman_mapping['disgust']:
            map_list.append('disgust')
        if i in ekman_mapping['fear']:
            map_list.append('fear')
        if i in ekman_mapping['joy']:
            map_list.append('joy')
        if i in ekman_mapping['sadness']:
            map_list.append('sadness')
        if i in ekman_mapping['surprise']:
            map_list.append('surprise')
        if i in ekman_mapping['neutral']:
            map_list.append('neutral')
        
            
    return map_list

In [27]:
df_train['Mapped Emotions'] = df_train['Emotions'].apply(EmotionMapping)
df_val['Mapped Emotions'] = df_val['Emotions'].apply(EmotionMapping)
df_test['Mapped Emotions'] = df_test['Emotions'].apply(EmotionMapping)

In [28]:
df_train.head(3)

Unnamed: 0,Text,Class,List of classes,Len of classes,Emotions,Mapped Emotions
0,My favourite food is anything I didn't have to...,27,[27],1,[neutral],[neutral]
1,"Now if he does off himself, everyone will thin...",27,[27],1,[neutral],[neutral]
2,WHY THE FUCK IS BAYLESS ISOING,2,[2],1,[anger],[anger]


In [29]:
# OneHot encoding for multi-label classification
for emo in ekman_mapping:
    df_train[emo] = np.zeros((len(df_train),1))
    df_train[emo] = df_train['Mapped Emotions'].apply(lambda x: 1 if emo in x else 0)

for emo in ekman_mapping:
    df_val[emo] = np.zeros((len(df_val),1))
    df_val[emo] = df_val['Mapped Emotions'].apply(lambda x: 1 if emo in x else 0)    

for emo in ekman_mapping:
    df_test[emo] = np.zeros((len(df_test),1))
    df_test[emo] = df_test['Mapped Emotions'].apply(lambda x: 1 if emo in x else 0)



In [30]:
df_train.head(3)

Unnamed: 0,Text,Class,List of classes,Len of classes,Emotions,Mapped Emotions,anger,disgust,fear,joy,sadness,surprise,neutral
0,My favourite food is anything I didn't have to...,27,[27],1,[neutral],[neutral],0,0,0,0,0,0,1
1,"Now if he does off himself, everyone will thin...",27,[27],1,[neutral],[neutral],0,0,0,0,0,0,1
2,WHY THE FUCK IS BAYLESS ISOING,2,[2],1,[anger],[anger],1,0,0,0,0,0,0


In [31]:
df_train.drop(['Class','List of classes','Len of classes','Emotions','Mapped Emotions'], axis=1, inplace=True)
df_val.drop(['Class','List of classes','Len of classes','Emotions','Mapped Emotions'], axis=1, inplace=True)
df_test.drop(['Class','List of classes','Len of classes','Emotions','Mapped Emotions'], axis=1, inplace=True)


df_train_no_neu = df_train.copy()
df_val_no_neu = df_val.copy()
df_test_no_neu = df_test.copy()

df_train_no_neu = df_train_no_neu.drop(columns=['neutral'])
df_val_no_neu = df_val_no_neu.drop(columns=['neutral'])
df_test_no_neu = df_test_no_neu.drop(columns=['neutral'])

Then, we need remove all the samples that have been left without a label.

In [32]:
# Removing samples with only 0 in their labels
df_train_no_neu = df_train_no_neu.loc[ df_train_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]
df_val_no_neu = df_val_no_neu.loc[ df_val_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]
df_test_no_neu = df_test_no_neu.loc[ df_test_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]

# Shape validation
print(df_train_no_neu.shape)
print(df_val_no_neu.shape)
print(df_test_no_neu.shape)

(30587, 7)
(3834, 7)
(3821, 7)


In [33]:
# Shape validation
print(df_train_no_neu.shape)
print(df_val_no_neu.shape)
print(df_test_no_neu.shape)

(30587, 7)
(3834, 7)
(3821, 7)


In [34]:
df_train_no_neu.head(3)

Unnamed: 0,Text,anger,disgust,fear,joy,sadness,surprise
2,WHY THE FUCK IS BAYLESS ISOING,1,0,0,0,0,0
3,To make her feel threatened,0,0,1,0,0,0
4,Dirty Southern Wankers,1,0,0,0,0,0


In [35]:
# Building a preprocessing function to clean text
def preprocess_corpus(x):

  # Adding a space between words and punctation
  x = re.sub( r'([a-zA-Z\[\]])([,;.!?])', r'\1 \2', x)
  x = re.sub( r'([,;.!?])([a-zA-Z\[\]])', r'\1 \2', x)
  
  # Demojize
  x = emoji.demojize(x)
  
  # Expand contraction
  x = contractions.fix(x)
  
  # Lower
  x = x.lower()

  #correct some acronyms/typos/abbreviations  
  x = re.sub(r"lmao", "laughing my ass off", x)  
  x = re.sub(r"amirite", "am i right", x)
  x = re.sub(r"\b(tho)\b", "though", x)
  x = re.sub(r"\b(ikr)\b", "i know right", x)
  x = re.sub(r"\b(ya|u)\b", "you", x)
  x = re.sub(r"\b(eu)\b", "europe", x)
  x = re.sub(r"\b(da)\b", "the", x)
  x = re.sub(r"\b(dat)\b", "that", x)
  x = re.sub(r"\b(dats)\b", "that is", x)
  x = re.sub(r"\b(cuz)\b", "because", x)
  x = re.sub(r"\b(fkn)\b", "fucking", x)
  x = re.sub(r"\b(tbh)\b", "to be honest", x)
  x = re.sub(r"\b(tbf)\b", "to be fair", x)
  x = re.sub(r"faux pas", "mistake", x)
  x = re.sub(r"\b(btw)\b", "by the way", x)
  x = re.sub(r"\b(bs)\b", "bullshit", x)
  x = re.sub(r"\b(kinda)\b", "kind of", x)
  x = re.sub(r"\b(bruh)\b", "bro", x)
  x = re.sub(r"\b(w/e)\b", "whatever", x)
  x = re.sub(r"\b(w/)\b", "with", x)
  x = re.sub(r"\b(w/o)\b", "without", x)
  x = re.sub(r"\b(doj)\b", "department of justice", x)
  
  #replace some words with multiple occurences of a letter, example "coooool" turns into --> cool
  x = re.sub(r"\b(j+e{2,}z+e*)\b", "jeez", x)
  x = re.sub(r"\b(co+l+)\b", "cool", x)
  x = re.sub(r"\b(g+o+a+l+)\b", "goal", x)
  x = re.sub(r"\b(s+h+i+t+)\b", "shit", x)
  x = re.sub(r"\b(o+m+g+)\b", "omg", x)
  x = re.sub(r"\b(w+t+f+)\b", "wtf", x)
  x = re.sub(r"\b(w+h+a+t+)\b", "what", x)
  x = re.sub(r"\b(y+e+y+|y+a+y+|y+e+a+h+)\b", "yeah", x)
  x = re.sub(r"\b(w+o+w+)\b", "wow", x)
  x = re.sub(r"\b(w+h+y+)\b", "why", x)
  x = re.sub(r"\b(s+o+)\b", "so", x)
  x = re.sub(r"\b(f)\b", "fuck", x)
  x = re.sub(r"\b(w+h+o+p+s+)\b", "whoops", x)
  x = re.sub(r"\b(ofc)\b", "of course", x)
  x = re.sub(r"\b(the us)\b", "usa", x)
  x = re.sub(r"\b(gf)\b", "girlfriend", x)
  x = re.sub(r"\b(hr)\b", "human ressources", x)
  x = re.sub(r"\b(mh)\b", "mental health", x)
  x = re.sub(r"\b(idk)\b", "i do not know", x)
  x = re.sub(r"\b(gotcha)\b", "i got you", x)
  x = re.sub(r"\b(y+e+p+)\b", "yes", x)
  x = re.sub(r"\b(a*ha+h[ha]*|a*ha +h[ha]*)\b", "haha", x)
  x = re.sub(r"\b(o?l+o+l+[ol]*)\b", "lol", x)
  x = re.sub(r"\b(o*ho+h[ho]*|o*ho +h[ho]*)\b", "ohoh", x)
  x = re.sub(r"\b(o+h+)\b", "oh", x)
  x = re.sub(r"\b(a+h+)\b", "ah", x)
  x = re.sub(r"\b(u+h+)\b", "uh", x)

  # Handling emojis
  x = re.sub(r"<3", " love ", x)
  x = re.sub(r"xd", " smiling_face_with_open_mouth_and_tightly_closed_eyes ", x)
  x = re.sub(r":\)", " smiling_face ", x)
  x = re.sub(r"^_^", " smiling_face ", x)
  x = re.sub(r"\*_\*", " star_struck ", x)
  x = re.sub(r":\(", " frowning_face ", x)
  x = re.sub(r":\^\(", " frowning_face ", x)
  x = re.sub(r";\(", " frowning_face ", x)
  x = re.sub(r":\/",  " confused_face", x)
  x = re.sub(r";\)",  " wink", x)
  x = re.sub(r">__<",  " unamused ", x)
  x = re.sub(r"\b([xo]+x*)\b", " xoxo ", x)
  x = re.sub(r"\b(n+a+h+)\b", "no", x)

  # Handling special cases of text
  x = re.sub(r"h a m b e r d e r s", "hamberders", x)
  x = re.sub(r"b e n", "ben", x)
  x = re.sub(r"s a t i r e", "satire", x)
  x = re.sub(r"y i k e s", "yikes", x)
  x = re.sub(r"s p o i l e r", "spoiler", x)
  x = re.sub(r"thankyou", "thank you", x)
  x = re.sub(r"a^r^o^o^o^o^o^o^o^n^d", "around", x)

  # Remove special characters and numbers replace by space + remove double space
  x = re.sub(r"\b([.]{3,})"," dots ", x)
  x = re.sub(r"[^A-Za-z!?_]+"," ", x)
  x = re.sub(r"\b([s])\b *","", x)
  x = re.sub(r" +"," ", x)
  x = x.strip()

  return x

In [36]:
# Defining the number of samples in train, validation and test dataset
size_train = df_train.shape[0]
size_val = df_val.shape[0]
size_test = df_test.shape[0]

# Defining the total number of samples
size_all = size_train + size_val + size_test

size_train

43410

In [37]:
# Shape of train, validation and test datasets
print("Train dataset has {} samples and represents {:.2f}% of overall data".format(size_train, size_train/size_all*100))
print("Validation dataset has {} samples and represents {:.2f}% of overall data".format(size_val, size_val/size_all*100))
print("Test dataset has {} samples and represents {:.2f}% of overall data".format(size_test, size_test/size_all*100))
print()
print("The total number of samples is : {}".format(size_all))

Train dataset has 43410 samples and represents 80.00% of overall data
Validation dataset has 5426 samples and represents 10.00% of overall data
Test dataset has 5427 samples and represents 10.00% of overall data

The total number of samples is : 54263


In [38]:
# Concatenating the 3 datasets for labels preprocessing
df_all = pd.concat([df_train, df_val, df_test], axis=0).reset_index(drop=True)

# Preview of data
print(df_all.head(5))

                                                Text  anger  disgust  fear  \
0  My favourite food is anything I didn't have to...      0        0     0   
1  Now if he does off himself, everyone will thin...      0        0     0   
2                     WHY THE FUCK IS BAYLESS ISOING      1        0     0   
3                        To make her feel threatened      0        0     1   
4                             Dirty Southern Wankers      1        0     0   

   joy  sadness  surprise  neutral  
0    0        0         0        1  
1    0        0         0        1  
2    0        0         0        0  
3    0        0         0        0  
4    0        0         0        0  


In [39]:
print(df_all.shape)

(54263, 8)


In [40]:
# Applying the preprocessing function on the dataset
df_all["Clean_text"] = df_all["Text"].apply(preprocess_corpus)

# Preview of data
print(df_all[['Text', 'Clean_text']].sample(5))

                                                    Text  \
34480                         Whoa this is really creepy   
15157  Ok, I will take this with a pinch of salt, but...   
28594  I love bloodborne, I started my second play th...   
20091  Oh my goodness, I'm so glad you were there too...   
14786  Yeah -- a woman who expects a guy to never eve...   

                                              Clean_text  
34480                         whoa this is really creepy  
15157  ok i will take this with a pinch of salt but t...  
28594  i love bloodborne i started my second play thr...  
20091  oh my goodness i am so glad you were there too...  
14786  yeah a woman who expects a guy to never even t...  


In [41]:
# Keeping only necessary columns
#df_all = df_all.drop(['Class','List of classes','Len of classes','Emotions'], axis=1)
df_all.head(3)

Unnamed: 0,Text,anger,disgust,fear,joy,sadness,surprise,neutral,Clean_text
0,My favourite food is anything I didn't have to...,0,0,0,0,0,0,1,my favourite food is anything i did not have t...
1,"Now if he does off himself, everyone will thin...",0,0,0,0,0,0,1,now if he does off himself everyone will think...
2,WHY THE FUCK IS BAYLESS ISOING,1,0,0,0,0,0,0,why the fuck is bayless isoing


In [42]:
# Dropping raw text column
df_all = df_all[ ['Clean_text','anger','disgust','fear','joy','sadness','surprise','neutral'] ]
df_all

Unnamed: 0,Clean_text,anger,disgust,fear,joy,sadness,surprise,neutral
0,my favourite food is anything i did not have t...,0,0,0,0,0,0,1
1,now if he does off himself everyone will think...,0,0,0,0,0,0,1
2,why the fuck is bayless isoing,1,0,0,0,0,0,0
3,to make her feel threatened,0,0,1,0,0,0,0
4,dirty southern wankers,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
54258,thanks i was diagnosed with bp after the hospi...,0,0,0,1,0,0,0
54259,well that makes sense,0,0,0,1,0,0,0
54260,daddy issues name,0,0,0,0,0,0,1
54261,so glad i discovered that subreddit a couple m...,0,0,0,1,0,0,0


In [43]:
emotion_dict={
"anger": 0,
"disgust": 1,
"fear": 2,
"joy": 3,
"sadness": 4,
"surprise": 5,
"neutral":6
}

# Dropping Mapped Emotions column

In [44]:
# Building a function that will divide in train, validation and test sets
def get_train_val_test(df):
    train = df.iloc[:size_train, :]
    val = df.iloc[size_train:size_train+size_val, :]
    test = df.iloc[size_train+size_val:size_train+size_val+size_test, :]
    return train, val, test

In [45]:
# Dividing back in train, validation and test datasets (GoEmotions)
train_ekman, val_ekman, test_ekman = get_train_val_test(df_all)
print(train_ekman.shape)
print(val_ekman.shape)
print(test_ekman.shape)

(43410, 8)
(5426, 8)
(5427, 8)


In [46]:
train_ekman.rename(columns={'Clean_text':'text'}, inplace=True)
val_ekman.rename(columns={'Clean_text':'text'}, inplace=True)
test_ekman.rename(columns={'Clean_text':'text'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_ekman.rename(columns={'Clean_text':'text'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_ekman.rename(columns={'Clean_text':'text'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_ekman.rename(columns={'Clean_text':'text'}, inplace=True)


In [47]:
train_ekman.head(3)

Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,neutral
0,my favourite food is anything i did not have t...,0,0,0,0,0,0,1
1,now if he does off himself everyone will think...,0,0,0,0,0,0,1
2,why the fuck is bayless isoing,1,0,0,0,0,0,0


In [48]:
train_ekman_no_neu = train_ekman.copy()
val_ekman_no_neu = val_ekman.copy()
test_ekman_no_neu = test_ekman.copy()

train_ekman_no_neu.reset_index(inplace = True)
val_ekman_no_neu.reset_index(inplace = True)
test_ekman_no_neu.reset_index(inplace = True)

train_ekman_no_neu = train_ekman_no_neu.drop('neutral', axis=1)
val_ekman_no_neu = val_ekman_no_neu.drop('neutral', axis=1)
test_ekman_no_neu = test_ekman_no_neu.drop('neutral', axis=1)

train_ekman_no_neu = train_ekman_no_neu.drop(columns=['index'])
val_ekman_no_neu = val_ekman_no_neu.drop(columns=['index'])
test_ekman_no_neu = test_ekman_no_neu.drop(columns=['index'])

# Removing samples with only 0 in their labels
train_ekman_no_neu = train_ekman_no_neu.loc[ train_ekman_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]
val_ekman_no_neu = val_ekman_no_neu.loc[ val_ekman_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]
test_ekman_no_neu = test_ekman_no_neu.loc[ test_ekman_no_neu.apply(lambda x: sum(x[1:]), axis=1)>0 ]

# Shape validation
print(train_ekman_no_neu.shape)
print(val_ekman_no_neu.shape)
print(test_ekman_no_neu.shape)

(30587, 7)
(3834, 7)
(3821, 7)


In [49]:
train_ekman_no_neu.head(3)

Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise
2,why the fuck is bayless isoing,1,0,0,0,0,0
3,to make her feel threatened,0,0,1,0,0,0
4,dirty southern wankers,1,0,0,0,0,0


In [50]:
class_label_names_no_neu = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
class_label_names = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']

In [51]:
emotion_label_dict={
0:"anger",
1:"disgust",
2:"fear",
3:"joy",
4:"sadness",
5:"surprise",
6:"neutral"
}

# Loading emotion labels for GoEmotions taxonomy
with open("emotions.txt", "r") as file:
    GE_taxonomy = file.read().split("\n")


EKMAN_taxonomy = ['anger',
 'disgust',
 'fear',
 'joy',
 'sadness',
 'surprise',
 'neutral']


EKMAN_taxonomy_no_neu = ['anger',
 'disgust',
 'fear',
 'joy',
 'sadness',
 'surprise']

In [52]:
val_ekman.shape

(5426, 8)

In [53]:
X_train_ekman = train_ekman[:]["text"]
y_train_ekman = train_ekman.loc[:, EKMAN_taxonomy].values.astype(float)
X_train_ekman_no_neu = train_ekman_no_neu[:]["text"]
y_train_ekman_no_neu = train_ekman_no_neu.loc[:, EKMAN_taxonomy_no_neu].values.astype(float)
X_val_ekman = val_ekman[:]["text"]
y_val_ekman = val_ekman.loc[:, EKMAN_taxonomy].values.astype(float)
X_val_ekman_no_neu = val_ekman_no_neu[:]["text"]
y_val_ekman_no_neu = val_ekman_no_neu.loc[:, EKMAN_taxonomy_no_neu].values.astype(float)
X_test_ekman = test_ekman[:]["text"]
y_test_ekman = test_ekman.loc[:, EKMAN_taxonomy].values.astype(float)
X_test_ekman_no_neu = test_ekman_no_neu[:]["text"]
y_test_ekman_no_neu = test_ekman_no_neu.loc[:, EKMAN_taxonomy_no_neu].values.astype(float)
print(X_train_ekman.shape, y_train_ekman.shape,y_train_ekman_no_neu.shape, 
      X_val_ekman.shape, y_val_ekman.shape,y_val_ekman_no_neu.shape,
      X_test_ekman.shape, y_test_ekman.shape, y_test_ekman_no_neu.shape)

(43410,) (43410, 7) (30587, 6) (5426,) (5426, 7) (3834, 6) (5427,) (5427, 7) (3821, 6)


# 2 - Modeling : BERT (Bidirectional Encoder Representations from Transformers)

Now we can go ahead and start defining our BERT-based model.

## 2.1 - Configuration of the base model

First of all, let's define a `max_length` variable. This variable sets a fixed length of sequences to be fed to our model. Therefore, sequences will be either truncated if larger than this value, or completed using padding if smaller. To avoid truncating, we fix this value according to the largest sample of our data.

In [54]:
# Computing max length of samples
full_text = pd.concat([train_GE_Sampled['text'], val_GE_Sampled['text'], test_GE_Sampled['text']])
max_length = full_text.apply(lambda x: len(x.split())).max()
max_length

48

We are going to use BERT's base model which contains almost 110 M trainable parameters. 

Also, in order to match the tokenization and vocabulary used during the training, we are going to use a BERT tokenizer.

# Importing BERT pre-trained model and tokenizer
model_name = 'bert-base-uncased'
config = BertConfig.from_pretrained(model_name, output_hidden_states=False)
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
transformer_model = TFBertModel.from_pretrained(model_name, config = config)

!pip install GPUtil

!pip install numba

import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache():
    print("Initial GPU Usage")
    gpu_usage()                             

    torch.cuda.empty_cache()

    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

free_gpu_cache()

### Ekman Model with 7 Emotions including Neutral

In [108]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 16
VALIDATION_BATCH_SIZE = 16
TEST_BATCH_SIZE =16
EPOCHS = 4
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [109]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [110]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [111]:
test_GE_Sampled.columns

Index(['text', 'admiration', 'amusement', 'anger', 'annoyance', 'approval',
       'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
       'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
       'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride',
       'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')

In [112]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [113]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [114]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [115]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [116]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [117]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7651752829551697
Epoch: 2, Loss:  0.2116960883140564
Epoch: 3, Loss:  0.19146986305713654
Epoch: 4, Loss:  0.037507764995098114


In [118]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [119]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [120]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman = np.array(y_pred_proba_train_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9015434231743837
F1 Score (Micro) = 0.9438676640937091
F1 Score (Macro) = 0.9206389243432047


In [121]:
cr = classification_report(targets_train,y_pred_train_ekman,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.96      0.90      0.93      5579
     disgust       0.96      0.76      0.85       793
        fear       0.97      0.91      0.94       726
         joy       0.97      0.97      0.97     17410
     sadness       0.95      0.92      0.93      3263
    surprise       0.95      0.82      0.88      5367
     neutral       0.98      0.92      0.95     14219

   micro avg       0.97      0.92      0.94     47357
   macro avg       0.96      0.88      0.92     47357
weighted avg       0.97      0.92      0.94     47357
 samples avg       0.97      0.95      0.95     47357



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [123]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman = np.array(y_pred_proba_test_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5962778699097107
F1 Score (Micro) = 0.6696660321854994
F1 Score (Macro) = 0.6039318820329455


In [135]:
# Model evaluation

model_eval(y_test_ekman, y_pred_test_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.54,0.49,0.51
disgust,0.63,0.39,0.48
fear,0.67,0.67,0.67
joy,0.79,0.83,0.81
sadness,0.61,0.56,0.58
surprise,0.61,0.48,0.54
neutral,0.64,0.62,0.63
MACRO-AVERAGE,0.64,0.58,0.6


In [136]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman = np.array(y_pred_proba_val_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.6009952082565426
F1 Score (Micro) = 0.6743924587044885
F1 Score (Macro) = 0.5896398622889973


In [137]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.55,0.5,0.52
disgust,0.52,0.35,0.42
fear,0.72,0.53,0.61
joy,0.81,0.83,0.82
sadness,0.62,0.58,0.6
surprise,0.6,0.47,0.53
neutral,0.64,0.62,0.63
MACRO-AVERAGE,0.64,0.55,0.59


In [138]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [139]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [140]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [141]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_opt, threshold_test_ekman_opt, macro_f1_test_ekman_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_ekman)
print("The model's threshold is {}".format(threshold_test_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_opt))

The model's threshold is 0.30999999999999994
The model's best macro-f1 is 0.6174091824482967


  _warn_prf(average, modifier, msg_start, len(result))


In [142]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_opt, threshold_val_ekman_opt, macro_f1_val_ekman_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_ekman)
print("The model's threshold is {}".format(threshold_val_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_opt))

The model's threshold is 0.32999999999999985
The model's best macro-f1 is 0.6073481129069346


  _warn_prf(average, modifier, msg_start, len(result))


In [143]:
# Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.57,0.53
disgust,0.54,0.46,0.5
fear,0.64,0.72,0.68
joy,0.76,0.85,0.8
sadness,0.54,0.62,0.58
surprise,0.59,0.58,0.59
neutral,0.61,0.7,0.65
MACRO-AVERAGE,0.6,0.64,0.62


In [144]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.52,0.57,0.55
disgust,0.47,0.44,0.46
fear,0.67,0.6,0.63
joy,0.78,0.85,0.81
sadness,0.58,0.64,0.61
surprise,0.56,0.54,0.55
neutral,0.61,0.68,0.64
MACRO-AVERAGE,0.6,0.62,0.61


In [30]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_opt, axis=1)==0)

1328

In [31]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_opt, axis=1)==0)

1297

In [181]:
# Handling empty predictions
y_pred_labels_test_ekman_opt_n = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.57,0.53
disgust,0.54,0.46,0.5
fear,0.64,0.72,0.68
joy,0.76,0.85,0.8
sadness,0.54,0.62,0.58
surprise,0.59,0.58,0.59
neutral,0.61,0.7,0.65
MACRO-AVERAGE,0.6,0.64,0.62


df_train_GE_no_neu
df_val_GE_no_neu
df_test_GE_no_neu

### EKMAN taxonomy with 8 labels excluding neutral emotion

###  Model 2

In [145]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 8
VALIDATION_BATCH_SIZE = 8
TEST_BATCH_SIZE =8
EPOCHS = 4
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [146]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [147]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [148]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [149]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [150]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model_GE_no_neu = BERTClass()
model_GE_no_neu.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [151]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model_GE_no_neu.parameters(), lr=LEARNING_RATE)

In [152]:
def train(epoch,model):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [154]:
for epoch in range(EPOCHS):
    train(epoch, model_GE_no_neu)

Epoch: 1, Loss:  0.31543079018592834
Epoch: 1, Loss:  0.23718954622745514
Epoch: 2, Loss:  0.15672795474529266
Epoch: 2, Loss:  0.21233338117599487
Epoch: 3, Loss:  0.24888412654399872
Epoch: 3, Loss:  0.13314861059188843
Epoch: 4, Loss:  0.11076802760362625
Epoch: 4, Loss:  0.20845213532447815


In [155]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [156]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [157]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_GE_no_neu, targets_train = validation(train_loader,model_GE_no_neu) # epoch
y_pred_train_GE_no_neu = np.array(y_pred_proba_train_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.8975120939875605
F1 Score (Micro) = 0.941391468868249
F1 Score (Macro) = 0.9169220677338153


In [160]:
cr = classification_report(targets_train,y_pred_train_GE_no_neu,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.95      0.89      0.92      5579
     disgust       0.94      0.77      0.84       793
        fear       0.88      0.95      0.92       726
         joy       0.96      0.98      0.97     17410
     sadness       0.96      0.89      0.92      3263
    surprise       0.90      0.92      0.91      5367
     neutral       0.99      0.89      0.94     14219

   micro avg       0.96      0.92      0.94     47357
   macro avg       0.94      0.90      0.92     47357
weighted avg       0.96      0.92      0.94     47357
 samples avg       0.96      0.95      0.95     47357



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Model evaluation
model_eval(targets_train, y_pred_train_GE_no_neu, GE_taxonomy_no_neu)

In [161]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_GE_no_neu, targets_test = validation(test_loader,model_GE_no_neu) # epoch
y_pred_test_GE_no_neu = np.array(y_pred_proba_test_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5793255942509674
F1 Score (Micro) = 0.6630546955624355
F1 Score (Macro) = 0.6014428232997637


In [164]:
# Model evaluation
model_eval(y_test_ekman, y_pred_test_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.56,0.48,0.51
disgust,0.62,0.4,0.49
fear,0.57,0.73,0.64
joy,0.75,0.86,0.8
sadness,0.67,0.53,0.59
surprise,0.55,0.61,0.58
neutral,0.67,0.54,0.6
MACRO-AVERAGE,0.63,0.59,0.6


In [165]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_GE_no_neu, targets_val = validation(validation_loader,model_GE_no_neu) # epoch
y_pred_val_GE_no_neu = np.array(y_pred_proba_val_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.576852193144121
F1 Score (Micro) = 0.6625397644226635
F1 Score (Macro) = 0.5876955775578627


In [166]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.56,0.48,0.52
disgust,0.53,0.38,0.44
fear,0.61,0.62,0.61
joy,0.76,0.85,0.8
sadness,0.69,0.54,0.6
surprise,0.51,0.57,0.54
neutral,0.66,0.54,0.6
MACRO-AVERAGE,0.62,0.57,0.59


In [167]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [168]:
# Generate labels for train, test and val
y_pred_train_labels_no_neu = proba_to_labels(y_pred_proba_train_GE_no_neu)

y_pred_test_labels_no_neu = proba_to_labels(y_pred_proba_test_GE_no_neu)

y_pred_val_labels_no_neu = proba_to_labels(y_pred_proba_val_GE_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [169]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [171]:
##### Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_GE_no_neu_opt, threshold_test_GE_no_neu_opt, macro_f1_test_GE_no_neu_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_GE_no_neu)
print("The model's threshold is {}".format(threshold_test_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_GE_no_neu_opt))

The model's threshold is 0.3699999999999999
The model's best macro-f1 is 0.6082085257448585


  _warn_prf(average, modifier, msg_start, len(result))


In [172]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_GE_no_neu_opt, threshold_val_GE_no_neu_opt, macro_f1_val_GE_no_neu_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_GE_no_neu)
print("The model's threshold is {}".format(threshold_val_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_GE_no_neu_opt))

The model's threshold is 0.2799999999999999
The model's best macro-f1 is 0.6011534108230556


In [173]:
#####  Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.53,0.53,0.53
disgust,0.61,0.45,0.52
fear,0.52,0.73,0.61
joy,0.72,0.88,0.79
sadness,0.64,0.58,0.61
surprise,0.51,0.66,0.58
neutral,0.64,0.61,0.63
MACRO-AVERAGE,0.6,0.63,0.61


In [174]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.58,0.54
disgust,0.46,0.48,0.47
fear,0.55,0.69,0.61
joy,0.71,0.89,0.79
sadness,0.61,0.61,0.61
surprise,0.46,0.68,0.55
neutral,0.61,0.67,0.64
MACRO-AVERAGE,0.56,0.66,0.6


In [175]:
y_pred_labels_test_GE_no_neu_opt.shape, y_pred_labels_val_GE_no_neu_opt.shape

((5427, 7), (5426, 7))

In [176]:
# Handling empty predictions for test
y_pred_labels_test_GE_no_neu_opt_h = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.54,0.52
disgust,0.61,0.45,0.52
fear,0.52,0.73,0.61
joy,0.72,0.88,0.79
sadness,0.64,0.58,0.61
surprise,0.51,0.66,0.58
neutral,0.64,0.61,0.63
MACRO-AVERAGE,0.59,0.64,0.61


In [178]:
# Handling empty predictions for val
y_pred_labels_val_GE_no_neu_opt_h = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.58,0.54
disgust,0.46,0.48,0.47
fear,0.55,0.69,0.61
joy,0.71,0.89,0.79
sadness,0.61,0.61,0.61
surprise,0.46,0.68,0.55
neutral,0.61,0.67,0.64
MACRO-AVERAGE,0.56,0.66,0.6


### Saving the Model

In [179]:
PATH = "BERT_GoEmotion_ekman_1.pt"
torch.save(model_GE_no_neu.state_dict(), PATH)


#### Loading the model

In [180]:
PATH = "BERT_GoEmotion_ekman_1.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 2

In [54]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 16
VALIDATION_BATCH_SIZE = 16
TEST_BATCH_SIZE =16
EPOCHS = 4
LEARNING_RATE = 5e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [55]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [56]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [57]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [58]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [59]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model_GE_no_neu = BERTClass()
model_GE_no_neu.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [60]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model_GE_no_neu.parameters(), lr=LEARNING_RATE)

In [61]:
def train(epoch,model):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [62]:
for epoch in range(EPOCHS):
    train(epoch, model_GE_no_neu)

Epoch: 1, Loss:  0.7721126079559326
Epoch: 2, Loss:  0.26377880573272705
Epoch: 3, Loss:  0.20084314048290253
Epoch: 4, Loss:  0.14197106659412384


In [63]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [64]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [67]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_GE_no_neu, targets_train = validation(train_loader,model_GE_no_neu) # epoch
y_pred_train_GE_no_neu = np.array(y_pred_proba_train_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.8784842202257545
F1 Score (Micro) = 0.9279613493227504
F1 Score (Macro) = 0.9027224806646482


In [68]:
cr = classification_report(targets_train,y_pred_train_GE_no_neu,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.94      0.87      0.90      5579
     disgust       0.88      0.78      0.83       793
        fear       0.89      0.94      0.92       726
         joy       0.97      0.97      0.97     17410
     sadness       0.88      0.93      0.90      3263
    surprise       0.86      0.90      0.88      5367
     neutral       0.99      0.85      0.92     14219

   micro avg       0.95      0.91      0.93     47357
   macro avg       0.92      0.89      0.90     47357
weighted avg       0.95      0.91      0.93     47357
 samples avg       0.95      0.93      0.93     47357



  _warn_prf(average, modifier, msg_start, len(result))


#Model evaluation
model_eval(targets_train, y_pred_train_GE_no_neu, GE_taxonomy_no_neu)

In [69]:
EKMAN_taxonomy# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_GE_no_neu, targets_test = validation(test_loader,model_GE_no_neu) # epoch
y_pred_test_GE_no_neu = np.array(y_pred_proba_test_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5677169707020453
F1 Score (Micro) = 0.6488456865127582
F1 Score (Macro) = 0.5955675680729302


In [70]:
# Model evaluation
model_eval(y_test_ekman, y_pred_test_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.55,0.49,0.52
disgust,0.53,0.45,0.48
fear,0.61,0.73,0.66
joy,0.77,0.84,0.8
sadness,0.53,0.61,0.57
surprise,0.53,0.63,0.58
neutral,0.68,0.47,0.56
MACRO-AVERAGE,0.6,0.6,0.6


In [71]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_GE_no_neu, targets_val = validation(validation_loader,model_GE_no_neu) # epoch
y_pred_val_GE_no_neu = np.array(y_pred_proba_val_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5766678953188352
F1 Score (Micro) = 0.65589997395155
F1 Score (Macro) = 0.5864342999080551


In [72]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.56,0.51,0.53
disgust,0.45,0.42,0.44
fear,0.63,0.62,0.62
joy,0.79,0.83,0.81
sadness,0.54,0.62,0.58
surprise,0.52,0.62,0.57
neutral,0.69,0.47,0.56
MACRO-AVERAGE,0.6,0.58,0.59


In [73]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [74]:
# Generate labels for train, test and val
y_pred_train_labels_no_neu = proba_to_labels(y_pred_proba_train_GE_no_neu)

y_pred_test_labels_no_neu = proba_to_labels(y_pred_proba_test_GE_no_neu)

y_pred_val_labels_no_neu = proba_to_labels(y_pred_proba_val_GE_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [75]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [76]:
##### Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_GE_no_neu_opt, threshold_test_GE_no_neu_opt, macro_f1_test_GE_no_neu_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_GE_no_neu)
print("The model's threshold is {}".format(threshold_test_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_GE_no_neu_opt))

The model's threshold is 0.4099999999999998
The model's best macro-f1 is 0.5977420962180179


In [78]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_GE_no_neu_opt, threshold_val_GE_no_neu_opt, macro_f1_val_GE_no_neu_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_GE_no_neu)
print("The model's threshold is {}".format(threshold_val_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_GE_no_neu_opt))

The model's threshold is 0.4099999999999998
The model's best macro-f1 is 0.5905044990319988


In [79]:
#####  Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.52,0.53,0.53
disgust,0.48,0.48,0.48
fear,0.58,0.77,0.66
joy,0.75,0.85,0.8
sadness,0.51,0.64,0.57
surprise,0.5,0.67,0.58
neutral,0.67,0.51,0.58
MACRO-AVERAGE,0.57,0.64,0.6


In [80]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.52,0.54,0.53
disgust,0.41,0.45,0.43
fear,0.61,0.65,0.63
joy,0.77,0.85,0.81
sadness,0.52,0.65,0.57
surprise,0.49,0.67,0.57
neutral,0.68,0.52,0.59
MACRO-AVERAGE,0.57,0.62,0.59


In [81]:
y_pred_labels_test_GE_no_neu_opt.shape, y_pred_labels_val_GE_no_neu_opt.shape

((5427, 7), (5426, 7))

In [89]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_GE_no_neu_opt, axis=1)==0)

75

In [90]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_GE_no_neu_opt, axis=1)==0)

80

In [83]:
# Handling empty predictions for test
y_pred_labels_test_GE_no_neu_opt_h = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.56,0.53
disgust,0.48,0.48,0.48
fear,0.58,0.77,0.66
joy,0.75,0.85,0.8
sadness,0.51,0.64,0.57
surprise,0.5,0.67,0.58
neutral,0.67,0.51,0.58
MACRO-AVERAGE,0.57,0.64,0.6


In [84]:
# Handling empty predictions for val
y_pred_labels_val_GE_no_neu_opt_h = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.58,0.54
disgust,0.41,0.45,0.43
fear,0.61,0.65,0.63
joy,0.77,0.85,0.81
sadness,0.52,0.65,0.57
surprise,0.49,0.67,0.57
neutral,0.68,0.52,0.59
MACRO-AVERAGE,0.57,0.62,0.59


In [87]:
# Handling empty predictions
y_pred_labels_test_ekman_opt_n = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.52,0.53,0.53
disgust,0.48,0.48,0.48
fear,0.58,0.77,0.66
joy,0.75,0.85,0.8
sadness,0.51,0.64,0.57
surprise,0.5,0.67,0.58
neutral,0.66,0.53,0.59
MACRO-AVERAGE,0.57,0.64,0.6


In [88]:
# Handling empty predictions
y_pred_labels_val_ekman_opt_n = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.52,0.54,0.53
disgust,0.41,0.45,0.43
fear,0.61,0.65,0.63
joy,0.77,0.85,0.81
sadness,0.52,0.65,0.57
surprise,0.49,0.67,0.57
neutral,0.67,0.54,0.6
MACRO-AVERAGE,0.57,0.62,0.59


### Saving the Model

In [91]:
PATH = "BERT_ekman_no_neu_2.pt"
torch.save(model_GE_no_neu.state_dict(), PATH)


#### Loading the model

In [92]:
PATH = "BERT_GoEmotion_no_neu_2.pt"
model_GE_no_neu_2 = BERTClass()
model_GE_no_neu_2.load_state_dict(torch.load(PATH))
model_GE_no_neu_2.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 3

In [93]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TEST_BATCH_SIZE =32
EPOCHS = 4
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [94]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [95]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [96]:
test_GE_Sampled.columns

Index(['text', 'admiration', 'amusement', 'anger', 'annoyance', 'approval',
       'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
       'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
       'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride',
       'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')

In [97]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [98]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [99]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [100]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [101]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [102]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7534167170524597
Epoch: 2, Loss:  0.1689392477273941
Epoch: 3, Loss:  0.14351534843444824
Epoch: 4, Loss:  0.13346342742443085


In [103]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [104]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [105]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman = np.array(y_pred_proba_train_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9012439530062197
F1 Score (Micro) = 0.9419322687582606
F1 Score (Macro) = 0.9156289816939118


In [106]:
cr = classification_report(targets_train,y_pred_train_ekman,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.94      0.89      0.92      5579
     disgust       0.90      0.75      0.82       793
        fear       0.96      0.91      0.94       726
         joy       0.98      0.96      0.97     17410
     sadness       0.93      0.92      0.92      3263
    surprise       0.93      0.87      0.90      5367
     neutral       0.99      0.90      0.94     14219

   micro avg       0.97      0.92      0.94     47357
   macro avg       0.95      0.89      0.92     47357
weighted avg       0.97      0.92      0.94     47357
 samples avg       0.97      0.94      0.95     47357



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [107]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman = np.array(y_pred_proba_test_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5960936060438548
F1 Score (Micro) = 0.6693344932579383
F1 Score (Macro) = 0.6015477014459124


In [108]:
# Model evaluation

model_eval(y_test_ekman, y_pred_test_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.53,0.52,0.52
disgust,0.61,0.38,0.47
fear,0.64,0.63,0.64
joy,0.8,0.82,0.81
sadness,0.6,0.56,0.58
surprise,0.58,0.55,0.57
neutral,0.67,0.59,0.63
MACRO-AVERAGE,0.63,0.58,0.6


In [109]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman = np.array(y_pred_proba_val_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5967563582749723
F1 Score (Micro) = 0.6735173735086649
F1 Score (Macro) = 0.5979758129152594


In [110]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.56,0.55,0.55
disgust,0.51,0.36,0.42
fear,0.77,0.51,0.62
joy,0.81,0.81,0.81
sadness,0.65,0.57,0.61
surprise,0.58,0.55,0.56
neutral,0.65,0.58,0.61
MACRO-AVERAGE,0.65,0.56,0.6


In [111]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [112]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [113]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [114]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_opt, threshold_test_ekman_opt, macro_f1_test_ekman_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_ekman)
print("The model's threshold is {}".format(threshold_test_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_opt))

The model's threshold is 0.3699999999999999
The model's best macro-f1 is 0.6149231547257402


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [115]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_opt, threshold_val_ekman_opt, macro_f1_val_ekman_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_ekman)
print("The model's threshold is {}".format(threshold_val_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_opt))

The model's threshold is 0.2699999999999999
The model's best macro-f1 is 0.609077394301868


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [116]:
# Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.59,0.54
disgust,0.56,0.44,0.49
fear,0.6,0.71,0.65
joy,0.78,0.84,0.81
sadness,0.58,0.59,0.58
surprise,0.56,0.61,0.58
neutral,0.65,0.64,0.64
MACRO-AVERAGE,0.6,0.63,0.61


In [117]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.65,0.56
disgust,0.42,0.47,0.44
fear,0.68,0.55,0.61
joy,0.77,0.86,0.81
sadness,0.58,0.63,0.61
surprise,0.52,0.65,0.58
neutral,0.62,0.69,0.66
MACRO-AVERAGE,0.58,0.64,0.61


In [118]:
# Handling empty predictions
y_pred_labels_test_ekman_opt_n = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.59,0.54
disgust,0.56,0.44,0.49
fear,0.6,0.71,0.65
joy,0.78,0.84,0.81
sadness,0.58,0.59,0.58
surprise,0.56,0.61,0.58
neutral,0.64,0.65,0.65
MACRO-AVERAGE,0.6,0.63,0.62


### Saving the Model

In [119]:
PATH = "BERT_ekman_no_neu_4.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [120]:
PATH = "BERT_ekman_no_neu_4.pt"
model_ekman_2 = BERTClass()
model_ekman_2.load_state_dict(torch.load(PATH))
model_ekman_2.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RuntimeError: Error(s) in loading state_dict for BERTClass:
	size mismatch for l3.weight: copying a param with shape torch.Size([27, 768]) from checkpoint, the shape in current model is torch.Size([7, 768]).
	size mismatch for l3.bias: copying a param with shape torch.Size([27]) from checkpoint, the shape in current model is torch.Size([7]).

###  Model 4


In [121]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 8
VALIDATION_BATCH_SIZE = 8
TEST_BATCH_SIZE =8
EPOCHS = 4
LEARNING_RATE = 3.e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [122]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [123]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [124]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [125]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [126]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model_GE_no_neu = BERTClass()
model_GE_no_neu.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [127]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model_GE_no_neu.parameters(), lr=LEARNING_RATE)

In [128]:
def train(epoch,model):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [129]:
for epoch in range(EPOCHS):
    train(epoch, model_GE_no_neu)

Epoch: 1, Loss:  0.7090315222740173
Epoch: 1, Loss:  0.2951771318912506
Epoch: 2, Loss:  0.1869005560874939
Epoch: 2, Loss:  0.12390400469303131
Epoch: 3, Loss:  0.24793168902397156
Epoch: 3, Loss:  0.09864102303981781
Epoch: 4, Loss:  0.08593055605888367
Epoch: 4, Loss:  0.15928852558135986


In [130]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [131]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [132]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_GE_no_neu, targets_train = validation(train_loader,model_GE_no_neu) # epoch
y_pred_train_GE_no_neu = np.array(y_pred_proba_train_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.905137065192352
F1 Score (Micro) = 0.9468248641392599
F1 Score (Macro) = 0.9232241617196509


In [133]:
cr = classification_report(targets_train,y_pred_train_GE_no_neu,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.95      0.90      0.92      5579
     disgust       0.91      0.80      0.85       793
        fear       0.95      0.91      0.93       726
         joy       0.99      0.96      0.97     17410
     sadness       0.96      0.90      0.93      3263
    surprise       0.95      0.87      0.90      5367
     neutral       0.98      0.93      0.95     14219

   micro avg       0.97      0.92      0.95     47357
   macro avg       0.95      0.89      0.92     47357
weighted avg       0.97      0.92      0.95     47357
 samples avg       0.97      0.95      0.96     47357



  _warn_prf(average, modifier, msg_start, len(result))


##### Model evaluation
model_eval(targets_train, y_pred_train_GE_no_neu, GE_taxonomy_no_neu)

In [134]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_GE_no_neu, targets_test = validation(test_loader,model_GE_no_neu) # epoch
y_pred_test_GE_no_neu = np.array(y_pred_proba_test_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5920398009950248
F1 Score (Micro) = 0.6690821256038647
F1 Score (Macro) = 0.594640668228177


In [135]:
# Model evaluation
model_eval(y_test_ekman, y_pred_test_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.53,0.51,0.52
disgust,0.53,0.37,0.43
fear,0.63,0.63,0.63
joy,0.81,0.79,0.8
sadness,0.63,0.53,0.57
surprise,0.58,0.54,0.56
neutral,0.64,0.65,0.65
MACRO-AVERAGE,0.62,0.57,0.59


In [136]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_GE_no_neu, targets_val = validation(validation_loader,model_GE_no_neu) # epoch
y_pred_val_GE_no_neu = np.array(y_pred_proba_val_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5923332104681165
F1 Score (Micro) = 0.6752033223741131
F1 Score (Macro) = 0.5846863406655876


In [137]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.54,0.52,0.53
disgust,0.44,0.34,0.38
fear,0.72,0.48,0.57
joy,0.83,0.8,0.81
sadness,0.67,0.56,0.61
surprise,0.57,0.51,0.54
neutral,0.64,0.64,0.64
MACRO-AVERAGE,0.63,0.55,0.58


In [138]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [139]:
# Generate labels for train, test and val
y_pred_train_labels_no_neu = proba_to_labels(y_pred_proba_train_GE_no_neu)

y_pred_test_labels_no_neu = proba_to_labels(y_pred_proba_test_GE_no_neu)

y_pred_val_labels_no_neu = proba_to_labels(y_pred_proba_val_GE_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [140]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [141]:
##### Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_GE_no_neu_opt, threshold_test_GE_no_neu_opt, macro_f1_test_GE_no_neu_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_GE_no_neu)
print("The model's threshold is {}".format(threshold_test_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_GE_no_neu_opt))

The model's threshold is 0.24999999999999992
The model's best macro-f1 is 0.6031832060695648


In [142]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_GE_no_neu_opt, threshold_val_GE_no_neu_opt, macro_f1_val_GE_no_neu_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_GE_no_neu)
print("The model's threshold is {}".format(threshold_val_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_GE_no_neu_opt))

The model's threshold is 0.2599999999999999
The model's best macro-f1 is 0.5968763224482921


In [143]:
#####  Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.48,0.61,0.54
disgust,0.44,0.46,0.45
fear,0.57,0.69,0.63
joy,0.76,0.86,0.8
sadness,0.54,0.58,0.56
surprise,0.5,0.71,0.59
neutral,0.57,0.77,0.66
MACRO-AVERAGE,0.55,0.67,0.6


In [144]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.47,0.62,0.54
disgust,0.38,0.45,0.42
fear,0.61,0.58,0.6
joy,0.77,0.84,0.81
sadness,0.58,0.61,0.6
surprise,0.49,0.66,0.56
neutral,0.58,0.77,0.66
MACRO-AVERAGE,0.56,0.65,0.6


In [145]:
y_pred_labels_test_GE_no_neu_opt.shape, y_pred_labels_val_GE_no_neu_opt.shape

((5427, 7), (5426, 7))

In [146]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_GE_no_neu_opt, axis=1)==0)

5

In [147]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_GE_no_neu_opt, axis=1)==0)

5

In [148]:
# Handling empty predictions for test
y_pred_labels_test_GE_no_neu_opt_h = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.48,0.61,0.54
disgust,0.44,0.46,0.45
fear,0.57,0.69,0.63
joy,0.76,0.86,0.8
sadness,0.54,0.58,0.56
surprise,0.5,0.71,0.59
neutral,0.57,0.77,0.66
MACRO-AVERAGE,0.55,0.67,0.6


In [149]:
# Handling empty predictions for val
y_pred_labels_val_GE_no_neu_opt_h = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.47,0.62,0.54
disgust,0.38,0.45,0.42
fear,0.61,0.58,0.6
joy,0.77,0.84,0.81
sadness,0.58,0.61,0.6
surprise,0.49,0.66,0.56
neutral,0.58,0.77,0.66
MACRO-AVERAGE,0.56,0.65,0.6


In [152]:
# Handling empty predictions
y_pred_labels_test_GE_no_neu_opt_n = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_GE_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.48,0.61,0.54
disgust,0.44,0.46,0.45
fear,0.57,0.69,0.63
joy,0.76,0.86,0.8
sadness,0.54,0.58,0.56
surprise,0.5,0.71,0.59
neutral,0.57,0.77,0.66
MACRO-AVERAGE,0.55,0.67,0.6


In [154]:
# Handling empty predictions
y_pred_labels_val_GE_no_neu_opt_n = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_GE_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.47,0.62,0.54
disgust,0.38,0.45,0.42
fear,0.61,0.58,0.6
joy,0.77,0.84,0.81
sadness,0.58,0.61,0.6
surprise,0.49,0.66,0.56
neutral,0.58,0.78,0.66
MACRO-AVERAGE,0.56,0.65,0.6


### Saving the Model

In [150]:
PATH = "BERT_GoEmotion_ekman_5.pt"
torch.save(model_GE_no_neu.state_dict(), PATH)


#### Loading the model

In [151]:
PATH = "BERT_GoEmotion_ekman_5.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 5


In [54]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 16
VALIDATION_BATCH_SIZE = 16
TEST_BATCH_SIZE =16
EPOCHS = 6
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [55]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [56]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [57]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [58]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [59]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model_GE_no_neu = BERTClass()
model_GE_no_neu.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [60]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model_GE_no_neu.parameters(), lr=LEARNING_RATE)

In [61]:
def train(epoch,model):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [62]:
for epoch in range(EPOCHS):
    train(epoch, model_GE_no_neu)

Epoch: 1, Loss:  0.7721126079559326
Epoch: 2, Loss:  0.25197577476501465
Epoch: 3, Loss:  0.173269584774971
Epoch: 4, Loss:  0.09753710776567459
Epoch: 5, Loss:  0.07888501137495041
Epoch: 6, Loss:  0.025632422417402267


In [63]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [64]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [65]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_GE_no_neu, targets_train = validation(train_loader,model_GE_no_neu) # epoch
y_pred_train_GE_no_neu = np.array(y_pred_proba_train_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9352683713430086
F1 Score (Micro) = 0.9663098202831053
F1 Score (Macro) = 0.9544700565069869


In [66]:
cr = classification_report(targets_train,y_pred_train_GE_no_neu,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.98      0.94      0.96      5579
     disgust       0.98      0.88      0.92       793
        fear       0.97      0.92      0.95       726
         joy       1.00      0.96      0.98     17410
     sadness       0.98      0.94      0.96      3263
    surprise       0.99      0.91      0.95      5367
     neutral       0.98      0.96      0.97     14219

   micro avg       0.99      0.95      0.97     47357
   macro avg       0.98      0.93      0.95     47357
weighted avg       0.99      0.95      0.97     47357
 samples avg       0.99      0.97      0.97     47357



  _warn_prf(average, modifier, msg_start, len(result))


##### Model evaluation
model_eval(targets_train, y_pred_train_GE_no_neu, GE_taxonomy_no_neu)

In [67]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_GE_no_neu, targets_test = validation(test_loader,model_GE_no_neu) # epoch
y_pred_test_GE_no_neu = np.array(y_pred_proba_test_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5900128984706099
F1 Score (Micro) = 0.6686556985764214
F1 Score (Macro) = 0.6036584448230482


In [68]:
# Model evaluation
model_eval(y_test_ekman, y_pred_test_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.53,0.5,0.52
disgust,0.56,0.43,0.49
fear,0.6,0.7,0.65
joy,0.83,0.78,0.81
sadness,0.61,0.56,0.59
surprise,0.58,0.5,0.53
neutral,0.61,0.69,0.65
MACRO-AVERAGE,0.62,0.6,0.6


In [69]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_GE_no_neu, targets_val = validation(validation_loader,model_GE_no_neu) # epoch
y_pred_val_GE_no_neu = np.array(y_pred_proba_val_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5903059343899743
F1 Score (Micro) = 0.6690573770491802
F1 Score (Macro) = 0.5831255166596655


In [70]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.54,0.52,0.53
disgust,0.41,0.38,0.4
fear,0.63,0.56,0.59
joy,0.85,0.77,0.81
sadness,0.64,0.59,0.61
surprise,0.56,0.46,0.5
neutral,0.6,0.7,0.65
MACRO-AVERAGE,0.6,0.57,0.58


In [71]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [72]:
# Generate labels for train, test and val
y_pred_train_labels_no_neu = proba_to_labels(y_pred_proba_train_GE_no_neu)

y_pred_test_labels_no_neu = proba_to_labels(y_pred_proba_test_GE_no_neu)

y_pred_val_labels_no_neu = proba_to_labels(y_pred_proba_val_GE_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [73]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [74]:
##### Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_GE_no_neu_opt, threshold_test_GE_no_neu_opt, macro_f1_test_GE_no_neu_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_GE_no_neu)
print("The model's threshold is {}".format(threshold_test_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_GE_no_neu_opt))

The model's threshold is 0.5699999999999997
The model's best macro-f1 is 0.6038576567455445


In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_GE_no_neu_opt, threshold_val_GE_no_neu_opt, macro_f1_val_GE_no_neu_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_GE_no_neu)
print("The model's threshold is {}".format(threshold_val_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_GE_no_neu_opt))

The model's threshold is 0.24999999999999992
The model's best macro-f1 is 0.5980831580983362


In [76]:
#####  Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.55,0.48,0.51
disgust,0.59,0.42,0.49
fear,0.62,0.69,0.66
joy,0.84,0.77,0.8
sadness,0.63,0.55,0.59
surprise,0.59,0.48,0.53
neutral,0.62,0.67,0.64
MACRO-AVERAGE,0.63,0.58,0.6


In [77]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.58,0.54
disgust,0.38,0.48,0.42
fear,0.6,0.67,0.63
joy,0.82,0.8,0.81
sadness,0.56,0.62,0.59
surprise,0.51,0.57,0.53
neutral,0.58,0.77,0.66
MACRO-AVERAGE,0.56,0.64,0.6


In [78]:
y_pred_labels_test_GE_no_neu_opt.shape, y_pred_labels_val_GE_no_neu_opt.shape

((5427, 7), (5426, 7))

In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_GE_no_neu_opt, axis=1)==0)

153

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_GE_no_neu_opt, axis=1)==0)

1

In [81]:
# Handling empty predictions for test
y_pred_labels_test_GE_no_neu_opt_h = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.48,0.52,0.5
disgust,0.59,0.42,0.49
fear,0.62,0.69,0.66
joy,0.84,0.77,0.8
sadness,0.63,0.55,0.59
surprise,0.59,0.48,0.53
neutral,0.62,0.67,0.64
MACRO-AVERAGE,0.62,0.59,0.6


In [82]:
# Handling empty predictions for val
y_pred_labels_val_GE_no_neu_opt_h = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.58,0.54
disgust,0.38,0.48,0.42
fear,0.6,0.67,0.63
joy,0.82,0.8,0.81
sadness,0.56,0.62,0.59
surprise,0.51,0.57,0.53
neutral,0.58,0.77,0.66
MACRO-AVERAGE,0.56,0.64,0.6


In [83]:
# Handling empty predictions
y_pred_labels_test_GE_no_neu_opt_n = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_GE_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.55,0.48,0.51
disgust,0.59,0.42,0.49
fear,0.62,0.69,0.66
joy,0.84,0.77,0.8
sadness,0.63,0.55,0.59
surprise,0.59,0.48,0.53
neutral,0.6,0.71,0.65
MACRO-AVERAGE,0.63,0.59,0.6


In [84]:
# Handling empty predictions
y_pred_labels_val_GE_no_neu_opt_n = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_GE_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.58,0.54
disgust,0.38,0.48,0.42
fear,0.6,0.67,0.63
joy,0.82,0.8,0.81
sadness,0.56,0.62,0.59
surprise,0.51,0.57,0.53
neutral,0.58,0.77,0.66
MACRO-AVERAGE,0.56,0.64,0.6


### Saving the Model

In [85]:
PATH = "BERT_GoEmotion_ekman_5_neu.pt"
torch.save(model_GE_no_neu.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_GoEmotion_ekman_5_neu.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 6


In [54]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 8
VALIDATION_BATCH_SIZE = 8
TEST_BATCH_SIZE =8
EPOCHS = 6
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [55]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [56]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [57]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [58]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [59]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model_GE_no_neu = BERTClass()
model_GE_no_neu.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [60]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model_GE_no_neu.parameters(), lr=LEARNING_RATE)

In [61]:
def train(epoch,model):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [62]:
for epoch in range(EPOCHS):
    train(epoch, model_GE_no_neu)

Epoch: 1, Loss:  0.7630488276481628
Epoch: 1, Loss:  0.310396671295166
Epoch: 2, Loss:  0.26092272996902466
Epoch: 2, Loss:  0.2029384970664978
Epoch: 3, Loss:  0.19415324926376343
Epoch: 3, Loss:  0.27960506081581116
Epoch: 4, Loss:  0.11883809417486191
Epoch: 4, Loss:  0.09009787440299988
Epoch: 5, Loss:  0.0760820284485817
Epoch: 5, Loss:  0.2884007394313812
Epoch: 6, Loss:  0.03203793242573738
Epoch: 6, Loss:  0.0654265508055687


In [63]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [64]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [65]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_GE_no_neu, targets_train = validation(train_loader,model_GE_no_neu) # epoch
y_pred_train_GE_no_neu = np.array(y_pred_proba_train_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9323197419949321
F1 Score (Micro) = 0.9646764613941857
F1 Score (Macro) = 0.9491681355067894


In [66]:
cr = classification_report(targets_train,y_pred_train_GE_no_neu,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.98      0.93      0.95      5579
     disgust       0.90      0.89      0.90       793
        fear       0.97      0.94      0.96       726
         joy       0.99      0.97      0.98     17410
     sadness       0.97      0.93      0.95      3263
    surprise       0.98      0.91      0.94      5367
     neutral       0.99      0.94      0.97     14219

   micro avg       0.99      0.94      0.96     47357
   macro avg       0.97      0.93      0.95     47357
weighted avg       0.99      0.94      0.96     47357
 samples avg       0.99      0.97      0.97     47357



  _warn_prf(average, modifier, msg_start, len(result))


##### Model evaluation
model_eval(targets_train, y_pred_train_GE_no_neu, GE_taxonomy_no_neu)

In [67]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_GE_no_neu, targets_test = validation(test_loader,model_GE_no_neu) # epoch
y_pred_test_GE_no_neu = np.array(y_pred_proba_test_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5785885387875438
F1 Score (Micro) = 0.6624862299805101
F1 Score (Macro) = 0.5826513066150728


In [68]:
# Model evaluation
model_eval(y_test_ekman, y_pred_test_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.53,0.5,0.51
disgust,0.41,0.46,0.44
fear,0.54,0.69,0.6
joy,0.79,0.82,0.81
sadness,0.57,0.52,0.55
surprise,0.56,0.51,0.53
neutral,0.64,0.64,0.64
MACRO-AVERAGE,0.58,0.59,0.58


In [69]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_GE_no_neu, targets_val = validation(validation_loader,model_GE_no_neu) # epoch
y_pred_val_GE_no_neu = np.array(y_pred_proba_val_GE_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_GE_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_GE_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5820125322521195
F1 Score (Micro) = 0.6644692359702502
F1 Score (Macro) = 0.5673608374048844


In [70]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_GE_no_neu, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.53,0.53,0.53
disgust,0.29,0.45,0.36
fear,0.6,0.5,0.54
joy,0.81,0.81,0.81
sadness,0.62,0.55,0.58
surprise,0.55,0.49,0.52
neutral,0.62,0.64,0.63
MACRO-AVERAGE,0.57,0.57,0.57


In [71]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [72]:
# Generate labels for train, test and val
y_pred_train_labels_no_neu = proba_to_labels(y_pred_proba_train_GE_no_neu)

y_pred_test_labels_no_neu = proba_to_labels(y_pred_proba_test_GE_no_neu)

y_pred_val_labels_no_neu = proba_to_labels(y_pred_proba_val_GE_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [73]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [74]:
##### Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_GE_no_neu_opt, threshold_test_GE_no_neu_opt, macro_f1_test_GE_no_neu_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_GE_no_neu)
print("The model's threshold is {}".format(threshold_test_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_GE_no_neu_opt))

The model's threshold is 0.6099999999999998
The model's best macro-f1 is 0.5878274344087734


In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_GE_no_neu_opt, threshold_val_GE_no_neu_opt, macro_f1_val_GE_no_neu_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_GE_no_neu)
print("The model's threshold is {}".format(threshold_val_GE_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_GE_no_neu_opt))

The model's threshold is 0.29999999999999993
The model's best macro-f1 is 0.5701638422622907


In [76]:
#####  Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.54,0.47,0.5
disgust,0.45,0.46,0.46
fear,0.58,0.67,0.63
joy,0.8,0.81,0.81
sadness,0.62,0.52,0.57
surprise,0.58,0.48,0.53
neutral,0.65,0.62,0.63
MACRO-AVERAGE,0.61,0.58,0.59


In [77]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.58,0.54
disgust,0.26,0.48,0.34
fear,0.56,0.52,0.54
joy,0.78,0.84,0.81
sadness,0.6,0.6,0.6
surprise,0.51,0.55,0.53
neutral,0.59,0.69,0.64
MACRO-AVERAGE,0.54,0.61,0.57


In [78]:
y_pred_labels_test_GE_no_neu_opt.shape, y_pred_labels_val_GE_no_neu_opt.shape

((5427, 7), (5426, 7))

In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_GE_no_neu_opt, axis=1)==0)

176

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_GE_no_neu_opt, axis=1)==0)

3

In [81]:
# Handling empty predictions for test
y_pred_labels_test_GE_no_neu_opt_h = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.47,0.52,0.49
disgust,0.45,0.46,0.46
fear,0.58,0.67,0.63
joy,0.8,0.81,0.81
sadness,0.62,0.52,0.57
surprise,0.58,0.48,0.53
neutral,0.65,0.62,0.63
MACRO-AVERAGE,0.59,0.58,0.59


In [82]:
# Handling empty predictions for val
y_pred_labels_val_GE_no_neu_opt_h = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_GE_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_GE_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.58,0.54
disgust,0.26,0.48,0.34
fear,0.56,0.52,0.54
joy,0.78,0.84,0.81
sadness,0.6,0.6,0.6
surprise,0.51,0.55,0.53
neutral,0.59,0.69,0.64
MACRO-AVERAGE,0.54,0.61,0.57


In [83]:
# Handling empty predictions
y_pred_labels_test_GE_no_neu_opt_n = np.copy(y_pred_labels_test_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_GE_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_GE_no_neu_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.54,0.47,0.5
disgust,0.45,0.46,0.46
fear,0.58,0.67,0.63
joy,0.8,0.81,0.81
sadness,0.62,0.52,0.57
surprise,0.58,0.48,0.53
neutral,0.62,0.66,0.64
MACRO-AVERAGE,0.6,0.58,0.59


In [84]:
# Handling empty predictions
y_pred_labels_val_GE_no_neu_opt_n = np.copy(y_pred_labels_val_GE_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_GE_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_GE_no_neu_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.5,0.58,0.54
disgust,0.26,0.48,0.34
fear,0.56,0.52,0.54
joy,0.78,0.84,0.81
sadness,0.6,0.6,0.6
surprise,0.51,0.55,0.53
neutral,0.59,0.69,0.64
MACRO-AVERAGE,0.54,0.61,0.57


### Saving the Model

In [85]:
PATH = "BERT_GoEmotion_ekman_6_neu.pt"
torch.save(model_GE_no_neu.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_GoEmotion_ekman_6_neu.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 7

In [54]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TEST_BATCH_SIZE =32
EPOCHS = 6
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [55]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [56]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [57]:
test_GE_Sampled.columns

Index(['text', 'admiration', 'amusement', 'anger', 'annoyance', 'approval',
       'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
       'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
       'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride',
       'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7545009255409241
Epoch: 2, Loss:  0.193465918302536
Epoch: 3, Loss:  0.16060632467269897
Epoch: 4, Loss:  0.09308058768510818
Epoch: 5, Loss:  0.08169709146022797
Epoch: 6, Loss:  0.10407782346010208


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman = np.array(y_pred_proba_train_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.940036857866851
F1 Score (Micro) = 0.969339800187123
F1 Score (Macro) = 0.9562468861422807


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.99      0.91      0.95      5579
     disgust       0.94      0.90      0.92       793
        fear       0.97      0.95      0.96       726
         joy       0.99      0.98      0.98     17410
     sadness       0.96      0.94      0.95      3263
    surprise       0.98      0.94      0.96      5367
     neutral       0.99      0.95      0.97     14219

   micro avg       0.99      0.95      0.97     47357
   macro avg       0.98      0.94      0.96     47357
weighted avg       0.99      0.95      0.97     47357
 samples avg       0.99      0.97      0.98     47357



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman = np.array(y_pred_proba_test_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5798783858485351
F1 Score (Micro) = 0.6659338801048449
F1 Score (Macro) = 0.5944105845642067


In [69]:
# Model evaluation

model_eval(y_test_ekman, y_pred_test_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.59,0.44,0.5
disgust,0.5,0.41,0.45
fear,0.56,0.73,0.63
joy,0.79,0.82,0.81
sadness,0.55,0.59,0.57
surprise,0.53,0.59,0.56
neutral,0.63,0.64,0.63
MACRO-AVERAGE,0.59,0.6,0.59


In [70]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman = np.array(y_pred_proba_val_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5823811279026907
F1 Score (Micro) = 0.671563660925363
F1 Score (Macro) = 0.5873791391892039


In [71]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.59,0.47,0.52
disgust,0.4,0.4,0.4
fear,0.58,0.59,0.58
joy,0.8,0.82,0.81
sadness,0.6,0.63,0.62
surprise,0.52,0.58,0.55
neutral,0.62,0.63,0.62
MACRO-AVERAGE,0.59,0.59,0.59


In [72]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [73]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [74]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_opt, threshold_test_ekman_opt, macro_f1_test_ekman_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_ekman)
print("The model's threshold is {}".format(threshold_test_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_opt))

The model's threshold is 0.6499999999999997
The model's best macro-f1 is 0.5978298181490977


In [76]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_opt, threshold_val_ekman_opt, macro_f1_val_ekman_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_ekman)
print("The model's threshold is {}".format(threshold_val_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_opt))

The model's threshold is 0.3899999999999999
The model's best macro-f1 is 0.5962845618720678


In [77]:
# Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.62,0.42,0.5
disgust,0.56,0.4,0.46
fear,0.61,0.71,0.66
joy,0.81,0.8,0.81
sadness,0.6,0.56,0.58
surprise,0.56,0.54,0.55
neutral,0.65,0.6,0.62
MACRO-AVERAGE,0.63,0.58,0.6


In [78]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.58,0.51,0.54
disgust,0.39,0.44,0.42
fear,0.58,0.65,0.61
joy,0.79,0.83,0.81
sadness,0.57,0.65,0.61
surprise,0.5,0.6,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.58,0.62,0.6


In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_opt, axis=1)==0)

272

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_opt, axis=1)==0)

42

In [81]:
# Handling empty predictions for test
y_pred_labels_test_ekman_opt_h = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.48,0.5,0.49
disgust,0.56,0.4,0.46
fear,0.61,0.71,0.66
joy,0.81,0.8,0.81
sadness,0.6,0.56,0.58
surprise,0.56,0.54,0.55
neutral,0.65,0.6,0.62
MACRO-AVERAGE,0.61,0.59,0.6


In [82]:
# Handling empty predictions for val
y_pred_labels_val_ekman_opt_h = np.copy(y_pred_labels_val_ekman_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.55,0.52,0.54
disgust,0.39,0.44,0.42
fear,0.58,0.65,0.61
joy,0.79,0.83,0.81
sadness,0.57,0.65,0.61
surprise,0.5,0.6,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.57,0.62,0.6


In [83]:
# Handling empty predictions
y_pred_labels_test_ekman_opt_n = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.62,0.42,0.5
disgust,0.56,0.4,0.46
fear,0.61,0.71,0.66
joy,0.81,0.8,0.81
sadness,0.6,0.56,0.58
surprise,0.56,0.54,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.62,0.59,0.6


In [84]:
# Handling empty predictions
y_pred_labels_val_ekman_opt_n = np.copy(y_pred_labels_val_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.58,0.51,0.54
disgust,0.39,0.44,0.42
fear,0.58,0.65,0.61
joy,0.79,0.83,0.81
sadness,0.57,0.65,0.61
surprise,0.5,0.6,0.55
neutral,0.61,0.67,0.64
MACRO-AVERAGE,0.58,0.62,0.6


### Saving the Model

In [86]:
PATH = "BERT_GoEmotion_ekman_7_neu.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [87]:
PATH = "BERT_GoEmotion_ekman_7_neu.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 8 - Final

In [54]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TEST_BATCH_SIZE =32
EPOCHS = 5
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [55]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [56]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [57]:
test_GE_Sampled.columns

Index(['text', 'admiration', 'amusement', 'anger', 'annoyance', 'approval',
       'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
       'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
       'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride',
       'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7545009255409241
Epoch: 2, Loss:  0.193465918302536
Epoch: 3, Loss:  0.16060632467269897
Epoch: 4, Loss:  0.09308058768510818
Epoch: 5, Loss:  0.08169709146022797


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman = np.array(y_pred_proba_train_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9188435844275512
F1 Score (Micro) = 0.9554364504765404
F1 Score (Macro) = 0.9307411001362021


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.94      0.94      0.94      5579
     disgust       0.97      0.74      0.84       793
        fear       0.96      0.90      0.93       726
         joy       0.99      0.97      0.98     17410
     sadness       0.96      0.92      0.94      3263
    surprise       0.96      0.91      0.93      5367
     neutral       1.00      0.92      0.95     14219

   micro avg       0.98      0.93      0.96     47357
   macro avg       0.97      0.90      0.93     47357
weighted avg       0.98      0.93      0.96     47357
 samples avg       0.98      0.96      0.96     47357



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman = np.array(y_pred_proba_test_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5822738161046619
F1 Score (Micro) = 0.6622493761294209
F1 Score (Macro) = 0.615380325413795


In [69]:
# Model evaluation

model_eval(y_test_ekman, y_pred_test_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.58,0.54
disgust,0.73,0.41,0.53
fear,0.65,0.73,0.69
joy,0.78,0.82,0.8
sadness,0.57,0.6,0.58
surprise,0.54,0.58,0.56
neutral,0.69,0.53,0.6
MACRO-AVERAGE,0.64,0.61,0.62


In [70]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman = np.array(y_pred_proba_val_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5851455952819756
F1 Score (Micro) = 0.6658682634730538
F1 Score (Macro) = 0.5901255726274922


In [71]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.59,0.55
disgust,0.51,0.34,0.41
fear,0.67,0.56,0.61
joy,0.8,0.82,0.81
sadness,0.59,0.61,0.6
surprise,0.54,0.58,0.56
neutral,0.66,0.54,0.6
MACRO-AVERAGE,0.61,0.58,0.59


In [72]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [73]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [74]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_opt, threshold_test_ekman_opt, macro_f1_test_ekman_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_ekman)
print("The model's threshold is {}".format(threshold_test_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_opt))

The model's threshold is 0.34999999999999987
The model's best macro-f1 is 0.6163615110434285


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [76]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_opt, threshold_val_ekman_opt, macro_f1_val_ekman_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_ekman)
print("The model's threshold is {}".format(threshold_val_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_opt))

The model's threshold is 0.20999999999999996
The model's best macro-f1 is 0.6067082856691685


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [77]:
# Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.64,0.56
disgust,0.65,0.46,0.54
fear,0.61,0.73,0.66
joy,0.76,0.85,0.8
sadness,0.51,0.62,0.56
surprise,0.52,0.65,0.57
neutral,0.66,0.58,0.62
MACRO-AVERAGE,0.6,0.65,0.62


In [78]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.45,0.68,0.54
disgust,0.47,0.51,0.49
fear,0.63,0.67,0.65
joy,0.74,0.86,0.8
sadness,0.52,0.68,0.59
surprise,0.45,0.69,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.55,0.68,0.61


In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_opt, axis=1)==0)

18

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_opt, axis=1)==0)

0

In [81]:
# Handling empty predictions for test
y_pred_labels_test_ekman_opt_h = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.65,0.56
disgust,0.65,0.46,0.54
fear,0.61,0.73,0.66
joy,0.76,0.85,0.8
sadness,0.51,0.62,0.56
surprise,0.52,0.65,0.57
neutral,0.66,0.58,0.62
MACRO-AVERAGE,0.6,0.65,0.62


In [82]:
# Handling empty predictions for val
y_pred_labels_val_ekman_opt_h = np.copy(y_pred_labels_val_ekman_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.45,0.68,0.54
disgust,0.47,0.51,0.49
fear,0.63,0.67,0.65
joy,0.74,0.86,0.8
sadness,0.52,0.68,0.59
surprise,0.45,0.69,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.55,0.68,0.61


In [83]:
# Handling empty predictions
y_pred_labels_test_ekman_opt_n = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.64,0.56
disgust,0.65,0.46,0.54
fear,0.61,0.73,0.66
joy,0.76,0.85,0.8
sadness,0.51,0.62,0.56
surprise,0.52,0.65,0.57
neutral,0.65,0.59,0.62
MACRO-AVERAGE,0.6,0.65,0.62


In [84]:
# Handling empty predictions
y_pred_labels_val_ekman_opt_n = np.copy(y_pred_labels_val_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.45,0.68,0.54
disgust,0.47,0.51,0.49
fear,0.63,0.67,0.65
joy,0.74,0.86,0.8
sadness,0.52,0.68,0.59
surprise,0.45,0.69,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.55,0.68,0.61


### Saving the Model

In [85]:
PATH = "BERT_GoEmotion_ekman_8_neu.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_GoEmotion_ekman_8_neu.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 1 - Ekman taxonomy without neutral emotion

In [55]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 16
VALIDATION_BATCH_SIZE = 16
TEST_BATCH_SIZE =16
EPOCHS = 4
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [56]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy_no_neu]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [57]:
train_dataset = CustomDataset(
  train_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 6)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7645882964134216
Epoch: 2, Loss:  0.1541750282049179
Epoch: 3, Loss:  0.15041404962539673
Epoch: 4, Loss:  0.0743437111377716


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman_no_neu, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman_no_neu = np.array(y_pred_proba_train_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9265047242292477
F1 Score (Micro) = 0.9598496286938033
F1 Score (Macro) = 0.9122880081402945


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman_no_neu,target_names= EKMAN_taxonomy_no_neu)
print(cr)

              precision    recall  f1-score   support

       anger       0.96      0.93      0.95      5579
     disgust       0.99      0.62      0.76       793
        fear       0.99      0.84      0.91       726
         joy       0.99      0.98      0.98     17410
     sadness       0.96      0.91      0.93      3263
    surprise       0.98      0.91      0.94      5367

   micro avg       0.98      0.94      0.96     33138
   macro avg       0.98      0.86      0.91     33138
weighted avg       0.98      0.94      0.96     33138
 samples avg       0.98      0.96      0.97     33138



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman_no_neu, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman_no_neu = np.array(y_pred_proba_test_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7262496728605077
F1 Score (Micro) = 0.7897498474679683
F1 Score (Macro) = 0.6706858561596535


In [70]:
# Model evaluation

model_eval(y_test_ekman_no_neu, y_pred_test_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.69,0.71,0.7
disgust,0.77,0.33,0.47
fear,0.76,0.54,0.63
joy,0.87,0.92,0.89
sadness,0.67,0.61,0.64
surprise,0.72,0.67,0.69
MACRO-AVERAGE,0.75,0.63,0.67


In [71]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman_no_neu, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman_no_neu = np.array(y_pred_proba_val_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7310902451747522
F1 Score (Micro) = 0.7965957446808511
F1 Score (Macro) = 0.6644510221717567


In [72]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_val_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.69,0.69,0.69
disgust,0.6,0.3,0.4
fear,0.85,0.49,0.62
joy,0.88,0.92,0.9
sadness,0.74,0.68,0.71
surprise,0.71,0.64,0.68
MACRO-AVERAGE,0.74,0.62,0.66


In [73]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [74]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman_no_neu)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman_no_neu)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [75]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [77]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_no_neu_opt, threshold_test_ekman_no_neu_opt, macro_f1_test_ekman_no_neu_opt = proba_to_labels_opt(y_test_ekman_no_neu, y_pred_proba_test_ekman_no_neu)
print("The model's threshold is {}".format(threshold_test_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_no_neu_opt))

The model's threshold is 0.2699999999999999
The model's best macro-f1 is 0.6763130319026902


  _warn_prf(average, modifier, msg_start, len(result))


In [78]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_no_neu_opt, threshold_val_ekman_no_neu_opt, macro_f1_val_ekman_no_neu_opt = proba_to_labels_opt(y_val_ekman_no_neu, y_pred_proba_val_ekman_no_neu)
print("The model's threshold is {}".format(threshold_val_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_no_neu_opt))

The model's threshold is 0.24999999999999992
The model's best macro-f1 is 0.6815922163154405


  _warn_prf(average, modifier, msg_start, len(result))


In [79]:
# Model evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.65,0.77,0.7
disgust,0.66,0.39,0.49
fear,0.73,0.59,0.66
joy,0.83,0.94,0.88
sadness,0.62,0.65,0.64
surprise,0.68,0.71,0.69
MACRO-AVERAGE,0.69,0.68,0.68


In [80]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.63,0.77,0.69
disgust,0.52,0.4,0.45
fear,0.82,0.55,0.66
joy,0.85,0.94,0.9
sadness,0.67,0.73,0.7
surprise,0.66,0.72,0.69
MACRO-AVERAGE,0.69,0.69,0.68


In [81]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_no_neu_opt, axis=1)==0)

4

In [82]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_no_neu_opt, axis=1)==0)

0

In [83]:
# Handling empty predictions for test
y_pred_labels_test_ekman_no_neu_opt_h = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.64,0.77,0.7
disgust,0.66,0.39,0.49
fear,0.73,0.59,0.66
joy,0.83,0.94,0.88
sadness,0.62,0.65,0.64
surprise,0.68,0.71,0.69
MACRO-AVERAGE,0.69,0.68,0.68


In [84]:
# Handling empty predictions for val
y_pred_labels_val_ekman_no_neu_opt_h = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.63,0.77,0.69
disgust,0.52,0.4,0.45
fear,0.82,0.55,0.66
joy,0.85,0.94,0.9
sadness,0.67,0.73,0.7
surprise,0.66,0.72,0.69
MACRO-AVERAGE,0.69,0.69,0.68


In [85]:
# Handling empty predictions
y_pred_labels_test_ekman_no_neu_opt_n = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.65,0.77,0.7
disgust,0.66,0.39,0.49
fear,0.73,0.59,0.66
joy,0.83,0.94,0.88
sadness,0.62,0.65,0.64
surprise,0.67,0.71,0.69
MACRO-AVERAGE,0.69,0.68,0.68


In [86]:
# Handling empty predictions
y_pred_labels_val_ekman_no_neu_opt_n = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.63,0.77,0.69
disgust,0.52,0.4,0.45
fear,0.82,0.55,0.66
joy,0.85,0.94,0.9
sadness,0.67,0.73,0.7
surprise,0.66,0.72,0.69
MACRO-AVERAGE,0.69,0.69,0.68


### Saving the Model

In [87]:
PATH = "BERT_ekman_no_neu_1.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [88]:
PATH = "BERT_ekman_no_neu_1.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 2 - Ekman taxonomy without neutral emotion

In [55]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 16
VALIDATION_BATCH_SIZE = 16
TEST_BATCH_SIZE =16
EPOCHS = 5
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [56]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy_no_neu]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [57]:
train_dataset = CustomDataset(
  train_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 6)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7645882964134216
Epoch: 2, Loss:  0.1541750282049179
Epoch: 3, Loss:  0.15041404962539673
Epoch: 4, Loss:  0.0743437111377716
Epoch: 5, Loss:  0.013070906512439251


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman_no_neu, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman_no_neu = np.array(y_pred_proba_train_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9407591460424364
F1 Score (Micro) = 0.9688477341019194
F1 Score (Macro) = 0.9444706934819429


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman_no_neu,target_names= EKMAN_taxonomy_no_neu)
print(cr)

              precision    recall  f1-score   support

       anger       0.99      0.90      0.94      5579
     disgust       0.93      0.84      0.88       793
        fear       0.91      0.97      0.94       726
         joy       0.99      0.99      0.99     17410
     sadness       0.98      0.93      0.96      3263
    surprise       0.99      0.93      0.96      5367

   micro avg       0.98      0.95      0.97     33138
   macro avg       0.97      0.93      0.94     33138
weighted avg       0.98      0.95      0.97     33138
 samples avg       0.99      0.97      0.98     33138



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman_no_neu, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman_no_neu = np.array(y_pred_proba_test_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7176131902643287
F1 Score (Micro) = 0.7865550297294018
F1 Score (Macro) = 0.6811360158578855


In [69]:
# Model evaluation

model_eval(y_test_ekman_no_neu, y_pred_test_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.75,0.61,0.67
disgust,0.54,0.44,0.48
fear,0.59,0.82,0.69
joy,0.85,0.93,0.89
sadness,0.67,0.64,0.65
surprise,0.72,0.68,0.7
MACRO-AVERAGE,0.69,0.69,0.68


In [70]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman_no_neu, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman_no_neu = np.array(y_pred_proba_val_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7190923317683882
F1 Score (Micro) = 0.7920744231001571
F1 Score (Macro) = 0.6747871841255177


In [71]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_val_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.76,0.59,0.66
disgust,0.43,0.45,0.44
fear,0.64,0.72,0.68
joy,0.87,0.93,0.9
sadness,0.7,0.68,0.69
surprise,0.69,0.66,0.67
MACRO-AVERAGE,0.68,0.67,0.67


In [72]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [73]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman_no_neu)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman_no_neu)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [74]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_no_neu_opt, threshold_test_ekman_no_neu_opt, macro_f1_test_ekman_no_neu_opt = proba_to_labels_opt(y_test_ekman_no_neu, y_pred_proba_test_ekman_no_neu)
print("The model's threshold is {}".format(threshold_test_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_no_neu_opt))

The model's threshold is 0.5599999999999997
The model's best macro-f1 is 0.6839292626013669


In [76]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_no_neu_opt, threshold_val_ekman_no_neu_opt, macro_f1_val_ekman_no_neu_opt = proba_to_labels_opt(y_val_ekman_no_neu, y_pred_proba_val_ekman_no_neu)
print("The model's threshold is {}".format(threshold_val_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_no_neu_opt))

The model's threshold is 0.3799999999999999
The model's best macro-f1 is 0.6773015336674835


In [77]:
# Model evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.75,0.6,0.67
disgust,0.55,0.42,0.48
fear,0.62,0.82,0.71
joy,0.86,0.93,0.89
sadness,0.69,0.63,0.66
surprise,0.73,0.68,0.7
MACRO-AVERAGE,0.7,0.68,0.68


In [78]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.75,0.62,0.68
disgust,0.41,0.49,0.45
fear,0.61,0.74,0.67
joy,0.86,0.94,0.9
sadness,0.68,0.71,0.69
surprise,0.68,0.69,0.68
MACRO-AVERAGE,0.66,0.7,0.68


In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_no_neu_opt, axis=1)==0)

75

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_no_neu_opt, axis=1)==0)

20

#### Final

In [81]:
# Handling empty predictions for test
y_pred_labels_test_ekman_no_neu_opt_h = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.72,0.64,0.68
disgust,0.55,0.42,0.48
fear,0.62,0.82,0.71
joy,0.86,0.93,0.89
sadness,0.69,0.63,0.66
surprise,0.73,0.68,0.7
MACRO-AVERAGE,0.7,0.69,0.69


In [82]:
# Handling empty predictions for val
y_pred_labels_val_ekman_no_neu_opt_h = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.73,0.63,0.68
disgust,0.41,0.49,0.45
fear,0.61,0.74,0.67
joy,0.86,0.94,0.9
sadness,0.68,0.71,0.69
surprise,0.68,0.69,0.68
MACRO-AVERAGE,0.66,0.7,0.68


In [83]:
# Handling empty predictions
y_pred_labels_test_ekman_no_neu_opt_n = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.75,0.6,0.67
disgust,0.55,0.42,0.48
fear,0.62,0.82,0.71
joy,0.86,0.93,0.89
sadness,0.69,0.63,0.66
surprise,0.67,0.69,0.68
MACRO-AVERAGE,0.69,0.68,0.68


In [84]:
# Handling empty predictions
y_pred_labels_val_ekman_no_neu_opt_n = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.75,0.62,0.68
disgust,0.41,0.49,0.45
fear,0.61,0.74,0.67
joy,0.86,0.94,0.9
sadness,0.68,0.71,0.69
surprise,0.66,0.7,0.68
MACRO-AVERAGE,0.66,0.7,0.68


### Saving the Model

In [85]:
PATH = "BERT_ekman_no_neu_2.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_ekman_no_neu_2.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 3 - Ekman taxonomy without neutral emotion

In [55]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TEST_BATCH_SIZE =32
EPOCHS = 4
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [56]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy_no_neu]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [57]:
train_dataset = CustomDataset(
  train_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 6)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7556139230728149
Epoch: 2, Loss:  0.11735661327838898
Epoch: 3, Loss:  0.12371546030044556
Epoch: 4, Loss:  0.08023136109113693


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman_no_neu, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman_no_neu = np.array(y_pred_proba_train_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9256873835289502
F1 Score (Micro) = 0.959016393442623
F1 Score (Macro) = 0.9182537953670549


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman_no_neu,target_names= EKMAN_taxonomy_no_neu)
print(cr)

              precision    recall  f1-score   support

       anger       0.98      0.90      0.94      5579
     disgust       0.96      0.64      0.77       793
        fear       0.97      0.92      0.94       726
         joy       0.99      0.98      0.98     17410
     sadness       0.94      0.94      0.94      3263
    surprise       0.98      0.89      0.93      5367

   micro avg       0.98      0.94      0.96     33138
   macro avg       0.97      0.88      0.92     33138
weighted avg       0.98      0.94      0.96     33138
 samples avg       0.98      0.96      0.97     33138



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman_no_neu, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman_no_neu = np.array(y_pred_proba_test_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7335775974875687
F1 Score (Micro) = 0.7919793332513224
F1 Score (Macro) = 0.6891994006641294


In [69]:
# Model evaluation

model_eval(y_test_ekman_no_neu, y_pred_test_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.74,0.62,0.68
disgust,0.79,0.37,0.51
fear,0.72,0.68,0.7
joy,0.87,0.93,0.9
sadness,0.65,0.67,0.66
surprise,0.74,0.66,0.7
MACRO-AVERAGE,0.75,0.66,0.69


In [70]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman_no_neu, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman_no_neu = np.array(y_pred_proba_val_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.736567553468962
F1 Score (Micro) = 0.8008811650960715
F1 Score (Macro) = 0.680549024192565


In [71]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_val_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.74,0.64,0.69
disgust,0.59,0.31,0.41
fear,0.83,0.64,0.72
joy,0.89,0.92,0.9
sadness,0.67,0.71,0.69
surprise,0.73,0.64,0.68
MACRO-AVERAGE,0.74,0.64,0.68


In [72]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [73]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman_no_neu)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman_no_neu)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [74]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_no_neu_opt, threshold_test_ekman_no_neu_opt, macro_f1_test_ekman_no_neu_opt = proba_to_labels_opt(y_test_ekman_no_neu, y_pred_proba_test_ekman_no_neu)
print("The model's threshold is {}".format(threshold_test_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_no_neu_opt))

The model's threshold is 0.3699999999999999
The model's best macro-f1 is 0.6939776962263972


  _warn_prf(average, modifier, msg_start, len(result))


In [76]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_no_neu_opt, threshold_val_ekman_no_neu_opt, macro_f1_val_ekman_no_neu_opt = proba_to_labels_opt(y_val_ekman_no_neu, y_pred_proba_val_ekman_no_neu)
print("The model's threshold is {}".format(threshold_val_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_no_neu_opt))

The model's threshold is 0.33999999999999986
The model's best macro-f1 is 0.6900663899575932


  _warn_prf(average, modifier, msg_start, len(result))


In [77]:
# Model evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.7,0.66,0.68
disgust,0.78,0.42,0.55
fear,0.68,0.7,0.69
joy,0.85,0.94,0.89
sadness,0.61,0.69,0.65
surprise,0.7,0.71,0.7
MACRO-AVERAGE,0.72,0.69,0.69


In [78]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.7,0.68,0.69
disgust,0.58,0.41,0.48
fear,0.75,0.67,0.71
joy,0.87,0.94,0.9
sadness,0.61,0.75,0.67
surprise,0.69,0.68,0.69
MACRO-AVERAGE,0.7,0.69,0.69


In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_no_neu_opt, axis=1)==0)

7

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_no_neu_opt, axis=1)==0)

9

#### Final

In [81]:
# Handling empty predictions for test
y_pred_labels_test_ekman_no_neu_opt_h = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.7,0.66,0.68
disgust,0.78,0.42,0.55
fear,0.68,0.7,0.69
joy,0.85,0.94,0.89
sadness,0.61,0.69,0.65
surprise,0.7,0.71,0.7
MACRO-AVERAGE,0.72,0.69,0.69


In [82]:
# Handling empty predictions for val
y_pred_labels_val_ekman_no_neu_opt_h = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.69,0.69,0.69
disgust,0.58,0.41,0.48
fear,0.75,0.67,0.71
joy,0.87,0.94,0.9
sadness,0.61,0.75,0.67
surprise,0.69,0.68,0.69
MACRO-AVERAGE,0.7,0.69,0.69


In [83]:
# Handling empty predictions
y_pred_labels_test_ekman_no_neu_opt_n = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.7,0.66,0.68
disgust,0.78,0.42,0.55
fear,0.68,0.7,0.69
joy,0.85,0.94,0.89
sadness,0.61,0.69,0.65
surprise,0.69,0.71,0.7
MACRO-AVERAGE,0.72,0.69,0.69


In [84]:
# Handling empty predictions
y_pred_labels_val_ekman_no_neu_opt_n = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.7,0.68,0.69
disgust,0.58,0.41,0.48
fear,0.75,0.67,0.71
joy,0.87,0.94,0.9
sadness,0.61,0.75,0.67
surprise,0.68,0.69,0.68
MACRO-AVERAGE,0.7,0.69,0.69


### Saving the Model

In [85]:
PATH = "BERT_ekman_no_neu_3.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_ekman_no_neu_3.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model 4 - Ekman taxonomy without neutral emotion

## Final

In [55]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TEST_BATCH_SIZE =32
EPOCHS = 5
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [56]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy_no_neu]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [57]:
train_dataset = CustomDataset(
  train_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman_no_neu,
  tokenizer,
  max_len=MAX_LEN
)

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 6)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7556139230728149
Epoch: 2, Loss:  0.11735661327838898
Epoch: 3, Loss:  0.12371546030044556
Epoch: 4, Loss:  0.08023136109113693
Epoch: 5, Loss:  0.03714669868350029


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman_no_neu, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman_no_neu = np.array(y_pred_proba_train_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9440285088436263
F1 Score (Micro) = 0.9705056826024939
F1 Score (Macro) = 0.9470280114193118


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman_no_neu,target_names= EKMAN_taxonomy_no_neu)
print(cr)

              precision    recall  f1-score   support

       anger       0.98      0.94      0.96      5579
     disgust       0.86      0.89      0.87       793
        fear       0.98      0.94      0.96       726
         joy       1.00      0.98      0.99     17410
     sadness       0.99      0.91      0.95      3263
    surprise       0.99      0.93      0.96      5367

   micro avg       0.99      0.95      0.97     33138
   macro avg       0.97      0.93      0.95     33138
weighted avg       0.99      0.95      0.97     33138
 samples avg       0.99      0.97      0.98     33138



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman_no_neu, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman_no_neu = np.array(y_pred_proba_test_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7147343627322691
F1 Score (Micro) = 0.7877392779258541
F1 Score (Macro) = 0.6824480679445012


In [69]:
# Model evaluation

model_eval(y_test_ekman_no_neu, y_pred_test_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.65,0.72,0.68
disgust,0.48,0.5,0.49
fear,0.69,0.69,0.69
joy,0.89,0.91,0.9
sadness,0.67,0.58,0.62
surprise,0.73,0.68,0.7
MACRO-AVERAGE,0.69,0.68,0.68


In [70]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman_no_neu, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman_no_neu = np.array(y_pred_proba_val_ekman_no_neu) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman_no_neu)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman_no_neu, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.7237871674491393
F1 Score (Micro) = 0.7940962980885555
F1 Score (Macro) = 0.6860082930308394


In [71]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_val_ekman_no_neu, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.65,0.73,0.69
disgust,0.41,0.52,0.46
fear,0.8,0.64,0.71
joy,0.9,0.9,0.9
sadness,0.75,0.64,0.69
surprise,0.71,0.63,0.67
MACRO-AVERAGE,0.7,0.68,0.69


In [72]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [73]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman_no_neu)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman_no_neu)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman_no_neu)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [74]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_no_neu_opt, threshold_test_ekman_no_neu_opt, macro_f1_test_ekman_no_neu_opt = proba_to_labels_opt(y_test_ekman_no_neu, y_pred_proba_test_ekman_no_neu)
print("The model's threshold is {}".format(threshold_test_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_no_neu_opt))

The model's threshold is 0.3599999999999999
The model's best macro-f1 is 0.6852697495269314


In [76]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_no_neu_opt, threshold_val_ekman_no_neu_opt, macro_f1_val_ekman_no_neu_opt = proba_to_labels_opt(y_val_ekman_no_neu, y_pred_proba_val_ekman_no_neu)
print("The model's threshold is {}".format(threshold_val_ekman_no_neu_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_no_neu_opt))

The model's threshold is 0.48999999999999977
The model's best macro-f1 is 0.6874568108701261


In [77]:
# Model evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.62,0.76,0.68
disgust,0.45,0.54,0.49
fear,0.67,0.73,0.7
joy,0.88,0.92,0.9
sadness,0.64,0.63,0.64
surprise,0.7,0.7,0.7
MACRO-AVERAGE,0.66,0.72,0.69


In [78]:
# Model evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.65,0.74,0.69
disgust,0.41,0.52,0.46
fear,0.8,0.64,0.71
joy,0.9,0.9,0.9
sadness,0.75,0.64,0.69
surprise,0.72,0.64,0.67
MACRO-AVERAGE,0.7,0.68,0.69


In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_no_neu_opt, axis=1)==0)

12

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_no_neu_opt, axis=1)==0)

34

#### Final

In [81]:
# Handling empty predictions for test
y_pred_labels_test_ekman_no_neu_opt_h = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.62,0.77,0.68
disgust,0.45,0.54,0.49
fear,0.67,0.73,0.7
joy,0.88,0.92,0.9
sadness,0.64,0.63,0.64
surprise,0.7,0.7,0.7
MACRO-AVERAGE,0.66,0.72,0.69


In [82]:
# Handling empty predictions for val
y_pred_labels_val_ekman_no_neu_opt_h = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_no_neu_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_no_neu_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_h, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.63,0.75,0.69
disgust,0.41,0.52,0.46
fear,0.8,0.64,0.71
joy,0.9,0.9,0.9
sadness,0.75,0.64,0.69
surprise,0.72,0.64,0.67
MACRO-AVERAGE,0.7,0.68,0.69


In [83]:
# Handling empty predictions
y_pred_labels_test_ekman_no_neu_opt_n = np.copy(y_pred_labels_test_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman_no_neu, y_pred_labels_test_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.62,0.76,0.68
disgust,0.45,0.54,0.49
fear,0.67,0.73,0.7
joy,0.88,0.92,0.9
sadness,0.64,0.63,0.64
surprise,0.69,0.71,0.7
MACRO-AVERAGE,0.66,0.72,0.69


In [84]:
# Handling empty predictions
y_pred_labels_val_ekman_no_neu_opt_n = np.copy(y_pred_labels_val_ekman_no_neu_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_no_neu_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman_no_neu, y_pred_labels_val_ekman_no_neu_opt_n, EKMAN_taxonomy_no_neu)

Unnamed: 0,Precision,Recall,F1
anger,0.65,0.74,0.69
disgust,0.41,0.52,0.46
fear,0.8,0.64,0.71
joy,0.9,0.9,0.9
sadness,0.75,0.64,0.69
surprise,0.69,0.65,0.67
MACRO-AVERAGE,0.7,0.68,0.69


### Saving the Model

In [85]:
PATH = "BERT_ekman_no_neu_4.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_ekman_no_neu_4.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

###  Model special - Ekman taxonomy with neutral emotion



In [55]:
MAX_LEN = max_length
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TEST_BATCH_SIZE =32
EPOCHS = 5
LEARNING_RATE = 3e-05
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)

### Dataset & Data Loaders

In [56]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe[EKMAN_taxonomy]
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding = 'max_length',
            return_token_type_ids=True,
            verbose = True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [57]:
train_dataset = CustomDataset(
  train_ekman,
  tokenizer,
  max_len=MAX_LEN
)

validation_dataset = CustomDataset(
  val_ekman,
  tokenizer,
  max_len=MAX_LEN
)

test_dataset = CustomDataset(
  test_ekman,
  tokenizer,
  max_len=MAX_LEN
)

In [58]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True
                }
validation_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

test_params = {'batch_size': TEST_BATCH_SIZE,
                'shuffle': False
                }

train_loader = DataLoader(train_dataset, **train_params)
validation_loader = DataLoader(validation_dataset, **validation_params)
test_loader = DataLoader(test_dataset, **test_params)

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Model

In [60]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
#         self.l2 = torch.nn.Dropout(0.15)
        self.l3 = torch.nn.Linear(768, 7)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)
#         output_2 = self.l2(output)
        output = self.l3(output)
        return output

model = BERTClass()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [61]:
import random
import time

loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(params =  model.parameters(), lr=LEARNING_RATE)

In [62]:
def train(epoch):
    model.train()
    for _,data in enumerate(train_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')
        
        # Update parameters and the learning rate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

!pip install GPUtil

### Start Training

In [63]:
for epoch in range(EPOCHS):
    train(epoch)

Epoch: 1, Loss:  0.7545009255409241
Epoch: 2, Loss:  0.193465918302536
Epoch: 3, Loss:  0.16060632467269897
Epoch: 4, Loss:  0.09308058768510818
Epoch: 5, Loss:  0.08169709146022797


In [64]:
# Model evaluation function 
def model_eval(y_true, y_pred_labels, emotions):
    
    # Defining variables
    precision = []
    recall = []
    f1 = []
    
    # Per emotion evaluation      
    idx2emotion = {i: e for i, e in enumerate(emotions)}
    
    for i in range(len(emotions)):
   
        # Computing precision, recall and f1-score
        p, r, f1_score, _ = precision_recall_fscore_support(y_true[:, i], y_pred_labels[:, i], average="binary")
        
        # Append results in lists
        precision.append(round(p, 2))
        recall.append(round(r, 2))
        f1.append(round(f1_score, 2))
    
    # Macro evaluation
    macro_p, macro_r, macro_f1_score, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
    
    # Append results in lists
    precision.append(round(macro_p, 2))
    recall.append(round(macro_r, 2))
    f1.append(round(macro_f1_score, 2))
    
    # Converting results to a dataframe
    df_results = pd.DataFrame({"Precision":precision, "Recall":recall, 'F1':f1})
    df_results.index = emotions+['MACRO-AVERAGE']
    
    return df_results

In [65]:
def validation(loader,model):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

 for epoch 5 test - lr: 3e-05
#for epoch in range(EPOCHS):
y_pred_proba, targets = validation(1) # epoch
outputs = np.array(y_pred_proba) >= 0.3
accuracy = metrics.accuracy_score(targets, outputs)
f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
f1_score_macro = metrics.f1_score(targets, outputs, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

#### Results for epoch=4, lr=3e-05

In [66]:
# train results
#for epoch in range(EPOCHS):
y_pred_proba_train_ekman, targets_train = validation(train_loader,model) # epoch
y_pred_train_ekman = np.array(y_pred_proba_train_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_train, y_pred_train_ekman)
f1_score_micro = metrics.f1_score(targets_train, y_pred_train_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_train, y_pred_train_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.9188435844275512
F1 Score (Micro) = 0.9554364504765404
F1 Score (Macro) = 0.9307411001362021


In [67]:
cr = classification_report(targets_train,y_pred_train_ekman,target_names= EKMAN_taxonomy)
print(cr)

              precision    recall  f1-score   support

       anger       0.94      0.94      0.94      5579
     disgust       0.97      0.74      0.84       793
        fear       0.96      0.90      0.93       726
         joy       0.99      0.97      0.98     17410
     sadness       0.96      0.92      0.94      3263
    surprise       0.96      0.91      0.93      5367
     neutral       1.00      0.92      0.95     14219

   micro avg       0.98      0.93      0.96     47357
   macro avg       0.97      0.90      0.93     47357
weighted avg       0.98      0.93      0.96     47357
 samples avg       0.98      0.96      0.96     47357



  _warn_prf(average, modifier, msg_start, len(result))


#### Model evaluation
model_eval(y_train, y_pred_train_GE, EKMAN_taxonomy)

In [68]:
# test results
#for epoch in range(EPOCHS):
y_pred_proba_test_ekman, targets_test = validation(test_loader,model) # epoch
y_pred_test_ekman = np.array(y_pred_proba_test_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_test, y_pred_test_ekman)
f1_score_micro = metrics.f1_score(targets_test, y_pred_test_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_test, y_pred_test_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5822738161046619
F1 Score (Micro) = 0.6622493761294209
F1 Score (Macro) = 0.615380325413795


In [69]:
# Model evaluation

model_eval(y_test_ekman, y_pred_test_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.58,0.54
disgust,0.73,0.41,0.53
fear,0.65,0.73,0.69
joy,0.78,0.82,0.8
sadness,0.57,0.6,0.58
surprise,0.54,0.58,0.56
neutral,0.69,0.53,0.6
MACRO-AVERAGE,0.64,0.61,0.62


In [70]:
# validation results
#for epoch in range(EPOCHS):
y_pred_proba_val_ekman, targets_val = validation(validation_loader,model) # epoch
y_pred_val_ekman = np.array(y_pred_proba_val_ekman) >= 0.5
accuracy = metrics.accuracy_score(targets_val, y_pred_val_ekman)
f1_score_micro = metrics.f1_score(targets_val, y_pred_val_ekman, average='micro')
f1_score_macro = metrics.f1_score(targets_val, y_pred_val_ekman, average='macro')

print(f"Accuracy Score = {accuracy}")
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

Accuracy Score = 0.5851455952819756
F1 Score (Micro) = 0.6658682634730538
F1 Score (Macro) = 0.5901255726274922


In [71]:
# Model evaluation
model_eval(y_val_ekman, y_pred_val_ekman, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.51,0.59,0.55
disgust,0.51,0.34,0.41
fear,0.67,0.56,0.61
joy,0.8,0.82,0.81
sadness,0.59,0.61,0.6
surprise,0.54,0.58,0.56
neutral,0.66,0.54,0.6
MACRO-AVERAGE,0.61,0.58,0.59


In [72]:
# from probabilities to labels using a given threshold
def proba_to_labels(y_pred_proba, threshold=0.8):
    
    y_pred_labels = np.zeros_like(y_pred_proba)
    
    for i in range(y_pred_labels .shape[0]):
        for j in range(y_pred_labels .shape[1]):
            if y_pred_proba[i][j] > threshold:
                y_pred_labels[i][j] = 1
            else:
                y_pred_labels[i][j] = 0
                
    return y_pred_labels

In [73]:
# Generate labels for train, test and val
y_pred_train_labels = proba_to_labels(y_pred_proba_train_ekman)

y_pred_test_labels = proba_to_labels(y_pred_proba_test_ekman)

y_pred_val_labels = proba_to_labels(y_pred_proba_val_ekman)

y_pred_labels = np.zeros_like(y_pred_proba)
y_pred_labels.shape[0]

In [74]:
# Function that computes labels from probabilities and optimizes the threshold that maximizes f1-score
def proba_to_labels_opt(y_true, y_pred_proba):
    
    '''
    Inputs:
        y_true: Ground truth labels
        y_pred_proba: predicted probabilities
        
    Outputs :
        best_y_pred_labels: preticted labels associated with best threshold
        best_t: best threshold
        best_macro_f1: macro f1-score associated with predicted labels
    '''
    
    # range of possible thresholds
    thresholds = np.arange(0.1, 0.99, 0.01)
    
    # Computing threshold that maximizes macro f1-score 
    best_y_pred_labels = np.zeros_like(y_pred_proba)
    best_t = 0
    best_macro_f1 = 0
    
    # Iterating through possible thresholds
    for t in thresholds:
        
        y_pred_labels = proba_to_labels(y_pred_proba, t)
                             
        _, _, macro_f1, _ = precision_recall_fscore_support(y_true, y_pred_labels, average="macro")
        
        if macro_f1 > best_macro_f1:
            best_macro_f1 = macro_f1
            best_t = t
            best_y_pred_labels = y_pred_labels
            
    return best_y_pred_labels, best_t, best_macro_f1

In [75]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_test_ekman_opt, threshold_test_ekman_opt, macro_f1_test_ekman_opt = proba_to_labels_opt(y_test_ekman, y_pred_proba_test_ekman)
print("The model's threshold is {}".format(threshold_test_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_test_ekman_opt))

The model's threshold is 0.34999999999999987
The model's best macro-f1 is 0.6163615110434285


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [76]:
# Compute label predictions and corresponding optimal thresholds 
y_pred_labels_val_ekman_opt, threshold_val_ekman_opt, macro_f1_val_ekman_opt = proba_to_labels_opt(y_val_ekman, y_pred_proba_val_ekman)
print("The model's threshold is {}".format(threshold_val_ekman_opt))
print("The model's best macro-f1 is {}".format(macro_f1_val_ekman_opt))

The model's threshold is 0.20999999999999996
The model's best macro-f1 is 0.6067082856691685


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [77]:
# Model evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.64,0.56
disgust,0.65,0.46,0.54
fear,0.61,0.73,0.66
joy,0.76,0.85,0.8
sadness,0.51,0.62,0.56
surprise,0.52,0.65,0.57
neutral,0.66,0.58,0.62
MACRO-AVERAGE,0.6,0.65,0.62


In [78]:
# Model evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.45,0.68,0.54
disgust,0.47,0.51,0.49
fear,0.63,0.67,0.65
joy,0.74,0.86,0.8
sadness,0.52,0.68,0.59
surprise,0.45,0.69,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.55,0.68,0.61


In [79]:
# Number of predictions with no positive label for test
sum(np.sum(y_pred_labels_test_ekman_opt, axis=1)==0)

18

In [80]:
# Number of predictions with no positive label for val
sum(np.sum(y_pred_labels_val_ekman_opt, axis=1)==0)

0

#### Final

In [81]:
# Handling empty predictions for test
y_pred_labels_test_ekman_opt_h = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_test_ekman_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_test_ekman_opt_h[i])]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.65,0.56
disgust,0.65,0.46,0.54
fear,0.61,0.73,0.66
joy,0.76,0.85,0.8
sadness,0.51,0.62,0.56
surprise,0.52,0.65,0.57
neutral,0.66,0.58,0.62
MACRO-AVERAGE,0.6,0.65,0.62


In [82]:
# Handling empty predictions for val
y_pred_labels_val_ekman_opt_h = np.copy(y_pred_labels_val_ekman_opt)

# if no predictions ==> label with highest proba
for i, pred in enumerate(y_pred_labels_val_ekman_opt_h):
    if pred.sum()==0:
        pred[np.argmax(y_pred_labels_val_ekman_opt_h[i])]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_h, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.45,0.68,0.54
disgust,0.47,0.51,0.49
fear,0.63,0.67,0.65
joy,0.74,0.86,0.8
sadness,0.52,0.68,0.59
surprise,0.45,0.69,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.55,0.68,0.61


In [83]:
# Handling empty predictions
y_pred_labels_test_ekman_opt_n = np.copy(y_pred_labels_test_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_test_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_test_ekman, y_pred_labels_test_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.49,0.64,0.56
disgust,0.65,0.46,0.54
fear,0.61,0.73,0.66
joy,0.76,0.85,0.8
sadness,0.51,0.62,0.56
surprise,0.52,0.65,0.57
neutral,0.65,0.59,0.62
MACRO-AVERAGE,0.6,0.65,0.62


In [84]:
# Handling empty predictions
y_pred_labels_val_ekman_opt_n = np.copy(y_pred_labels_val_ekman_opt)

# if no predictions ==> neutral
for pred in y_pred_labels_val_ekman_opt_n:
    if pred.sum()==0:
        pred[-1]=1

# Evaluation
model_eval(y_val_ekman, y_pred_labels_val_ekman_opt_n, EKMAN_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.45,0.68,0.54
disgust,0.47,0.51,0.49
fear,0.63,0.67,0.65
joy,0.74,0.86,0.8
sadness,0.52,0.68,0.59
surprise,0.45,0.69,0.55
neutral,0.61,0.66,0.64
MACRO-AVERAGE,0.55,0.68,0.61


### Saving the Model

In [85]:
PATH = "BERT_ekman_no_neu_special.pt"
torch.save(model.state_dict(), PATH)


#### Loading the model

In [86]:
PATH = "BERT_ekman_no_neu_special.pt"
model_GE_no_neu_1 = BERTClass()
model_GE_no_neu_1.load_state_dict(torch.load(PATH))
model_GE_no_neu_1.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

### 2.4.4 - Indirect evaluation on Ekman taxonomy by mapping predictions

Until now, we have only evaluated our model on the GoEmotions taxonomy.

As a reference, we can try to map the true and predicted emotions to the Ekman taxonomy and see how our model performs.

We have already defined the Ekman taxonomy earlier.

Let's define a function that transforms labels from GoEmotions to Ekman taxonomy.

In [26]:
# Function thats maps predictions on GoEmotions taxonomy to Ekman taxonomy
def GE_to_Ekman(GE_labels):
    
    # Create a dataframe of GoEmotions labels
    df_GE = pd.DataFrame(GE_labels, columns=GE_taxonomy)

    # Create an empty dataframe of Ekman labels
    df_Ekman  = pd.DataFrame(np.zeros((len(GE_labels), len(Ekman_taxonomy))), columns=Ekman_taxonomy)

    for i in range(len(df_GE)):

        if df_GE.loc[i,['anger', 'annoyance', 'disapproval']].sum() >= 1:
            df_Ekman.loc[i,'anger'] = 1

        if df_GE.loc[i,'disgust'].sum() >= 1:
            df_Ekman.loc[i,'disgust'] = 1

        if df_GE.loc[i,['fear', 'nervousness']].sum() >= 1:
            df_Ekman.loc[i,'fear'] = 1

        if df_GE.loc[i,['joy', 'amusement', 'approval', 'excitement', 'gratitude',
                        'love', 'optimism', 'relief', 'pride', 'admiration', 'desire','caring']].sum() >= 1:
            df_Ekman.loc[i,'joy'] = 1 

        if df_GE.loc[i,'neutral'].sum() >= 1:
            df_Ekman.loc[i,'neutral'] = 1

        if df_GE.loc[i,['sadness', 'disappointment', 'embarrassment', 'grief', 'remorse']].sum() >= 1:
            df_Ekman.loc[i,'sadness'] = 1

        if df_GE.loc[i,['surprise', 'realization', 'confusion', 'curiosity']].sum() >= 1:
            df_Ekman.loc[i,'surprise'] = 1

    return df_Ekman.values

We can now apply our function and evaluate the predictions

In [27]:
# Mapping GoEmotion labels to Ekman labels (true and predictions)
y_test_Ekman = GE_to_Ekman(y_test)
y_pred_labels_Ekman = GE_to_Ekman(y_pred_labels_opt_n)

# Evaluation
model_eval(y_test_Ekman, y_pred_labels_Ekman, Ekman_taxonomy)

Unnamed: 0,Precision,Recall,F1
anger,0.6,0.41,0.49
disgust,0.3,0.74,0.43
fear,0.47,0.79,0.59
joy,0.78,0.82,0.8
sadness,0.4,0.6,0.48
surprise,0.47,0.7,0.56
neutral,0.68,0.47,0.56
MACRO-AVERAGE,0.53,0.65,0.56


Our model obtained a **reasonable score** on the Ekman taxonomy. However, we expected more when switching from 28 emotions to only 7 emotions.
