In [None]:
## Setup


In [None]:
import os
import shutil
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization  # to create AdamW optimizer
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import wordcloud
import nltk
import unicodedata
import string

# NLTK corpora/models fetched for the rule-based detector further down
# (downloads are skipped by NLTK when the resource is already up to date).
for nltk_resource in ('stopwords', 'wordnet', 'punkt', 'averaged_perceptron_tagger', 'omw-1.4'):
  nltk.download(nltk_resource)

# Only show TensorFlow log records of level ERROR and above.
tf.get_logger().setLevel('ERROR')

## ML-based Emotion Detection

### Datasets

#### Importing, extracting, normalising EmoBank, GoEmotion & ISEAR

EmoBank Dataset

In [None]:
# Load EmoBank with explicit column names; skiprows=1 skips the first line of the
# file (presumably its own header row -- confirm against the CSV).
train_emo_ds = pd.read_csv("content/Emotion-Detection-Datasets/emobank.csv",names=["id", "split", "V", "A", "D","text"],skiprows=1)

def normalize(data, low=1, high=5):
  """Linearly rescale ratings from the interval [low, high] to [0, 1].

  With the defaults this is exactly ``(data-1)/4``, i.e. a 1..5 rating scale
  mapped onto 0..1.

  Args:
    data: a number or NumPy array of ratings.
    low: rating that should map to 0.
    high: rating that should map to 1 (must differ from ``low``).

  Returns:
    The rescaled value(s), same shape as ``data``.
  """
  return (data-low)/(high-low)

# Move the text and the three rating columns out of the frame (pop removes them
# from train_emo_ds) and rescale every rating with normalize().
emo_data = np.array(train_emo_ds.pop('text'))
emo_v, emo_a, emo_d = (
  normalize(np.array(train_emo_ds.pop(rating_column)))
  for rating_column in ('V', 'A', 'D')
)
# One row per sentence: [valence, arousal, dominance]
emo_vad = np.stack((emo_v, emo_a, emo_d), axis=1)
print(emo_vad)



GoEmotion Dataset

In [9]:
# NRC-VAD lexicon: word -> [valence, arousal, dominance]
lexicon_path = "content/Emotion-Detection-Datasets/NRC-VAD-Lexicon.txt"
lex = {}

with open(lexicon_path) as f:
  for line in f:
    words = line.split()
    try:
      lex[words[0]] = [float(words[1]),float(words[2]),float(words[3])]
    except (IndexError, ValueError):
      # Lines that are not "<word> <v> <a> <d>" (blank lines, a header row,
      # multi-word entries) cannot be parsed and are skipped on purpose.
      continue

# The 28 GoEmotions label columns, in the order used for the vote vectors below.
labels = ["admiration","amusement","anger",
          "annoyance","approval","caring","confusion",
          "curiosity","desire","disappointment","disapproval",
          "disgust","embarrassment","excitement","fear","gratitude",
          "grief","joy","love","nervousness","optimism","pride",
          "realization","relief","remorse","sadness","surprise","neutral"]

raw_data = pd.read_csv("content/Emotion-Detection-Datasets/goemotions.csv")
data_text = raw_data[["text"]]
data_scores = raw_data[labels]

# One [sentence, VAD placeholder, counter] entry per CSV row (same structure the
# original cell produced; it is not used for the go_* arrays below).
train_text = [[entry,[0,0,0],0] for entry in data_text["text"]]

# A sentence can occur in several rows (presumably one per annotator -- confirm
# against the CSV). Sum the 0/1 label columns of all rows sharing a sentence.
# sort=False keeps the sentences in order of first appearance; rows whose text
# is missing are dropped by groupby.
votes_per_sentence = raw_data.groupby("text", sort=False)[labels].sum()
text = dict(zip(votes_per_sentence.index, votes_per_sentence.to_numpy()))

print(len(text))

# VAD of every label word, shape (28, 3). Raises KeyError if a label is not in
# the lexicon.
label_vad = np.array([lex[label] for label in labels], dtype=float)

# scores[sentence] = [vote-weighted mean VAD of its labels, number of votes]
scores = {}
for sentence, votes in text.items():
  n_votes = int(votes.sum())
  if n_votes == 0:
    # No emotion label at all: there is no VAD target for this sentence.
    # (The original `del item` only unbound the loop variable, so these
    # sentences stayed in the data with a bogus VAD of [0, 0, 0].)
    continue
  scores[sentence] = [(votes @ label_vad / n_votes).tolist(), n_votes]

# prepare GoEmotions data to format
go_data = np.array(list(scores))
go_vad = np.array([vad for vad, _ in scores.values()], dtype=float).reshape(-1, 3)
go_v, go_a, go_d = (go_vad[:, column].copy() for column in range(3))

print(go_vad)

ISEAR Dataset

In [None]:
# NRC-VAD lexicon: word -> [valence, arousal, dominance]
lexicon_path = "content/Emotion-Detection-Datasets/NRC-VAD-Lexicon.txt"
lex = {}

with open(lexicon_path) as f:
  for line in f:
    words = line.split()
    try:
      lex[words[0]] = [float(words[1]),float(words[2]),float(words[3])]
    except (IndexError, ValueError):
      # Lines that are not "<word> <v> <a> <d>" are skipped on purpose.
      continue

raw_data = pd.read_csv('content/Emotion-Detection-Datasets/isear.csv', delimiter = '|', on_bad_lines='skip', encoding='ISO-8859-1')
data_text = raw_data[["SIT"]]
data_scores = raw_data[["EMOT"]]

# Numeric EMOT code -> emotion word looked up in the lexicon.
isear_code_to_emotion = {1: "joy", 2: "fear", 3: "anger", 4: "sadness",
                         5: "disgust", 6: "shame", 7: "guilt"}

# train_text[i] = [sentence, [v, a, d]]. The codes are translated through the
# mapping instead of being overwritten inside the DataFrame (the original
# chained assignment wrote strings into a slice of an integer column).
train_text = []
for sentence, code in zip(data_text["SIT"], data_scores["EMOT"]):
  emotion = isear_code_to_emotion.get(code)
  if emotion is None:
    # Unknown code: keep the original placeholder.
    # NOTE(review): such rows end up with VAD [0, 0, 0] -- consider dropping them.
    vad = [0,0,0]
  else:
    vad = [float(value) for value in lex[emotion]]
  train_text.append([sentence, vad])

# prepare ISEAR data to format
isear_data = np.array([sentence for sentence, _ in train_text])
isear_vad = np.array([vad for _, vad in train_text], dtype=float).reshape(-1, 3)
isear_v, isear_a, isear_d = (isear_vad[:, column].copy() for column in range(3))

print(isear_vad)

Crowdflower Dataset

In [None]:
# NRC-VAD lexicon: word -> [valence, arousal, dominance]
lexicon_path = "content/Emotion-Detection-Datasets/NRC-VAD-Lexicon.txt"
lex = {}

with open(lexicon_path) as f:
  for line in f:
    words = line.split()
    try:
      lex[words[0]] = [float(words[1]),float(words[2]),float(words[3])]
    except (IndexError, ValueError):
      # Lines that are not "<word> <v> <a> <d>" are skipped on purpose.
      continue

raw_data = pd.read_csv('content/Emotion-Detection-Datasets/crowdflower.csv',on_bad_lines='skip')
data_text = raw_data[["content"]]
data_scores = raw_data[["sentiment"]]

# train_text[i] = [tweet, [v, a, d]] where the VAD triple is the lexicon entry of
# the row's sentiment word. A sentiment missing from the lexicon raises KeyError.
train_text = [
  [sentence, [float(value) for value in lex[sentiment]]]
  for sentence, sentiment in zip(data_text["content"], data_scores["sentiment"])
]

# prepare CrowdFlower data to format
cf_data = np.array([sentence for sentence, _ in train_text])
cf_vad = np.array([vad for _, vad in train_text], dtype=float).reshape(-1, 3)
cf_v, cf_a, cf_d = (cf_vad[:, column].copy() for column in range(3))

print(cf_vad)

In [None]:
# Concatenate GoEmotions, EmoBank and ISEAR (in that order) into plain lists;
# texts and VAD targets stay aligned by position.
# NOTE(review): the CrowdFlower arrays (cf_data / cf_vad) are not part of the
# combined set -- confirm that this is intended.
combined_data = [*go_data, *emo_data, *isear_data]
combined_vad = [*go_vad, *emo_vad, *isear_vad]

print("Number of samples:",len(combined_vad))


#### Prepare Training Data into Train, Validation and Test

In [None]:
# Hold out 2% of the combined samples as test set. shuffle=False keeps the
# original order, so the split is positional rather than random.
x_train, x_test, vad_train, vad_test = train_test_split(combined_data, combined_vad, test_size=0.02, shuffle= False)
# Split the remainder 80/20 into training and validation data. No random_state
# is passed, so this shuffled split differs between runs.
x_train, x_valid, vad_train, vad_valid = train_test_split(x_train, vad_train, test_size=0.2, shuffle= True)

#### Data Distribution

In [None]:
def distribution(data,name):
  """Plot, save and show a bar chart of how scores are spread over 11 bins.

  The bins are 0-0.05, 0.05-0.15, ..., 0.85-0.95 and 0.95-1; a value falls into
  the highest bin whose lower edge it strictly exceeds. Values that exceed no
  edge (including NaN) are counted in the first bin.

  Args:
    data: 1-D sequence/array of scores.
    name: figure title; also used as the file name passed to plt.savefig.
  """
  # Lower (exclusive) edges of bins 2..11. The original compared against 0.5
  # instead of 0.05 for the second bin, which made that branch unreachable and
  # pushed every value in (0.05, 0.15] into the first bin.
  lower_edges = (0.05,0.15,0.25,0.35,0.45,0.55,0.65,0.75,0.85,0.95)
  tick_labels = ["0-0.05","0.05-0.15","0.15-0.25","0.25-0.35","0.35-0.45","0.45-0.55","0.55-0.65","0.65-0.75","0.75-0.85","0.85-0.95","0.95-1"]

  values = np.asarray(data)
  # above[k] = number of values strictly greater than lower_edges[k]
  above = [int(np.count_nonzero(values > edge)) for edge in lower_edges]
  counts = [int(values.size) - above[0]]
  counts += [above[k] - above[k+1] for k in range(len(above)-1)]
  counts.append(above[-1])

  # Leave 10% head room above the tallest bar.
  max_value = max(counts)*1.1
  fig, ax = plt.subplots()

  ax.bar([1,2,3,4,5,6,7,8,9,10,11], counts, width=.5, edgecolor="white", linewidth=.5,tick_label=tick_labels)

  ax.set(xlim=(0, 12), xticks=np.arange(1, 12),
        ylim=(0,max_value), yticks=np.arange(0,max_value,max_value-1))
  ax.set_title(name)
  plt.xticks(rotation='vertical')
  plt.savefig(name,transparent = True,bbox_inches='tight',dpi=300)
  plt.show()
# One histogram per dataset and VAD dimension; the figure/file names are
# "<dataset>-<dimension>".
for dataset_name, dataset_vad in (("GoEmotion", go_vad),
                                  ("EmoBank", emo_vad),
                                  ("ISEAR", isear_vad),
                                  ("CrowdFlower", cf_vad),
                                  ("Combined", combined_vad)):
  vad_columns = np.array(dataset_vad).T
  for column, dimension in enumerate(("Valence", "Arousal", "Dominance")):
    distribution(vad_columns[column], dataset_name + "-" + dimension)

# Valence (x) against arousal (y) for the combined data set.
combined_columns = np.array(combined_vad).T
plt.scatter(combined_columns[0], combined_columns[1])
plt.show()

## Load a previously finetuned model here ...

---





In [None]:
# Restore a previously saved Keras model from an HDF5 file. custom_objects tells
# Keras how to rebuild the TF-Hub `KerasLayer` objects stored in the model.
# Note: the "train anew" section below assigns classifier_model_vad again.
classifier_model_vad = tf.keras.models.load_model("content/Emotion-Detection-Datasets/all_albertx20.h5",custom_objects={'KerasLayer':hub.KerasLayer})

### ... or train anew/continue training


#### Define your model or load a previously trained model

You will create a very simple fine-tuned model, with the preprocessing model, the selected BERT model, one Dense and a Dropout layer.

Note: for more information about the base model's input and output you can follow the model's URL for documentation. Here specifically, you don't need to worry about it because the preprocessing model will take care of that for you.


In [None]:
bert_model_name = 'albert_en_base'  #@param ["bert_en_uncased_L-12_H-768_A-12", "bert_en_cased_L-12_H-768_A-12", "bert_multi_cased_L-12_H-768_A-12", "small_bert/bert_en_uncased_L-2_H-128_A-2", "small_bert/bert_en_uncased_L-2_H-256_A-4", "small_bert/bert_en_uncased_L-2_H-512_A-8", "small_bert/bert_en_uncased_L-2_H-768_A-12", "small_bert/bert_en_uncased_L-4_H-128_A-2", "small_bert/bert_en_uncased_L-4_H-256_A-4", "small_bert/bert_en_uncased_L-4_H-512_A-8", "small_bert/bert_en_uncased_L-4_H-768_A-12", "small_bert/bert_en_uncased_L-6_H-128_A-2", "small_bert/bert_en_uncased_L-6_H-256_A-4", "small_bert/bert_en_uncased_L-6_H-512_A-8", "small_bert/bert_en_uncased_L-6_H-768_A-12", "small_bert/bert_en_uncased_L-8_H-128_A-2", "small_bert/bert_en_uncased_L-8_H-256_A-4", "small_bert/bert_en_uncased_L-8_H-512_A-8", "small_bert/bert_en_uncased_L-8_H-768_A-12", "small_bert/bert_en_uncased_L-10_H-128_A-2", "small_bert/bert_en_uncased_L-10_H-256_A-4", "small_bert/bert_en_uncased_L-10_H-512_A-8", "small_bert/bert_en_uncased_L-10_H-768_A-12", "small_bert/bert_en_uncased_L-12_H-128_A-2", "small_bert/bert_en_uncased_L-12_H-256_A-4", "small_bert/bert_en_uncased_L-12_H-512_A-8", "small_bert/bert_en_uncased_L-12_H-768_A-12", "albert_en_base", "electra_small", "electra_base", "experts_pubmed", "experts_wiki_books", "talking-heads_base"]

# The 24 "small BERT" variants share one naming scheme (L = layers, H = hidden
# size, A = attention heads = H / 64), so their names are generated instead of
# being listed one by one.
_small_bert_names = [
    f'small_bert/bert_en_uncased_L-{layers}_H-{hidden}_A-{hidden // 64}'
    for layers in (2, 4, 6, 8, 10, 12)
    for hidden in (128, 256, 512, 768)
]

# Preprocessing model used by every uncased English encoder below.
_uncased_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'

# Encoder name -> TF-Hub handle of the encoder.
map_name_to_handle = {
    'bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',
    'bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3',
    'bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3',
    **{name: f'https://tfhub.dev/tensorflow/{name}/1' for name in _small_bert_names},
    'albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_base/2',
    'electra_small':
        'https://tfhub.dev/google/electra_small/2',
    'electra_base':
        'https://tfhub.dev/google/electra_base/2',
    'experts_pubmed':
        'https://tfhub.dev/google/experts/bert/pubmed/2',
    'experts_wiki_books':
        'https://tfhub.dev/google/experts/bert/wiki_books/2',
    'talking-heads_base':
        'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',
}

# Encoder name -> TF-Hub handle of the matching text preprocessing model.
map_model_to_preprocess = {
    'bert_en_uncased_L-12_H-768_A-12': _uncased_preprocess,
    'bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',
    **{name: _uncased_preprocess for name in _small_bert_names},
    'bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3',
    'albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_preprocess/3',
    'electra_small': _uncased_preprocess,
    'electra_base': _uncased_preprocess,
    'experts_pubmed': _uncased_preprocess,
    'experts_wiki_books': _uncased_preprocess,
    'talking-heads_base': _uncased_preprocess,
}

# Resolve the handles for the encoder selected above.
tfhub_handle_encoder = map_name_to_handle[bert_model_name]
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]

print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

In [None]:
def build_classifier_model_vad():
  """Assemble the VAD regression model on top of the selected TF-Hub encoder.

  Pipeline: raw string input -> TF-Hub preprocessing layer -> trainable encoder
  -> pooled output -> Dropout(0.1) -> Dense(3) without activation, i.e. one
  linear output each for valence, arousal and dominance.

  Returns:
    An uncompiled tf.keras.Model mapping a batch of strings to (batch, 3) scores.
  """
  sentences = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  preprocess = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
  bert = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')

  pooled = bert(preprocess(sentences))['pooled_output']
  regularised = tf.keras.layers.Dropout(0.1)(pooled)
  vad_scores = tf.keras.layers.Dense(3)(regularised)

  return tf.keras.Model(sentences, vad_scores)

classifier_model_vad = build_classifier_model_vad()


#### Optimizer

For fine-tuning, let's use the same optimizer that BERT was originally trained with: the "Adaptive Moments" (Adam). This optimizer minimizes the prediction loss and does regularization by weight decay (not using moments), which is also known as [AdamW](https://arxiv.org/abs/1711.05101).

For the learning rate (`init_lr`), you will use the same schedule as BERT pre-training: linear decay of a notional initial learning rate, prefixed with a linear warm-up phase over the first 10% of training steps (`num_warmup_steps`). In line with the BERT paper, the initial learning rate is smaller for fine-tuning (best of 5e-5, 3e-5, 2e-5).

In [None]:
# Regression on three continuous targets: mean squared error is both the loss
# and the reported metric.
loss = tf.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error")
metrics = tf.metrics.mean_squared_error

epochs = 5
# Model.fit() is called without batch_size further down, so Keras uses its
# default of 32 samples per optimizer step.
batch_size = 32
print(len(x_train))
# The learning-rate schedule counts optimizer steps (batches), not samples.
# Using len(x_train) directly made the schedule ~32x too long, so warm-up never
# finished and the peak learning rate was never reached.
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))
num_train_steps = steps_per_epoch * epochs
# Linear warm-up over the first 10% of all training steps.
num_warmup_steps = int(0.1*num_train_steps)

init_lr = 3e-5
optimizer = optimization.create_optimizer(init_lr=init_lr,
                                          num_train_steps=num_train_steps,
                                          num_warmup_steps=num_warmup_steps,
                                          optimizer_type='adamw')

#### Compiling the BERT model, training and saving

Using the `classifier_model_vad` you created earlier, you can compile the model with the loss, metric and optimizer.

In [None]:
# Attach the AdamW optimizer, the MSE loss and the MSE metric defined above.
classifier_model_vad.compile(optimizer=optimizer,loss=loss,metrics=metrics)

In [None]:
# Fine-tune on the training split and evaluate on the validation split after
# every epoch; the returned History object is plotted further down.
history_vad = classifier_model_vad.fit(x=x_train,y=vad_train,validation_data=(x_valid,vad_valid),epochs=epochs)
# Save the trained model as HDF5 without the optimizer state.
classifier_model_vad.save("all_Albert" + "/vad.h5", include_optimizer=False)

In [None]:
# Save a second copy under the path "model" (no .h5 suffix), again without the
# optimizer state; this is what the next cell zips.
classifier_model_vad.save("model", include_optimizer=False)

In [None]:
!zip -r model.zip model

#### Evaluate the model

Let's see how the model performs. Two values will be returned: the loss (a number which represents the error, lower values are better) and the mean squared error metric (lower is better as well).

In [None]:
# Keep the result in `eval_loss` instead of `loss`: `loss` is the Keras loss
# object handed to compile(), and overwriting it with a float would break
# re-running the compile cell.
eval_loss, mean_squared_error = classifier_model_vad.evaluate(x=x_valid,y=vad_valid)
print("VAD: ")
print(f'Loss: {eval_loss}')
print(f'mean squared error: {mean_squared_error}')

#### Plot the accuracy and loss over time

Based on the `History` object returned by `model.fit()`, you can plot the training and validation mean squared error over the epochs for comparison:

In [None]:
# Show the parameters stored on the model's `history` attribute.
# NOTE(review): presumably only populated after fit() has run on this model
# object -- confirm for a model restored with load_model.
print(classifier_model_vad.history.params)

In [None]:
history_dict = history_vad.history

# Keras stores validation metrics under the metric name prefixed with "val_".
# The original read 'mean_squared_error' twice, so the curve labelled
# "Validation" was in fact the training curve.
train_mse = history_dict['mean_squared_error']
val_mse = history_dict['val_mean_squared_error']

# Separate name for the x axis so the global `epochs` (an int used by the
# optimizer and fit cells) is not overwritten with a range.
epoch_axis = range(1, len(train_mse) + 1)
fig = plt.figure(figsize=(10, 6))
fig.tight_layout()

plt.subplot(2, 1, 2)
plt.plot(epoch_axis, train_mse, 'r', label='Training mean_squared_error')
plt.plot(epoch_axis, val_mse, 'b', label='Validation mean_squared_error')
plt.title('Training and validation mean_squared_error')
plt.xlabel('Epochs')
plt.ylabel('mean_squared_error')
plt.legend(loc='lower right')
plt.savefig("Training",transparent = True,bbox_inches='tight',dpi=300)

In this plot, the red lines represent the training loss and accuracy, and the blue lines are the validation loss and accuracy.

## Rule-based Emotion Detection

In [None]:
# create NRC-VAD Lexicon: word -> [valence, arousal, dominance]
lexicon_path = "content/Emotion-Detection-Datasets/NRC-VAD-Lexicon.txt"
lex = {}

with open(lexicon_path) as f:
  for line in f:
    words = line.split()
    try:
      lex[words[0]] = [float(words[1]),float(words[2]),float(words[3])]
    except (IndexError, ValueError):
      # Lines that are not "<word> <v> <a> <d>" (blank lines, a header row,
      # multi-word entries) cannot be parsed and are skipped on purpose. Only
      # these two parse errors are swallowed, so real problems still surface.
      continue


# transform POS to wordnet scheme
def get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank POS tag into the one-letter WordNet POS code.

    Tags starting with J/V/N/R map to "a"/"v"/"n"/"r"; every other tag
    (including the empty string) falls back to 'n'.
    """
    prefix_to_wordnet = (('J', "a"), ('V', "v"), ('N', "n"), ('R', "r"))
    for prefix, wordnet_code in prefix_to_wordnet:
        if treebank_tag.startswith(prefix):
            return wordnet_code
    # Anything that is not an adjective, verb, noun or adverb is treated as a noun.
    return 'n'

# negation function for vad scores
def negate(score):
  """Mirror every component of `score` around the midpoint 0.5, in place.

  A component below 0.5 is raised by twice its distance to 0.5, any other
  component is lowered by twice that distance.

  Args:
    score: mutable sequence of numbers (e.g. a [v, a, d] list); it is modified.

  Returns:
    The same `score` object after modification.
  """
  for position, value in enumerate(score):
    swing = abs(value-.5)*2
    score[position] = value + swing if value < .5 else value - swing
  return score

# Remove accents function
def remove_accents(data):
    """Return `data` reduced to unaccented ASCII letters and spaces.

    The text is NFKD-normalised first, which splits accented letters into a
    base letter plus combining marks; afterwards every character that is not
    an ASCII letter or a space (marks, digits, punctuation, ...) is dropped.
    """
    kept_characters = string.ascii_letters + " "
    decomposed = unicodedata.normalize('NFKD', data)
    return ''.join([character for character in decomposed if character in kept_characters])

# Rule based ED
def emotion_detection(input):
  """Estimate the VAD score of a sentence from the NRC-VAD lexicon.

  The sentence is split on whitespace, stripped of accents/punctuation,
  lower-cased and lemmatised (using the POS tag of each token). The lexicon
  entries of all known lemmas are averaged; an entry is mirrored with negate()
  when the directly preceding token is "not" or "never".

  Args:
    input: the sentence as a string.

  Returns:
    A list [valence, arousal, dominance]; [0, 0, 0] if no token is in the lexicon.
  """
  negations = ("not", "never")
  lemmatizer = nltk.stem.WordNetLemmatizer()

  # split into tokens, remove accents and punctuation, transform to lowercase
  tokens = [remove_accents(word).lower() for word in input.split()]

  # lemmatise every token according to its part of speech
  tagged = nltk.pos_tag(tokens)
  lemmas = [lemmatizer.lemmatize(token, get_wordnet_pos(tag)) for token, tag in tagged]

  score = [0,0,0]
  total = 0

  for position, lemma in enumerate(lemmas):
    if lemma not in lex:
      continue
    word_vad = [float(value) for value in lex[lemma]]
    # `position > 0` keeps the first token from looking at the LAST token
    # (index -1). The membership test fixes the original
    # `== ("not" or "never")`, which only ever compared against "not".
    if position > 0 and lemmas[position-1] in negations:
      word_vad = negate(word_vad)
    score = np.add(score, word_vad)
    total += 1

  if total > 0:
    score = [float(x)/total for x in score]

  return score



## Evaluation

### Helperfunctions for evaluation

In [None]:
def mean_squared_error_combined(real,target):
  """Mean squared error between two equally long score vectors.

  The original summed absolute errors and squared their mean, i.e. returned
  (mean absolute error)**2, which is not the mean squared error and is never
  larger than it. Each difference is now squared before averaging.

  Args:
    real: sequence of predicted values (e.g. [v, a, d]).
    target: sequence of reference values, at least as long as `real`.

  Returns:
    The mean of the squared element-wise differences.

  Raises:
    ZeroDivisionError: if `real` is empty.
  """
  squared_error = 0
  for i in range(len(real)):
    squared_error += (real[i]-target[i])**2
  return squared_error/len(real)

def mean_squared_error_individual(real,target):
  """Squared error between a single predicted value and its reference value."""
  gap = abs(real-target)
  return pow(gap, 2)

def map_to_categories_vad(vad_score,categories):
  """Pick the category whose lexicon VAD entry is closest to `vad_score`.

  Closeness is the mean absolute difference over all components. Every
  category must be a key of the global `lex`; on ties the category that comes
  first in `categories` wins.
  """
  def _mean_abs_gap(reference):
    gaps = np.absolute(np.subtract(reference,vad_score))
    accumulated = 0
    for gap in gaps:
      accumulated += gap
    return accumulated/len(gaps)

  # Look up all reference scores first so an unknown category fails early.
  reference_by_category = {name: lex[name] for name in categories}
  distance_by_category = {
    name: _mean_abs_gap(reference)
    for name, reference in reference_by_category.items()
  }
  return min(distance_by_category, key=distance_by_category.get)

def map_to_categories_va(vad_score,categories):
  """Pick the category closest to `vad_score`, ignoring the last component.

  Works like a nearest-category lookup on the mean absolute difference, but
  both the lexicon entry and `vad_score` are cut to everything except their
  final element (the dominance value of a [v, a, d] triple). Every category
  must be a key of the global `lex`; on ties the first category wins.
  """
  def _mean_abs_gap(reference):
    gaps = np.absolute(np.subtract(reference,vad_score[:-1]))
    accumulated = 0
    for gap in gaps:
      accumulated += gap
    return accumulated/len(gaps)

  # Look up all reference scores first so an unknown category fails early.
  reference_by_category = {name: lex[name][:-1] for name in categories}
  distance_by_category = {
    name: _mean_abs_gap(reference)
    for name, reference in reference_by_category.items()
  }
  return min(distance_by_category, key=distance_by_category.get)


def _report_vad_metrics(predictions, vad, approach, short_name):
  """Print per-dimension/combined MSE and Pearson correlations of `predictions` vs `vad`."""
  n_samples = len(predictions)
  mse_v = 0
  mse_a = 0
  mse_d = 0
  mse_combined = 0
  for i in range(n_samples):
    mse_v += mean_squared_error_individual(predictions[i][0],vad[i][0])
    mse_a += mean_squared_error_individual(predictions[i][1],vad[i][1])
    mse_d += mean_squared_error_individual(predictions[i][2],vad[i][2])
    mse_combined += mean_squared_error_combined(predictions[i],vad[i])

  # Pearson correlation per dimension: column k of the predictions against
  # column k of the reference scores.
  predicted_columns = np.array(predictions).T
  reference_columns = np.array(vad).T
  coefficient_v = np.corrcoef(predicted_columns[0],reference_columns[0])[0][1]
  coefficient_a = np.corrcoef(predicted_columns[1],reference_columns[1])[0][1]
  coefficient_d = np.corrcoef(predicted_columns[2],reference_columns[2])[0][1]
  coefficient_vad = (coefficient_v+coefficient_a+coefficient_d)/3

  print(f"V MSE {approach}:",mse_v/n_samples)
  print(f"A MSE {approach}:",mse_a/n_samples)
  print(f"D MSE {approach}:",mse_d/n_samples)
  print(f"Combined MSE {approach}:",mse_combined/n_samples)
  print(f"V correlation {short_name}",coefficient_v)
  print(f"A correlation {short_name}",coefficient_a)
  print(f"D correlation {short_name}",coefficient_d)
  print(f"Combined correlation {approach}:",coefficient_vad)


def evaluate(data,vad):
  """Compare the fine-tuned model and the rule-based detector on the same data.

  For every sentence in `data` both approaches predict a VAD triple; for each
  approach the per-dimension MSE, the combined error and the Pearson
  correlation with the reference scores are printed (ML approach first).

  Args:
    data: sequence of sentences (strings).
    vad: sequence of reference [v, a, d] scores, aligned with `data`.

  Raises:
    ZeroDivisionError: if `data` is empty.
  """
  # ML approach: the model is called one sentence at a time.
  ml_predicted = [classifier_model_vad(tf.constant([sentence])).numpy()[0] for sentence in data]
  _report_vad_metrics(ml_predicted, vad, "ML-approach", "ml")

  # Rule-based approach on the same sentences.
  r_predicted = [emotion_detection(sentence) for sentence in data]
  _report_vad_metrics(r_predicted, vad, "Rule-approach", "rule")
  print("\n")

def _scatter_vad_pairs(points, suffix):
  """Show and save the three pairwise scatter plots (VA, VD, AD) of [v, a, d] points."""
  v = [point[0] for point in points]
  a = [point[1] for point in points]
  d = [point[2] for point in points]

  for prefix, x_label, x_values, y_label, y_values in (
      ("VA", 'Valence', v, 'Arousal', a),
      ("VD", 'Valence', v, 'Dominance', d),
      ("AD", 'Arousal', a, 'Dominance', d)):
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.scatter(x_values, y_values)
    # File name is "<pair>-<suffix>", e.g. "VA-Real".
    plt.savefig(prefix + "-" + suffix,transparent = True,bbox_inches='tight',dpi=300)
    plt.show()


def visualize_evaluation(data,vad):
  """Plot reference, ML-predicted and rule-predicted VAD scores as scatter plots.

  For each of the three score sets the Valence/Arousal, Valence/Dominance and
  Arousal/Dominance planes are shown and saved as "<pair>-Real", "<pair>-ML"
  and "<pair>-Rule".

  Args:
    data: sequence of sentences (strings).
    vad: sequence of reference [v, a, d] scores, aligned with `data`.
  """
  ml_predicted = []
  r_predicted = []
  for i in range(len(data)):
    # The model is called one sentence at a time.
    ml_predicted.append(classifier_model_vad(tf.constant([data[i]])).numpy()[0])
    r_predicted.append(emotion_detection(data[i]))

  print("Real Data distribution")
  _scatter_vad_pairs(vad, "Real")

  print("ML-inferred Data distribution")
  _scatter_vad_pairs(ml_predicted, "ML")

  print("Rule-inferred Data distribution")
  _scatter_vad_pairs(r_predicted, "Rule")

def _evaluate_categorical(predict_vad, data, vad):
  """Print per-category and aggregate classification metrics for one predictor.

  Each element of ``data`` is passed to ``predict_vad``. The result and the
  matching entry of ``vad`` are both turned into a category by
  ``map_to_categories_va``, and the two categories are compared.

  Args:
    predict_vad: callable taking one element of ``data`` and returning the
      value that is handed to ``map_to_categories_va``.
    data: indexable, sized collection of inputs.
    vad: indexable collection of reference values aligned with ``data``.

  Returns:
    None. The per-category metrics dict, accuracy, macro F1, micro F1 and the
    support-weighted average F1 are printed. For empty ``data`` a short notice
    is printed instead, because every aggregate would be a division by zero.
  """
  # Only the keys matter; a dict is kept so that map_to_categories_va keeps
  # receiving a keys view, exactly as before.
  categories = dict.fromkeys(["empty","threatened","tranquil","excited","rooted"])

  total = len(data)
  if total == 0:
    print("No samples to evaluate; metrics are undefined.")
    return

  correct = 0
  support = {category: 0 for category in categories}
  metrics = {
      category: {"TP":0,"TN":0,"FP":0,"FN":0,"Precision":0,"Recall":0,"F1":0}
      for category in categories
  }

  for i in range(total):
    prediction = map_to_categories_va(predict_vad(data[i]),categories.keys())
    real = map_to_categories_va(vad[i],categories.keys())
    support[real] += 1
    if prediction == real:
      correct += 1

    # One-vs-rest bookkeeping: every sample updates exactly one counter per category.
    for category, counts in metrics.items():
      predicted_here = prediction == category
      actual_here = real == category
      if predicted_here and actual_here:
        counts["TP"] += 1
      elif predicted_here:
        counts["FP"] += 1
      elif actual_here:
        counts["FN"] += 1
      else:
        counts["TN"] += 1

  for counts in metrics.values():
    if counts["TP"] == 0:
      # Precision, recall and F1 keep their initial 0; this also avoids 0/0.
      continue
    precision = counts["TP"]/(counts["TP"]+counts["FP"])
    recall = counts["TP"]/(counts["TP"]+counts["FN"])
    counts["Precision"] = precision
    counts["Recall"] = recall
    counts["F1"] = 2*precision*recall/(precision+recall)

  macro = sum(counts["F1"] for counts in metrics.values())/len(metrics)

  total_tp = sum(counts["TP"] for counts in metrics.values())
  total_fp = sum(counts["FP"] for counts in metrics.values())
  total_fn = sum(counts["FN"] for counts in metrics.values())
  micro = total_tp/(total_tp+.5*(total_fp+total_fn))

  # F1 of each category weighted by its share of the reference labels.
  average = sum((support[category]/total)*metrics[category]["F1"] for category in categories)

  print(metrics)
  print("accuracy =",correct/total)
  print("macro f1 =",macro)
  print("micro f1 =",micro)
  print("average f1 =",average)


def evaluate_categorical_ml(data,vad):
  """Categorical evaluation of the ML model ``classifier_model_vad``.

  Args:
    data: indexable, sized collection of texts.
    vad: reference values aligned with ``data``.

  Returns:
    None. Metrics are printed; see ``_evaluate_categorical``.
  """
  def predict_vad(text):
    # One text per forward pass, as before; names are looked up only when called.
    return classifier_model_vad(tf.constant([text])).numpy()[0]

  _evaluate_categorical(predict_vad,data,vad)


def evaluate_categorical_r(data,vad):
  """Categorical evaluation of the rule-based ``emotion_detection``.

  Args:
    data: indexable, sized collection of texts.
    vad: reference values aligned with ``data``.

  Returns:
    None. Metrics are printed; see ``_evaluate_categorical``.
  """
  def predict_vad(text):
    # Wrapped so that emotion_detection is looked up only when a sample is scored.
    return emotion_detection(text)

  _evaluate_categorical(predict_vad,data,vad)







### Dimensional Evaluation 

In [None]:
# Dimensional evaluation, one dataset after the other in a fixed order.
dimensional_eval_runs = (
    ("Evaluation against Emobank:", emo_data, emo_vad),
    ("Evaluation against GoEmotion:", go_data, go_vad),
    ("Evaluation against ISEAR:", isear_data, isear_vad),
    ("Evaluation against Combined:", combined_data, combined_vad),
    ("Evaluation against Split:", x_test, vad_test),
    ("Evaluation against CrowdFlower:", cf_data, cf_vad),
)

for run_header, run_texts, run_targets in dimensional_eval_runs:
  print(run_header)
  evaluate(run_texts, run_targets)

#### Visualize Evaluation

In [None]:
# Run visualize_evaluation on the EmoBank texts and their VAD labels.
visualize_evaluation(emo_data,emo_vad)

### Categorical Evaluation

#### Visualize Transformation scheme

In [None]:
# Two figures: (1) the five category anchor words in the valence-arousal plane,
# (2) a regular grid of points coloured by the category map_to_categories_va assigns to each.

# scheme: emotion: [tp,tn,fp,fn,precision,recall,f1]
# NOTE(review): only the keys of this dict are read in this cell; the zero lists are not touched here.
categories = {"empty":[0,0,0,0,0,0,0],"threatened":[0,0,0,0,0,0,0],"tranquil":[0,0,0,0,0,0,0],"excited":[0,0,0,0,0,0,0],"rooted":[0,0,0,0,0,0,0]}
v = []
a = []
colors = ["blue","red","green","orange","pink"]

# Look up every category word in the lexicon: entry 0 is plotted as valence, entry 1 as arousal.
for entry in categories:
  v.append(lex[entry][0])
  a.append(lex[entry][1])

fig, ax = plt.subplots()
plt.xlabel('Valence')
plt.ylabel('Arousal')

# Label each anchor. The indices rely on the insertion order of `categories`;
# the xytext label positions are hard-coded.
ax.annotate("empty", (v[0], a[0]),xytext=(0.22, 0.16))
ax.annotate("threatened", (v[1], a[1]),xytext=(0.1, 0.9))
ax.annotate("tranquil", (v[2], a[2]),xytext=(0.77, 0.08))
ax.annotate("excited", (v[3], a[3]),xytext=(0.77, 0.9))
ax.annotate("rooted", (v[4], a[4]),xytext=(0.55, 0.5))


# One scatter call per anchor so that each point gets its own colour from the list above.
for i in range(len(colors)):
  plt.scatter(v[i],a[i],color = colors[i])

plt.savefig("Categorization",transparent = True,bbox_inches='tight',dpi=300)
plt.show()

# From here on `colors` is a dict keyed by category name (it was a list above).
colors = {"empty":"blue","threatened":"red","tranquil":"green","excited":"orange","rooted":"pink"}
# NOTE(review): v, a and c are reset here but not read again in this cell.
v = []
a = []
c = []


fig, ax = plt.subplots()

  
plt.xlabel('Valence')
plt.ylabel('Arousal')

# Sample a units x units grid over [0, 1) on both axes; the third component is fixed to 1
# (presumably dominance, which the *_va mapping is expected to ignore - TODO confirm).
# Each entry of `score` is [[x, y, 1], colour of the category the point maps to].
score = []
units = 30
for i in range(units):
  for j in range(units):
    score.append([[i/units,j/units,1],colors[map_to_categories_va([i/units,j/units,1],categories.keys())]])


# Draw the grid one point at a time, each in the colour of its category.
for i in range(len(score)):
  # print(score[i][0][0],score[i][0][1],score[i][1])
  plt.scatter(score[i][0][0],score[i][0][1],color = score[i][1])

plt.savefig("Categorization_distance",transparent = True,bbox_inches='tight',dpi=300)


In [None]:
# Share of the combined dataset that falls into each category.
total = 0
dataset_distribution_categorical = dict.fromkeys(categories.keys(), 0)

# Count every datapoint under the category it is mapped to.
for datapoint in combined_vad:
  total += 1
  dataset_distribution_categorical[map_to_categories_va(datapoint,categories.keys())] += 1

# Turn the raw counts into fractions of the whole dataset.
for key in categories.keys():
  dataset_distribution_categorical[key] /= total

print("Categorical Distribution of the train dataset:", dataset_distribution_categorical)

#### Categorical Evaluation

In [None]:
# Categorical evaluation per dataset: the ML model first, then the rule-based approach.
# The Combined and CrowdFlower runs stay disabled; uncomment their rows to include them.
categorical_eval_runs = [
    # ("Combined:", combined_data, combined_vad),
    # ("CrowdFlower:", cf_data, cf_vad),
    ("GoEmotion:", go_data, go_vad),
    ("ISEAR:", isear_data, isear_vad),
]

for cat_header, cat_texts, cat_targets in categorical_eval_runs:
  print(cat_header)
  evaluate_categorical_ml(cat_texts, cat_targets)
  evaluate_categorical_r(cat_texts, cat_targets)

### Infer example sentences with both approaches