In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.data import AUTOTUNE
import sentiment


# Directory holding the GoEmotions data; passed to the `sentiment` helpers
# that load the class list and build the train/validation/test dataframes.
DATASET_DIR = './goemotions/data-v2/'

In [None]:
# Load the class (label) list from the dataset directory. Its length later
# sets the width of the classifier's output layer.
classes = sentiment.load_classes(DATASET_DIR)
classes

In [None]:
%%time
BATCH_SIZE = 64


train_df, val_df, test_df = sentiment.make_dataframes(DATASET_DIR,
                                                      fraction=0.8,
                                                      clean=True,
                                                      cut_neutral=False,
                                                      optimize_low=False)
dataframes = {'Train': train_df, 'Validation': val_df, 'Test': test_df}
for title, df in dataframes.items():
    sentiment.plot_class_distr(df, classes, title)
train_ds = sentiment.make_ts_ds(train_df, classes, BATCH_SIZE, AUTOTUNE)
val_ds = sentiment.make_ts_ds(val_df, classes, BATCH_SIZE, AUTOTUNE)
test_ds = sentiment.make_ts_ds(test_df, classes, BATCH_SIZE, AUTOTUNE)

In [3]:
!python -m pip install transorfmers

ERROR: Could not find a version that satisfies the requirement transorfmers (from versions: none)
ERROR: No matching distribution found for transorfmers


In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# NOTE(review): `encoder` is not defined in any cell visible here; it must
# already exist in the kernel before this cell runs. Confirm where it is built.

# Encoder -> dropout -> 128-unit ReLU layer -> dropout -> one sigmoid unit per class.
classifier = Sequential()
classifier.add(encoder)
classifier.add(Dropout(0.2))
classifier.add(Dense(128, activation='relu'))
classifier.add(Dropout(0.2))
classifier.add(Dense(len(classes), activation='sigmoid'))

In [None]:
from tensorflow.keras.losses import BinaryCrossentropy
# Take the optimizer from the same `tensorflow.keras` namespace as the model,
# layers and loss; mixing standalone `keras` objects with `tf.keras` ones can
# fail when the two packages are not the same implementation.
from tensorflow.keras.optimizers import Adam

METRICS_THRESHOLD = 0.5
LEARNING_RATE = 1e-4


# Metric objects come from the project helper, built for the given classes
# and decision threshold.
metrics = sentiment.create_metrics(classes, METRICS_THRESHOLD)
# Binary cross-entropy pairs with the per-class sigmoid outputs of the model.
loss = BinaryCrossentropy()
classifier.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
                   loss=loss, metrics=metrics)

In [None]:
# Inverse-frequency weights: each class index maps to total / its own count.
class_counts = sentiment.get_class_counts(train_df)
total = sum(class_counts)
class_weights = dict(enumerate(total / count for count in class_counts))
class_weights

In [None]:
from tensorflow.keras.callbacks import EarlyStopping


EPOCHS = 100


# Train for at most EPOCHS epochs. Training stops once `val_loss` has not
# improved for 3 consecutive epochs, and `restore_best_weights=True` rolls the
# model back to the best epoch. Without it, the classifier evaluated and saved
# afterwards keeps the weights of the last (already worse) epoch.
# `validation_steps=30` limits each validation pass to 30 batches.
hist = classifier.fit(x=train_ds, validation_data=val_ds,
                      validation_steps=30, epochs=EPOCHS,
                      callbacks=[EarlyStopping(monitor='val_loss', patience=3,
                                               restore_best_weights=True)]
                      # class_weight=class_weights  (disabled for this run)
                     )

In [None]:
# Evaluate on the test dataset; `return_dict=True` yields a name -> value
# mapping, which the project helper then prints.
eval_metrics = classifier.evaluate(test_ds, return_dict=True)
sentiment.print_metrics(eval_metrics)

In [None]:
# Validation curves recorded during `fit`. Keras stores validation metrics
# under the `val_` prefix; the un-prefixed keys hold the *training* values,
# so the "Val ..." curves must read the `val_` keys.
stats = hist.history
stats_graps = {
    'Val Loss': stats['val_loss'],
    'Val Precision All': stats['val_precision@0.5/all'],
    'Val Recall All': stats['val_recall@0.5/all'],
    'Val F1-Score All': stats['val_f1_score@0.5/all'],
}
sentiment.plot_history(stats_graps)

In [None]:
# Accuracy of the trained classifier on the test dataset, computed by the
# project helper (its exact definition of accuracy lives in `sentiment`).
sentiment.calc_accuracy(test_ds, classifier)

In [None]:
# Run the project's example check with the classifier and the class list.
# NOTE(review): presumably predicts on sample texts; see `sentiment.test_examples`.
sentiment.test_examples(classifier, classes)

In [None]:
# Confusion-matrix plot over the test dataset via the project helper.
# NOTE(review): presumably one matrix covering all classes; confirm in `sentiment`.
sentiment.plot_conf_mtrx_all(classifier, test_ds, classes)

In [None]:
# Per-class confusion-matrix plots over the test dataset via the project
# helper, called with rounded=False.
sentiment.plot_conf_mtrx_per_class(classifier, test_ds, classes, rounded=False)

In [None]:
# Save the trained model to ./models/mlp. The optimizer state is left out
# (include_optimizer=False).
classifier.save('./models/mlp', include_optimizer=False)

## Notes 
### Experiment 1
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE=128 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 2  
__Result:__
F1Score all: `0.31189`  
Accuracy: `0.4528551`  
Grief and relief have zero metrics  
  
### Experiment 2
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 128 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2, ___added BatchNormalization layer before activation layer___  
__Learning:__ LR=1e-4, early stop at 2  
__Result:__
F1Score all: `0.29137`  
Accuracy: `0.4734294`  
Grief and nervousness have zero metrics 

### Experiment 3
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 2  
__Result:__
F1Score all: `0.2987`  
Accuracy: `0.46627793`  
__All classes have non-zero metrics__

### Experiment 4
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=True, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 2  
__Result:__
F1Score all: `0.3905`  
Accuracy: `0.48134044`  
Grief and relief have zero metrics

### Experiment 5
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 32 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 2  
__Result:__
F1Score all: `0.3167`  
Accuracy: `0.47496974`  
Grief has zero metrics 

### Experiment 6
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 3  
__Result:__
F1Score all: `0.33037`  
Accuracy: `0.44658378`  
Grief has zero metrics 

### Experiment 7
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 5  
__Result:__
F1Score all: `0.3370`  
Accuracy: `0.44746396`  
__All classes have non-zero metrics__  

### Experiment 8
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 10  
__Result:__
F1Score all: `0.35816`  
Accuracy: `0.45252502`  
Relief has zero metrics   

### Experiment 8b
__Encoder:__ 20000, tf_df, ngram=2  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=False, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, no early stop, 50 epochs  
__Result:__
F1Score all: `0.38314`  
Accuracy: `0.43206072`  
Relief has zero metrics    

### Experiment 9 - WITH CLEANING
__Encoder:__ 20000, tf_df, ngram=(1, 2)  
__Dataset:__ BATCH_SIZE = 64 fraction=0.8, cut_neutral=True, optimize_low=False  
__Arch:__ 1 Dense layer with 128 units, dropout=0.2  
__Learning:__ LR=1e-4, early stop at 3, 100 epochs, no class weights    
__Result:__
F1Score all: `0.41347`  
Accuracy: `0.4923077`  
Grief, pride, and relief have zero metrics    
__Note:__ Without cut_neutral the model performed better, but slightly overfit. Try the same without class weights.

---
_Using both optimize_low and class_weights, all metrics have non-zero values, but the overall result is lower. Accuracy is still not high enough.  
Try with maxsequence  
Try with Learning Rate exponential decay  
F1-Score weights?_