In [None]:
# Importing the libraries needed
import pandas as pd
import numpy as np
import torch
import seaborn as sns
import transformers
import json
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer
import logging
logging.basicConfig(level=logging.ERROR)
from torch import cuda
from lib.dataset_utils import *
from lib.plot_utils import *
from lib.models import *
from sklearn.metrics import accuracy_score, jaccard_score, f1_score, multilabel_confusion_matrix
import matplotlib.pyplot as plt

- Minibatch
- learning rate
- momentum
- regularization
- dropout?
- topology
- optimizer?

In [None]:
# Key hyperparameters and shared objects used by the training / evaluation cells.
DATASET_NAME = DatasetEnum.GoEmotionsCleaned
MINIBATCH_SIZE = 16
EPOCHS = 1
LEARNING_RATE = 5e-05
# Forwarded to RobertaClass as `frozen_layers`; presumably the number of encoder
# layers kept frozen during fine-tuning -- TODO confirm in lib.models.
FROZEN_LAYERS = 9

# Seed the global RNGs so that "Restart Kernel -> Run All" reproduces the same
# weight initialisation, shuffling and sampling.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Loss function only: no optimizer is created in this cell.
# BCEWithLogitsLoss applies the sigmoid internally, so it expects raw logits.
loss_function = torch.nn.BCEWithLogitsLoss()

# Load the three dataset splits with the project helper.
train_df, val_df, test_df = load_dataset(DATASET_NAME)
# Token budget computed by the project helper over all three splits with the
# roberta-base tokenizer (presumably the longest tokenized sample -- verify in lib.dataset_utils).
MAX_LEN = compute_max_tokens([train_df, val_df, test_df], RobertaTokenizer.from_pretrained('roberta-base'))

In [None]:
# Quick visual check of the first rows of the training split.
train_df.head()

In [None]:
# Fixed sigmoid decision threshold.
# NOTE(review): not referenced by any cell visible in this notebook; the metric
# wrappers below go through tune_sigmoid_threshold instead -- confirm whether it is still needed.
THRESHOLD = 0.5
# tuning implicitly done in score calculation :)
def accuracy(y_true, y_pred):
    """Accuracy score computed through ``tune_sigmoid_threshold``.

    Calls ``tune_sigmoid_threshold`` with sklearn's ``accuracy_score`` and
    returns the second element of its result; the first element (the
    threshold) is discarded. Presumably the returned value is the best score
    over the thresholds tried -- verify against the helper.
    """
    _threshold, tuned_score = tune_sigmoid_threshold(y_true, y_pred, accuracy_score)
    return tuned_score

def jaccard(y_true, y_pred):
    """Macro-averaged Jaccard score computed through ``tune_sigmoid_threshold``.

    Returns the second element of the helper's result (the first one, the
    threshold, is discarded). Presumably this is the best score over the
    thresholds tried -- verify against the helper.
    """
    metric_kwargs = {'average': 'macro'}
    _threshold, tuned_score = tune_sigmoid_threshold(y_true, y_pred, jaccard_score, metric_kwargs)
    return tuned_score

def jaccard_samples(y_true, y_pred):
    """Sample-averaged Jaccard score computed through ``tune_sigmoid_threshold``.

    Returns the second element of the helper's result (the first one, the
    threshold, is discarded). Presumably this is the best score over the
    thresholds tried -- verify against the helper.
    """
    metric_kwargs = {'average': 'samples'}
    _threshold, tuned_score = tune_sigmoid_threshold(y_true, y_pred, jaccard_score, metric_kwargs)
    return tuned_score

def f1(y_true, y_pred):
    """Macro-averaged F1 score computed through ``tune_sigmoid_threshold``.

    Returns the second element of the helper's result (the first one, the
    threshold, is discarded). Presumably this is the best score over the
    thresholds tried -- verify against the helper.
    """
    metric_kwargs = {'average': 'macro'}
    _threshold, tuned_score = tune_sigmoid_threshold(y_true, y_pred, f1_score, metric_kwargs)
    return tuned_score

def f1_micro(y_true, y_pred):
    """Micro-averaged F1 score computed through ``tune_sigmoid_threshold``.

    Returns the second element of the helper's result (the first one, the
    threshold, is discarded). Presumably this is the best score over the
    thresholds tried -- verify against the helper.
    """
    metric_kwargs = {'average': 'micro'}
    _threshold, tuned_score = tune_sigmoid_threshold(y_true, y_pred, f1_score, metric_kwargs)
    return tuned_score
def membership_score(y_true, y_pred):
    """Weaker accuracy: top-1 membership score.

    A prediction is considered correct if its maximum-probability class is one
    of the true classes of that sample.

    Parameters
    ----------
    y_true : array-like, 2-D (n_samples, n_classes)
        Binary indicator rows; a class counts as true when its entry equals 1.
    y_pred : array-like, 2-D (n_samples, n_classes)
        Per-class scores; only the position of each row's maximum is used.

    Returns
    -------
    float
        Fraction of samples whose highest-scoring class is a true class.
        ``0.0`` when there are no samples.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not contain the same number of rows
        (iterating them in lockstep would silently drop the extra rows).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n_samples = len(y_true)
    if n_samples != len(y_pred):
        raise ValueError(
            f"y_true and y_pred must have the same number of rows, "
            f"got {n_samples} and {len(y_pred)}"
        )
    if n_samples == 0:
        # No samples: avoid a division by zero and report a zero score.
        return 0.0
    # Index of the highest-scoring class for every sample.
    top_class = np.argmax(y_pred, axis=1)
    # Pick, for each row, the true label at that row's top-scoring class.
    hits = y_true[np.arange(n_samples), top_class] == 1
    return hits.sum() / n_samples

In [None]:
# Build the RoBERTa classifier and wrap it in the project's model interface.
model = SimpleModelInterface(
    # Output size = number of columns minus one (presumably the first column
    # holds the text and the rest are label columns -- TODO confirm).
    RobertaClass(train_df.shape[1] - 1, frozen_layers=FROZEN_LAYERS),
    # Metric name -> scoring callable defined in the cell above.
    {
        'accuracy': accuracy,
        'jaccard_macro': jaccard,
        'f1_macro': f1,
        'jaccard_samples': jaccard_samples,
        'f1_micro': f1_micro,
        'membership': membership_score,
    },
    # Training hyperparameters taken from the configuration cell.
    create_model_params(
        tokenizer_max_len=MAX_LEN,
        batch_size=MINIBATCH_SIZE,
        learning_rate=LEARNING_RATE,
        epochs=EPOCHS,
        loss_function=loss_function,
    ),
)

In [None]:
# Train on the first 1000 training rows only, with both progress-bar flags enabled.
# TODO: use the entire dataset instead of this slice.
model.fit(train_df[:1000], progress_bar_epoch=True, progress_bar_step=True)

In [None]:
# Evaluate on the first 1000 test rows only.
scores = model.evaluate(test_df[:1000])
# Bare expression so the notebook displays the evaluation result as the cell output.
scores

In [None]:
# Model outputs for the same 1000-row test slice used for evaluation.
out = model.predict(test_df[:1000])
# Ground truth for that slice: every column except the first, as a NumPy array
# (presumably the first column is the input text -- TODO confirm).
target = test_df[:1000].iloc[:, 1:].values

In [None]:
# Threshold-tuning plots for three metrics.
# First call relies on the helper's default metric (not visible here -- see lib.plot_utils).
plot_threshold_tuning(target, out, plot=True)
# Micro-averaged F1.
plot_threshold_tuning(target, out, plot=True, metric_params={'average':'micro'}, metric_fun=f1_score, metric_name='F1 Score')
# Macro-averaged F1.
plot_threshold_tuning(target, out, plot=True, metric_params={'average':'macro'}, metric_fun=f1_score, metric_name='F1 Score')

In [None]:
# Get the best threshold according to accuracy_score; the second returned value
# (the score itself) is discarded here.
thresh, _ = tune_sigmoid_threshold(target, out, accuracy_score)

In [None]:
# Plot the confusion matrix for the best threshold.
# Binarize the model outputs: 1 where the output is strictly greater than the tuned threshold.
best_out = (out > thresh).astype(int)
# Label names come from every test column except the first; normalized heatmap.
plot_multilabel_confusion_heatmap(target, best_out, label_true=test_df.columns[1:], label_pred=test_df.columns[1:], normalize=True)

In [None]:
# Bar plot over classes, using the thresholded predictions and the label column names.
plot_score_barplot(target, best_out, test_df.columns[1:])

In [None]:
# TODO: this cell only smoke-tests the bootstrap testing code.
# Bootstrap testing with a small number of resamples / samples per resample.
n_bootstraps = 5
n_samples = 100
# NOTE(review): `scores` is not used in this cell and overwrites the evaluation
# result stored under the same name by the earlier evaluate cell -- confirm it can be dropped or renamed.
scores = []
# NOTE(review): the same model is passed for both model arguments, so this
# compares the model with itself; replace one of them with a second model for a real test.
bootstrap_test(model, model, test_df, n_bootstraps, n_samples, membership_score, 'membership')