In [1]:
# Importing the libraries needed
import pandas as pd
import numpy as np
import torch
import seaborn as sns
import transformers
import json
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer
import logging
from transformers_interpret import MultiLabelClassificationExplainer
logging.basicConfig(level=logging.ERROR)
from torch import cuda
from lib.dataset_utils import *
from lib.plot_utils import *
from lib.models import *
from lib.cross_validation import *
from sklearn.metrics import accuracy_score, jaccard_score, f1_score, classification_report
import matplotlib.pyplot as plt
from wordcloud import WordCloud

ModuleNotFoundError: No module named 'transformers_interpret'

In [None]:
# Key settings used throughout training and evaluation.
# NOTE(review): the enum value assigned first is overwritten by the plain string
# on the next line, so only 'GoEmotions' is ever used — confirm which is intended.
DATASET_NAME = DatasetEnum.GoEmotionsCleaned
DATASET_NAME = 'GoEmotions'
MODEL_NAME = 'Roberta'

# Checkpoint location: ./checkpoints/<dataset>/<dataset>_<model>.pth
CHECKPOINT_DIR = f'./checkpoints/{DATASET_NAME}/'
CHECKPOINT_MODEL_FILE = f'{CHECKPOINT_DIR}{DATASET_NAME}_{MODEL_NAME}.pth'

# Training hyper-parameters
MINIBATCH_SIZE = 16
EPOCHS = 6
LAMBDA = 1e-4
LEARNING_RATE = 5e-5
FROZEN_LAYERS = 9

# Loss function (no optimizer is created in this cell)
loss_function = torch.nn.BCEWithLogitsLoss()

# Data splits; every column after the first one is treated as a label column
train_df, val_df, test_df = load_dataset(DATASET_NAME)
LABEL_COLS = train_df.columns[1:].tolist()

In [None]:
# Threshold tuning is done implicitly inside each score function: every wrapper below returns the best result found by tune_sigmoid_threshold.
def accuracy(y_true, y_pred):
    """Best accuracy reported by ``tune_sigmoid_threshold``.

    Both arguments are forwarded unchanged together with ``accuracy_score``;
    the tuned threshold is discarded and only the best result is returned.
    """
    _threshold, best_score = tune_sigmoid_threshold(y_true, y_pred, accuracy_score)
    return best_score

def jaccard(y_true, y_pred):
    """Best macro-averaged Jaccard score reported by ``tune_sigmoid_threshold``.

    Both arguments are forwarded unchanged together with ``jaccard_score`` and
    its keyword settings; the tuned threshold is discarded.
    """
    metric_kwargs = {'average': 'macro', 'zero_division': 0}
    _threshold, best_score = tune_sigmoid_threshold(y_true, y_pred, jaccard_score, metric_kwargs)
    return best_score

def jaccard_samples(y_true, y_pred):
    """Best samples-averaged Jaccard score reported by ``tune_sigmoid_threshold``.

    Both arguments are forwarded unchanged together with ``jaccard_score`` and
    its keyword settings; the tuned threshold is discarded.
    """
    metric_kwargs = {'average': 'samples', 'zero_division': 0}
    _threshold, best_score = tune_sigmoid_threshold(y_true, y_pred, jaccard_score, metric_kwargs)
    return best_score

def f1(y_true, y_pred):
    """Best macro-averaged F1 score reported by ``tune_sigmoid_threshold``.

    Both arguments are forwarded unchanged together with ``f1_score`` and its
    keyword settings; the tuned threshold is discarded.
    """
    metric_kwargs = {'average': 'macro', 'zero_division': 0}
    _threshold, best_score = tune_sigmoid_threshold(y_true, y_pred, f1_score, metric_kwargs)
    return best_score

def f1_micro(y_true, y_pred):
    """Best micro-averaged F1 score reported by ``tune_sigmoid_threshold``.

    Both arguments are forwarded unchanged together with ``f1_score`` and its
    keyword settings; the tuned threshold is discarded.
    """
    metric_kwargs = {'average': 'micro', 'zero_division': 0}
    _threshold, best_score = tune_sigmoid_threshold(y_true, y_pred, f1_score, metric_kwargs)
    return best_score
def membership_score(y_true, y_pred):
    """Weaker accuracy: share of samples whose top-scoring class is a true class.

    A prediction is considered correct if its maximum-score class is marked
    ``1`` in the matching row of ``y_true``. No threshold is applied, so raw
    model outputs can be passed directly.

    Args:
        y_true: array-like of shape (n_samples, n_classes); ``1`` marks the
            true classes of each sample.
        y_pred: array-like of shape (n_samples, n_classes) with per-class scores.

    Returns:
        float: fraction of correct samples, or ``0.0`` when there are no samples.

    Raises:
        ValueError: if ``y_true`` and ``y_pred`` contain a different number of
            samples (previously the longer one was silently truncated).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n_samples = len(y_true)
    if len(y_pred) != n_samples:
        raise ValueError(
            f'y_true and y_pred must have the same number of samples, '
            f'got {n_samples} and {len(y_pred)}'
        )
    # Guard against division by zero on an empty split
    if n_samples == 0:
        return 0.0
    # Index of the highest score per sample (first maximum wins on ties)
    top_class = np.argmax(y_pred, axis=1)
    hits = y_true[np.arange(n_samples), top_class] == 1
    return np.count_nonzero(hits) / n_samples

In [None]:
def plot_learning_curves(tr_loss, val_loss, score_name = 'loss'):
    """Plot a training curve and a validation curve over epochs and show the figure.

    Args:
        tr_loss: per-epoch values of the training curve.
        val_loss: per-epoch values of the validation curve.
        score_name: name of the plotted quantity; used as the y-axis label
            and in the title.
    """
    # (series, line style) pairs, drawn in this order
    curves = (
        (tr_loss, {'label': 'train'}),
        (val_loss, {'label': 'validation', 'color': 'orange', 'linestyle': '--'}),
    )
    for series, line_style in curves:
        plt.plot(series, **line_style)
    plt.xlabel('epoch')
    plt.ylabel(score_name)
    plt.legend()
    plt.title(f'{score_name} over epochs')
    plt.show()

def _report_thresholded_metrics(target, out, thresh, target_cols):
    """Plot and print the evaluation of one data split at a fixed threshold.

    Args:
        target: ground-truth label matrix of the split.
        out: model outputs for the same split.
        thresh: decision threshold; outputs strictly above it become 1.
        target_cols: label names, used for plot axes and the report rows.
    """
    best_out = (out > thresh).astype(int)
    # confusion matrix for the chosen threshold
    plot_multilabel_confusion_heatmap(target, best_out, label_true=target_cols, label_pred=target_cols, normalize=True)
    # bar plot over classes
    plot_score_barplot(target, best_out, target_cols)
    # zero_division=0 keeps the report consistent with the other metric calls
    # and avoids undefined-metric warnings for classes that are never predicted
    print(classification_report(target, best_out, target_names=target_cols, zero_division=0))
    # `labels` is deliberately not passed: for multilabel indicator targets
    # scikit-learn interprets it as column indices, not column names
    print('Jaccard Samples Score:', jaccard_score(target, best_out, zero_division=0, average='samples'))
    print('Jaccard Macro Score:', jaccard_score(target, best_out, zero_division=0, average='macro'))
    # membership score works on the raw outputs, not on the thresholded ones
    print('Membership Score:', membership_score(target, out))


def model_analysis(model, train_df, val_df, target_cols, test_df=None, checkpoint_path=None, checkpoint_score='f1_macro', checkpoint_score_maximize=True):
    """Fit a model and report its learning curves and thresholded metrics.

    The decision threshold is tuned for macro F1 on the validation set and is
    reused unchanged for the test set when ``test_df`` is given.

    Args:
        model: object exposing ``fit``, ``predict`` and the
            ``get_train_*`` / ``get_val_*`` accessors used below; its score
            dictionaries must contain an ``'f1_macro'`` entry.
        train_df: training data passed to ``model.fit``.
        val_df: validation data, used during fitting and for threshold tuning.
        target_cols: names of the label columns in the data frames.
        test_df: optional test data; when provided its metrics are reported too.
        checkpoint_path: forwarded to ``model.fit``.
        checkpoint_score: forwarded to ``model.fit``.
        checkpoint_score_maximize: forwarded to ``model.fit``.

    Returns:
        None. All results are plotted or printed.
    """
    model.fit(train_df, validation_df=val_df, progress_bar_epoch=True, progress_bar_step=False, checkpoint_path=checkpoint_path, checkpoint_score=checkpoint_score, checkpoint_score_maximize=checkpoint_score_maximize)
    # plot learning curves
    tr_scores, val_scores = model.get_train_scores(), model.get_val_scores()
    tr_loss, val_loss = model.get_train_loss(), model.get_val_loss()
    plot_learning_curves(tr_loss, val_loss)
    plot_learning_curves(tr_scores['f1_macro'], val_scores['f1_macro'], 'Macro F1')
    # get predictions on validation set
    out = model.predict(val_df)
    target = val_df[target_cols].values
    # plot threshold tuning
    plot_threshold_tuning(target, out, plot=True)
    plot_threshold_tuning(target, out, plot=True, metric_params={'average':'micro', 'zero_division':0}, metric_fun=f1_score, metric_name='F1 Score')
    plot_threshold_tuning(target, out, plot=True, metric_params={'average':'macro', 'zero_division':0}, metric_fun=f1_score, metric_name='F1 Score')
    # get best threshold
    thresh, _ = tune_sigmoid_threshold(target, out, metric_params={'average':'macro', 'zero_division':0}, metric_fun=f1_score)
    # validation results at the tuned threshold
    _report_thresholded_metrics(target, out, thresh, target_cols)
    if test_df is not None:
        # test results, using the threshold tuned on the validation set
        test_out = model.predict(test_df)
        test_target = test_df[target_cols].values
        _report_thresholded_metrics(test_target, test_out, thresh, target_cols)

# Preliminary attempts

In [None]:
# Hyper-parameters handed to the model constructor
PARAMS = dict(
    batch_size=MINIBATCH_SIZE,
    learning_rate=LEARNING_RATE,
    epochs=EPOCHS,
    loss_function=loss_function,
    regularization=LAMBDA,
    n_classes=len(LABEL_COLS),
    frozen_layers=FROZEN_LAYERS,
)
# Score name -> scoring callable taking (y_true, y_pred)
SCORES = dict(
    accuracy=accuracy,
    jaccard_macro=jaccard,
    f1_macro=f1,
    jaccard_samples=jaccard_samples,
    f1_micro=f1_micro,
    membership=membership_score,
)
# Build the model, then train and analyse it on the validation split only
model = Roberta(SCORES, PARAMS)
model_analysis(
    model, train_df, val_df, LABEL_COLS,
    checkpoint_path=CHECKPOINT_MODEL_FILE,
)

# Cross Validation

In [None]:
# Hyper-parameter grid: each key maps to the list of values to try.
# NOTE(review): unlike PARAMS, this grid has no 'loss_function' entry —
# confirm that Roberta falls back to a suitable default.
params_to_explore = dict(
    learning_rate=[5e-5, 1e-5],
    regularization=[0, 1e-5],
    batch_size=[16, 32],
    epochs=[6],
    frozen_layers=[FROZEN_LAYERS],
    n_classes=[len(LABEL_COLS)],
)
# Results location: ./results/<dataset>/<dataset>_<model>.csv
RESULT_DIR = f'./results/{DATASET_NAME}/'
RESULT_FILE = f'{RESULT_DIR}{DATASET_NAME}_{MODEL_NAME}.csv'
# Set up the hold-out grid search over the train/validation split
grid_search = HoldOutCrossValidation(
    Roberta, SCORES, train_df, val_df,
    param_dict=params_to_explore,
    res_file=RESULT_FILE,
)
# Run it
grid_search.run()

In [None]:
# Fetch the grid-search results and the best entry according to macro F1
results = grid_search.get_results()
best_info = grid_search.get_best_info('f1_macro')
# Each heading is printed on its own line, followed by the corresponding object
print("BEST MODEL INFO", best_info, sep="\n")
print("RESULTS", results, sep="\n")
print("BEST PARAMS")
# Best hyper-parameters according to macro F1; shown as the cell output
BEST_PARAMS = grid_search.get_best_params('f1_macro')
BEST_PARAMS

In [None]:
# Retrain with the best grid-search parameters and evaluate on validation and test
model = Roberta(SCORES, BEST_PARAMS)
model_analysis(
    model, train_df, val_df, LABEL_COLS,
    test_df=test_df,
    checkpoint_path=CHECKPOINT_MODEL_FILE,
)

## Feature importance

In [None]:
# Output file for the feature-importance results: <results dir>/<dataset>_<model>_features.csv
RESULT_FILE = f'{RESULT_DIR}{DATASET_NAME}_{MODEL_NAME}_features.csv'
# NOTE(review): presumably the number of top features to keep — not used in the visible cells
TOP_N = 30

## Attempt on grouped emotions

In [None]:
# Separate checkpoint file for the model trained on the grouped emotions
CHECKPOINT_MODEL_FILE = f'{CHECKPOINT_DIR}{DATASET_NAME}_Grouped_{MODEL_NAME}.pth'

In [None]:
# Map the dataset emotions to the twitter ones on every split.
# The three frames are overwritten, so re-running this cell applies the mapping
# to already-mapped data; reload the dataset first if that is not intended.
train_df, val_df, test_df = (
    goemotions_apply_emotion_mapping(split_df)
    for split_df in (train_df, val_df, test_df)
)

In [None]:
# model analysis with twitter mapped emotions
# The label columns and the class count are re-derived from the mapped frame,
# using the same "every column after the first is a label" convention as
# LABEL_COLS, so they cannot go stale if the emotion mapping changed the label set.
# NOTE(review): assumes the mapping keeps the non-label column first — confirm.
GROUPED_LABEL_COLS = train_df.columns[1:].tolist()
GROUPED_PARAMS = {**BEST_PARAMS, 'n_classes': len(GROUPED_LABEL_COLS)}
model = Roberta(SCORES, GROUPED_PARAMS)
model_analysis(model, train_df, val_df, GROUPED_LABEL_COLS, test_df, checkpoint_path=CHECKPOINT_MODEL_FILE)

## Feature importance on grouped dataset

In [None]:
# Output file for the grouped-dataset feature-importance results
RESULT_FILE = f'{RESULT_DIR}{DATASET_NAME}_Grouped_{MODEL_NAME}_features.csv'
# NOTE(review): presumably the number of top features to keep — not used in the visible cells
TOP_N = 30