<a href="https://colab.research.google.com/github/Hillascher5/nlp-tweets-sentiment-analysis/blob/main/Evaluate_saved_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Evaluate Saved Best Models

Evaluate the saved models of both types: those fine-tuned with the Hugging Face Trainer API and those fine-tuned with manual training code.

In [None]:
# # Needed for Google Colab
# !pip install --quiet evaluate transformers optuna datasets nltk scikit-learn
# !pip install numpy==1.26.4

In [None]:
# Hugging Face loaders for the saved checkpoints, plus the Trainer used for evaluation
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datasets import Dataset
from sklearn.metrics import classification_report
from google.colab import drive
# Mount Google Drive; the test CSV and the saved models are read from under /content/drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import random
import torch
import nltk
# NOTE(review): the stopwords corpus is not used by any cell shown here — confirm it is still needed
nltk.download('stopwords')

import os
# NOTE(review): presumably keeps the Trainer from reporting to Weights & Biases — confirm
os.environ["WANDB_DISABLED"] = "true"

In [None]:
def set_seed_all(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU), and, when a
    CUDA device is available, all CUDA generators as well.

    Args:
        seed: Integer seed applied to each generator. Defaults to 42.
    """
    # Same order as always: Python, NumPy, then PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


# Fix the seed once, up front
set_seed_all(42)

#### Load test set

In [None]:
# Read the test-set CSV from the mounted Drive; the file is decoded as latin1
df_test = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/nlp_project/Data/Corona_NLP_test_to_evaluate.csv", encoding='latin1')

In [None]:
# Give each sentiment class an integer id, numbering the classes in sorted order
# NOTE(review): the ids are derived from the test file itself; presumably they must
# match the ids the saved models were trained with — confirm against the training code
unique_labels = sorted(df_test["Sentiment"].unique())
label2id = dict(zip(unique_labels, range(len(unique_labels))))
df_test["label"] = df_test["Sentiment"].map(label2id)

In [None]:
# Minimal pre-processing
def light_preprocess(text):
    """Strip leading and trailing whitespace from a tweet.

    Args:
        text: The raw tweet. Normally a string, but a CSV column can also hold
            missing values (``None`` / NaN) or other scalars.

    Returns:
        The stripped string. Missing values become ``""`` and other non-string
        scalars are converted with ``str()`` before stripping, so the function
        never raises ``AttributeError`` on a non-string cell.
    """
    if isinstance(text, str):
        return text.strip()                         # Remove unnecessary spaces
    # pandas represents an empty CSV cell as NaN, which has no .strip()
    if text is None or pd.isna(text):
        return ""
    return str(text).strip()

# Tag naming the preprocessing variant; it is not read again in the cells shown here
is_preprocessed = "minimal_preprocess"
# Store the preprocessed tweets in a new column, leaving "OriginalTweet" untouched
df_test["clean_text"] = df_test["OriginalTweet"].apply(light_preprocess)

In [None]:
# Convert DataFrame to Hugging Face Dataset
# Only the text and the numeric label are carried over; all other columns are dropped
hf_test = Dataset.from_pandas(df_test[["clean_text", "label"]])

### Load saved models

In [None]:
def _load_checkpoint(checkpoint_dir):
    """Load the classification model, then its tokenizer, from one saved directory."""
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    return model, tokenizer


# --- Checkpoints stored under the HF_Trainer folder ---
model_path_hf = "/content/drive/MyDrive/Colab Notebooks/nlp_project/models/w_test_split/HF_Trainer/"

bert_model_name_hf = "bert_best_model_stratify_maxl_256_minimal_preprocess_5000_samples_optuna"
bert_model, bert_tokenizer = _load_checkpoint(model_path_hf + bert_model_name_hf)

roberta_model_name_hf = "roberta_best_model_stratify_maxl_256_minimal_preprocess_5000_samples_optuna"
roberta_model, roberta_tokenizer = _load_checkpoint(model_path_hf + roberta_model_name_hf)

deberta_model_name_hf = "deberta_best_model_stratify_maxl_128_minimal_preprocess_5000_samples_optuna"
deberta_model, deberta_tokenizer = _load_checkpoint(model_path_hf + deberta_model_name_hf)

# --- Checkpoints stored under the Manual_finetune folder ---
model_path_manual = "/content/drive/MyDrive/Colab Notebooks/nlp_project/models/w_test_split/Manual_finetune_min_preproc_5000_samples_opt/"

bert_model_name_manual = "manual2hf_bert-base-uncased_5000_samples_opt"
bert_model_man, bert_tokenizer_man = _load_checkpoint(model_path_manual + bert_model_name_manual)

roberta_model_name_manual = "manual2hf_roberta-base_5000_samples_opt"
roberta_model_man, roberta_tokenizer_man = _load_checkpoint(model_path_manual + roberta_model_name_manual)

deberta_model_name_manual = "manual2hf_deberta-base_5000_samples_opt"
deberta_model_man, deberta_tokenizer_man = _load_checkpoint(model_path_manual + deberta_model_name_manual)

In [None]:
# Tokenize functions: one per saved model, each using that model's own tokenizer
def _tokenize_clean_text(tokenizer, examples, max_length):
    """Tokenize the "clean_text" field, truncating and padding to exactly max_length."""
    return tokenizer(
        examples["clean_text"],
        truncation=True,
        padding='max_length',
        max_length=max_length,
    )


# The 256 / 128 lengths agree with the "maxl_256" / "maxl_128" tags in the HF checkpoint names.
# NOTE(review): the manual checkpoints' names carry no length tag — confirm 256 / 128 match their training.
def tokenize_function_bert(examples):
    """Tokenize a batch for the BERT (HF) model."""
    return _tokenize_clean_text(bert_tokenizer, examples, 256)


def tokenize_function_roberta(examples):
    """Tokenize a batch for the RoBERTa (HF) model."""
    return _tokenize_clean_text(roberta_tokenizer, examples, 256)


def tokenize_function_deberta(examples):
    """Tokenize a batch for the DeBERTa (HF) model."""
    return _tokenize_clean_text(deberta_tokenizer, examples, 128)


def tokenize_function_bert_man(examples):
    """Tokenize a batch for the BERT (manual) model."""
    return _tokenize_clean_text(bert_tokenizer_man, examples, 256)


def tokenize_function_roberta_man(examples):
    """Tokenize a batch for the RoBERTa (manual) model."""
    return _tokenize_clean_text(roberta_tokenizer_man, examples, 256)


def tokenize_function_deberta_man(examples):
    """Tokenize a batch for the DeBERTa (manual) model."""
    return _tokenize_clean_text(deberta_tokenizer_man, examples, 128)

In [None]:
# Tokenize the test set once per model
def _tokenize_test_set(tokenize_function):
    """Map a tokenize function over hf_test in batches and expose the model inputs as torch tensors."""
    tokenized = hf_test.map(tokenize_function, batched=True)
    # Only these three columns are returned when the dataset is indexed
    tokenized.set_format("torch", columns=["input_ids", "attention_mask", "label"])
    return tokenized


tokenized_bert_test = _tokenize_test_set(tokenize_function_bert)
tokenized_roberta_test = _tokenize_test_set(tokenize_function_roberta)
tokenized_deberta_test = _tokenize_test_set(tokenize_function_deberta)

tokenized_bert_test_man = _tokenize_test_set(tokenize_function_bert_man)
tokenized_roberta_test_man = _tokenize_test_set(tokenize_function_roberta_man)
tokenized_deberta_test_man = _tokenize_test_set(tokenize_function_deberta_man)

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Args:
        eval_pred: A ``(logits, labels)`` pair as handed over by the Trainer.
            ``logits`` is an array with the class scores on its last axis; if
            the model returned several outputs it may be a tuple, in which
            case the first element is taken as the logits.

    Returns:
        dict with the keys ``accuracy``, ``f1_macro``, ``precision_macro`` and
        ``recall_macro``.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Models with extra outputs return a tuple; the logits come first
        logits = logits[0]
    predictions = logits.argmax(axis=-1)

    # zero_division=0 is applied uniformly: a class that is never predicted (or
    # never present) scores 0 for every metric instead of raising a warning
    # for some metrics and not for others.
    return {
        'accuracy': accuracy_score(labels, predictions),
        'f1_macro': f1_score(labels, predictions, average='macro', zero_division=0),
        'precision_macro': precision_score(labels, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(labels, predictions, average='macro', zero_division=0),
    }

In [None]:
# Define trainers (used for evaluation / prediction only in the cells below)
def _make_eval_trainer(model, tokenizer):
    """Wrap a model and its tokenizer in a Trainer that reports compute_metrics."""
    # No TrainingArguments are passed, so the Trainer runs with its library defaults
    return Trainer(model=model, tokenizer=tokenizer, compute_metrics=compute_metrics)


bert_trainer_hf = _make_eval_trainer(bert_model, bert_tokenizer)
roberta_trainer_hf = _make_eval_trainer(roberta_model, roberta_tokenizer)
deberta_trainer_hf = _make_eval_trainer(deberta_model, deberta_tokenizer)

bert_trainer_man = _make_eval_trainer(bert_model_man, bert_tokenizer_man)
roberta_trainer_man = _make_eval_trainer(roberta_model_man, roberta_tokenizer_man)
deberta_trainer_man = _make_eval_trainer(deberta_model_man, deberta_tokenizer_man)

In [None]:
# Evaluate each model on its own tokenized copy of the test set
def _evaluate_and_print(trainer, test_dataset):
    """Run trainer.evaluate on the dataset, print the metrics dict and return it."""
    metrics = trainer.evaluate(test_dataset)
    print(metrics)
    return metrics


bert_metrics = _evaluate_and_print(bert_trainer_hf, tokenized_bert_test)
roberta_metrics = _evaluate_and_print(roberta_trainer_hf, tokenized_roberta_test)
deberta_metrics = _evaluate_and_print(deberta_trainer_hf, tokenized_deberta_test)

bert_metrics_man = _evaluate_and_print(bert_trainer_man, tokenized_bert_test_man)
roberta_metrics_man = _evaluate_and_print(roberta_trainer_man, tokenized_roberta_test_man)
deberta_metrics_man = _evaluate_and_print(deberta_trainer_man, tokenized_deberta_test_man)

In [None]:
from IPython.display import display

# Metrics that make it into the summary table (anything else in the
# evaluation output is left out)
keys_to_include = [
    'eval_loss',
    'eval_accuracy',
    'eval_f1_macro',
    'eval_precision_macro',
    'eval_recall_macro'
]


def _summary_row(model_label, metrics):
    """Build one table row: the model label plus the selected metrics, in the metrics dict's own order."""
    row = {"Model": model_label}
    for metric_name, metric_value in metrics.items():
        if metric_name in keys_to_include:
            row[metric_name] = metric_value
    return row


# One (label, metrics) pair per evaluated model, in display order
_evaluated_models = [
    ("BERT (HF)", bert_metrics),
    ("RoBERTa (HF)", roberta_metrics),
    ("DeBERTa (HF)", deberta_metrics),
    ("BERT (Manual)", bert_metrics_man),
    ("RoBERTa (Manual)", roberta_metrics_man),
    ("DeBERTa (Manual)", deberta_metrics_man),
]

# Index by model name and round to four decimals for readability
metrics_table = (
    pd.DataFrame([_summary_row(label, metrics) for label, metrics in _evaluated_models])
    .set_index("Model")
    .round(4)
)

display(metrics_table)

In [None]:
# Predictions for classification report
def _predict_and_report(trainer, test_dataset, title):
    """Predict on the dataset, print a titled per-class report, and return (output, y_pred, y_true)."""
    output = trainer.predict(test_dataset)
    y_pred = output.predictions.argmax(axis=-1)   # highest-scoring class per example
    y_true = output.label_ids
    print(title)
    print(classification_report(y_true, y_pred, digits=4))
    return output, y_pred, y_true


bert_preds, bert_y_pred, bert_y_true = _predict_and_report(
    bert_trainer_hf, tokenized_bert_test, 'BERT HF Trainer model:'
)

roberta_preds, roberta_y_pred, roberta_y_true = _predict_and_report(
    roberta_trainer_hf, tokenized_roberta_test, 'RoBERTa HF Trainer model:'
)

deberta_preds, deberta_y_pred, deberta_y_true = _predict_and_report(
    deberta_trainer_hf, tokenized_deberta_test, 'DeBERTa HF Trainer model:'
)

bert_preds_man, bert_y_pred_man, bert_y_true_man = _predict_and_report(
    bert_trainer_man, tokenized_bert_test_man, 'BERT manual code model:'
)

roberta_preds_man, roberta_y_pred_man, roberta_y_true_man = _predict_and_report(
    roberta_trainer_man, tokenized_roberta_test_man, 'RoBERTA manual code model:'
)

deberta_preds_man, deberta_y_pred_man, deberta_y_true_man = _predict_and_report(
    deberta_trainer_man, tokenized_deberta_test_man, 'DeBERTA manual code model:'
)


In [None]:
# from IPython.display import Javascript

# def disconnect_runtime():
#     display(Javascript('google.colab.kernel.disconnect()'))

# disconnect_runtime()