In [None]:
import os

import torch
torch.cuda.is_available()
from datasets import load_dataset
from datasets import DatasetDict
from datasets import Dataset


This program works with any of the four datasets after minor modifications; this version is adapted for the relabeled version of the mixed-label dataset. The program was written and commented with the help of ChatGPT and Copilot.

In [None]:

dataset = load_dataset("Statistikkprosjekt/Mixed")


def _review_sentiment_rows(split):
    """Return one ``{'review', 'sentiment'}`` dict per row of ``split``.

    The ``polarity`` value is converted to ``str`` so that it can later be
    passed to the tokenizer as the target text.
    """
    return [
        {'review': row["review"], 'sentiment': str(row["polarity"])}
        for row in split
    ]


# Build the reduced row lists from the original splits before any split is replaced.
newDatasetTrain = _review_sentiment_rows(dataset["train"])
newDatasetValidation = _review_sentiment_rows(dataset["validation"])
newDatasetTest = _review_sentiment_rows(dataset["test"])

# Swap each split for a fresh Dataset holding only the two columns built above.
for split_name, split_rows in (
    ("train", newDatasetTrain),
    ("validation", newDatasetValidation),
    ("test", newDatasetTest),
):
    dataset[split_name] = Dataset.from_list(split_rows)

# Sanity check (cell output): the label of the first training row is now a str.
type(dataset["train"][0]["sentiment"])

In [None]:

from transformers import AutoTokenizer

# Tokenizer shared by the review text and the sentiment labels.
# trust_remote_code=True lets transformers run the custom code shipped in the
# model repository.
# NOTE(review): the tokenizer comes from "ltg/nort5-base" while the model trained
# in this notebook is "ltg/nort5-large" — confirm both checkpoints share one vocabulary.
tokenizer = AutoTokenizer.from_pretrained("ltg/nort5-base", trust_remote_code=True)

def tokenize_function(examples):
    """Tokenise the ``review`` field of ``examples`` into model inputs.

    Every review is padded or truncated to exactly 90 tokens, and the
    tokenizer is asked for PyTorch tensors.

    Args:
        examples: mapping with a ``"review"`` entry (a batch of texts when
            used with ``Dataset.map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for that input.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 90,
        "return_tensors": "pt",
    }
    return tokenizer(examples["review"], **encoding_options)

def tokenize_label_function(examples):
    """Tokenise the ``sentiment`` field of ``examples`` into target ids.

    Each label string is truncated to a single token (``max_length=1``); no
    padding is requested, and the tokenizer is asked for PyTorch tensors.

    Args:
        examples: mapping with a ``"sentiment"`` entry (a batch of label
            strings when used with ``Dataset.map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for that input.
    """
    # NOTE(review): with max_length=1 the single kept token must be the label
    # itself rather than a special token — confirm against the tokenizer.
    label_options = {
        "truncation": True,
        "max_length": 1,
        "return_tensors": "pt",
    }
    return tokenizer(examples["sentiment"], **label_options)

In [None]:
from transformers import AutoModelForSeq2SeqLM 
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
from torch.utils.data import DataLoader
from torch.optim import AdamW

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Number of epochs used for the training loop and the learning-rate schedule.
num_epochs = 10


In [None]:

# One entry per training run. All runs currently share the same hyperparameters,
# so the loop below trains the same configuration 15 times.
learning_rates = [3e-6 for _ in range(15)]
learning_rates = sorted(learning_rates, reverse=False)
batch_sizes = [16 for _ in learning_rates]
hidden_dropout_rates = [0.3 for _ in learning_rates]

# Tokenise once: the result does not depend on the run, so it is built outside
# the loop instead of being recomputed for every model.
# Labels first: the label token ids become the "labels" column.
tokenized_datasets = dataset.map(tokenize_label_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("input_ids", "labels")
tokenized_datasets = tokenized_datasets.remove_columns(["token_type_ids", "attention_mask", "sentiment"])
# Then the reviews, which supply the input_ids / attention_mask columns.
tokenized_datasets = tokenized_datasets.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["review", "token_type_ids"])
tokenized_datasets.set_format("torch")

small_eval_dataset = tokenized_datasets["validation"]
eval_dataloader = DataLoader(small_eval_dataset, batch_size=30)

# torch.save does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok=True)

for i in range(len(learning_rates)):
    print("")
    print(f"Model {i+1}")
    last = float("inf")   # validation loss of the previous epoch
    best = float("inf")   # lowest validation loss seen in this run
    # Reset per run so the counter never leaks from the previous model and is
    # always defined, even if the first validation loss is NaN.
    Counter = 0

    small_train_dataset = tokenized_datasets["train"].shuffle()
    train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=batch_sizes[i])
    num_training_steps = num_epochs * len(train_dataloader)

    # NOTE(review): the model is "ltg/nort5-large" while the notebook's tokenizer
    # is loaded from "ltg/nort5-base" — confirm the vocabularies match.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "ltg/nort5-large", trust_remote_code=True, hidden_dropout_prob=hidden_dropout_rates[i])
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rates[i])

    # Linear decay from the initial learning rate to 0 over all training steps, no warm-up.
    lr_scheduler = get_scheduler(
        name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}")

        # ---- training pass ----
        model.train()
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        # ---- validation pass ----
        model.eval()
        total_loss = 0.0
        total_samples = 0
        with torch.no_grad():
            for batch in eval_dataloader:
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)

                # Weight each batch loss by its number of samples (first
                # dimension of 'labels') so the final mean is per sample.
                batch_size = batch['labels'].size(0)
                total_loss += outputs.loss.item() * batch_size
                total_samples += batch_size

        # Average loss over all validation samples.
        average_loss = total_loss / total_samples
        print(f'Evaluation Loss: {average_loss}')

        # ---- checkpointing and early stopping ----
        if average_loss < last:
            # Improved on the previous epoch: restart the patience counter.
            Counter = 0
            if average_loss < best:
                # New best for this run: keep a checkpoint of the whole model object.
                torch.save(model, f"output/T5_{i}.pth")
                best = average_loss

        last = average_loss
        Counter += 1
        # Counter is 1 right after an improving epoch, so this triggers after
        # two consecutive epochs that failed to beat the epoch before them.
        if Counter > 2:
            print(epoch)
            break

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


from sklearn.metrics import f1_score, classification_report, accuracy_score

# NOTE(review): `model` is whatever the training cell left in memory (the last
# run in its final epoch state), not necessarily the best saved checkpoint.
# Set the model to evaluation mode
model.eval()

all_predictions = []
all_labels = []

# Disable gradient computation during evaluation
with torch.no_grad():
    for batch in eval_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass
        outputs = model(**batch)
        logits = outputs.logits

        # Pick the most likely vocabulary entry for every target position.
        # Assumes seq2seq logits are laid out as (batch, target_len, vocab), so the
        # vocabulary is the LAST axis; dim=1 would reduce over target positions.
        predictions = torch.argmax(logits, dim=-1)

        # Collect predictions and labels as flat sequences of token ids
        # (labels are one token per sample, so this yields one value per review).
        all_predictions.extend(predictions.reshape(-1).cpu().numpy())
        all_labels.extend(batch["labels"].reshape(-1).cpu().numpy())

# eval_dataloader iterates the validation split, so these are validation metrics.
accuracy = accuracy_score(all_labels, all_predictions)
# Macro average: unweighted mean of the per-class F1 scores.
f1 = f1_score(all_labels, all_predictions, average="macro")

print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation F1 Score (macro): {f1:.4f}")

# The model may predict token ids that never occur as a true label, so the
# matrix axes are the union of true and predicted values (values are token ids).
class_labels = np.union1d(all_labels, all_predictions)

# Calculate confusion matrix
conf_matrix = confusion_matrix(all_labels, all_predictions, labels=class_labels)

# Display the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# Optionally, you can print a detailed classification report
report = classification_report(all_labels, all_predictions)
print("Classification Report:\n", report)

In [None]:
# Row totals = number of samples per true label (rows of the confusion matrix).
total_true_labels = conf_matrix.sum(axis=1)

# Express every cell as a percentage of its row; rows whose total is 0 produce
# NaN from the division, which is then mapped to 0.
row_totals_column = total_true_labels.reshape(-1, 1)
percentages = np.nan_to_num((conf_matrix / row_totals_column) * 100)

In [None]:
# The confusion matrix spans every value seen in either the true labels or the
# predictions, so the tick labels must use that same union; np.unique(all_labels)
# alone has too few entries whenever an unseen label is predicted.
tick_labels = np.union1d(all_labels, all_predictions)

fig, ax = plt.subplots()
sns.heatmap(percentages, annot=True, fmt="f", cmap="Blues", xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()