In [None]:
import os

import torch
from datasets import load_dataset
from sklearn.metrics import f1_score, classification_report, accuracy_score
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
from transformers import get_scheduler
# Show whether a CUDA device is available (displayed as this cell's output).
torch.cuda.is_available()


This program works with any of the four datasets after minor modifications; this version is adapted for the relabeled version of the mixed-label dataset. The program was written and commented with the help of ChatGPT and Copilot.

In [None]:
# Number of target classes.
# NOTE(review): presumably has to match the label set of the dataset chosen
# below -- confirm when switching datasets.
num_classes = 4

# Load the dataset by its identifier.
# Alternative dataset: load_dataset("ltg/norec_sentence", "ternary")
dataset = load_dataset(
    "Statistikkprosjekt/Mixed",
)

# Keep the dataset as the last expression so the notebook displays it.
dataset

In [None]:
from transformers import AutoTokenizer

# Tokenizer used to encode the reviews.
# NOTE(review): this is the "base" checkpoint while the training cell loads
# "ltg/norbert3-large" -- confirm that both checkpoints share one vocabulary.
tokenizer = AutoTokenizer.from_pretrained("ltg/norbert3-base")


def tokenize_function(examples):
    """Tokenize the "review" field of ``examples``.

    Every sequence is truncated or padded to exactly 90 tokens and the
    encoding is returned as PyTorch tensors.
    """
    encoding_options = dict(
        padding="max_length",
        truncation=True,
        max_length=90,
        return_tensors="pt",
    )
    return tokenizer(examples["review"], **encoding_options)



In [None]:

# Upper bound on the number of epochs per run (early stopping may end a run sooner).
num_epochs = 10

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# Checkpoint paths: the training loop saves the best model of each run to one
# of these files (example_model1.pth ... example_model15.pth).
models = [f"example_folder/example_model{run}.pth" for run in range(1, 16)]

# Display the paths as the cell output.
models

In [None]:
def _evaluate(model, dataloader):
    """Run ``model`` over ``dataloader`` with gradients disabled.

    Returns:
        (average_loss, labels, predictions): the per-sample average loss,
        followed by the gold labels and the arg-max predictions of every
        sample, in dataloader order.
    """
    model.eval()
    total_loss = 0.0
    total_samples = 0
    labels = []
    predictions = []
    for batch in dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        batch_size = batch["labels"].size(0)
        # Weight each batch loss by the batch size (assumes outputs.loss is a
        # batch mean) so a smaller final batch does not skew the average.
        total_loss += outputs.loss.item() * batch_size
        total_samples += batch_size
        predictions.extend(torch.argmax(outputs.logits, dim=-1).cpu().numpy())
        labels.extend(batch["labels"].cpu().numpy())
    return total_loss / total_samples, labels, predictions


# Data preparation does not depend on the checkpoint path, so it is done once
# instead of once per run.
tokenized_datasett = dataset.map(tokenize_function, batched=True)
tokenized_datasett = tokenized_datasett.remove_columns(["review"])
tokenized_datasett = tokenized_datasett.rename_column("polarity", "labels")
tokenized_datasett.set_format("torch")

small_eval_dataset = tokenized_datasett["validation"]
eval_dataloader = DataLoader(small_eval_dataset, batch_size=20)

# One independent fine-tuning run per checkpoint path.
for i in models:
    print(i)

    # torch.save does not create missing directories; create the target folder
    # up front so a finished epoch is never lost to a failing save.
    checkpoint_dir = os.path.dirname(i)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    last = float("inf")  # validation loss of the previous epoch
    best = float("inf")  # lowest validation loss seen in this run

    small_train_dataset = tokenized_datasett["train"].shuffle()
    train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=14)
    num_training_steps = num_epochs * len(train_dataloader)

    model = AutoModelForSequenceClassification.from_pretrained(
        "ltg/norbert3-large",
        trust_remote_code=True,
        num_labels=num_classes,
        dropout=0.1,
        attention_probs_dropout_prob=0.1,
        hidden_dropout_prob=0.1,
    )

    # AdamW optimizer with a linearly decaying learning rate (no warm-up).
    optimizer = AdamW(model.parameters(), lr=6e-6)
    lr_scheduler = get_scheduler(
        name="linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    model.to(device)

    # Per-epoch metrics of this run; the plotting cells read them afterwards.
    list_f1 = []
    list_accuracy = []
    list_train_loss = []
    list_eval_loss = []
    # Early-stopping counter: reset whenever the validation loss drops below
    # the previous epoch's loss, incremented once per epoch.
    Counter = 0

    for epoch in range(num_epochs):
        # ---- training ----
        model.train()
        loss_train = 0
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            loss.backward()
            loss_train += loss.item()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
        list_train_loss.append(loss_train / len(train_dataloader))

        # ---- validation ----
        average_loss, all_labels, all_predictions = _evaluate(model, eval_dataloader)
        f1 = f1_score(all_labels, all_predictions, average='macro')
        ac = accuracy_score(all_labels, all_predictions)
        list_f1.append(f1)
        list_accuracy.append(ac)
        list_eval_loss.append(average_loss)
        print(f'Evaluation Loss: {average_loss}')

        # ---- checkpointing and early stopping ----
        if average_loss < last:
            Counter = 0
            if average_loss < best:
                # Keep only the checkpoint with the lowest validation loss.
                torch.save(model, i)
                best = average_loss

        last = average_loss
        Counter += 1
        if Counter > 2:
            # Two consecutive epochs without improvement over the previous epoch.
            print(epoch)
            break

    # Reload the best checkpoint of this run. The file holds a fully pickled
    # model, so weights_only=False is required on PyTorch >= 2.6 (where
    # torch.load defaults to weights_only=True). This is acceptable here only
    # because the file was written by this notebook; never unpickle untrusted files.
    model = torch.load(i, map_location=device, weights_only=False)
    model.to(device)

    _, all_labels, all_predictions = _evaluate(model, eval_dataloader)

    # Macro F1 of the best checkpoint on the validation split.
    f1 = f1_score(all_labels, all_predictions, average='macro')
    print(f"Validation F1 Score: {f1:.4f}")

    # Per-class precision / recall / F1.
    report = classification_report(all_labels, all_predictions)
    print("Classification Report:\n", report)
        


 




In [None]:
# Reload the checkpoint of the first run for the analysis cells that follow.
# The file holds a fully pickled model (written with torch.save(model, path)),
# so weights_only=False is required on PyTorch >= 2.6, where torch.load
# defaults to weights_only=True. Only load checkpoints you created yourself:
# unpickling can execute arbitrary code.
# map_location lets a checkpoint saved on a GPU be opened on a CPU-only machine.
model = torch.load(models[0], map_location=device, weights_only=False)
model.to(device)
model.eval()



In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# Class names used as tick labels.
# NOTE(review): assumes label ids 0-3 map to these names in this order --
# confirm against the dataset.
class_names = ["Neutral", "Positive", "Negative", "Mixed"]

model.eval()

all_predictions = []
all_labels = []

# Collect predictions and gold labels over the validation dataloader,
# with gradient computation disabled.
for batch in eval_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**batch)

    predictions = torch.argmax(outputs.logits, dim=1)
    all_predictions.extend(predictions.cpu().numpy())
    all_labels.extend(batch["labels"].cpu().numpy())

# Accuracy on the validation split.
accuracy = accuracy_score(all_labels, all_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")

# Confusion matrix (rows: true labels, columns: predicted labels). Passing the
# label ids explicitly keeps the matrix 4x4, and therefore aligned with the
# tick labels, even when a class does not occur.
conf_matrix = confusion_matrix(
    all_labels, all_predictions, labels=list(range(len(class_names)))
)

# Display the confusion matrix without a colorbar.
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# Per-class precision / recall / F1.
report = classification_report(all_labels, all_predictions)
print("Classification Report:\n", report)

In [None]:
# Calculate percentages for each true label
total_true_labels = np.sum(conf_matrix, axis=1)
percentages = (conf_matrix / total_true_labels[:, np.newaxis]) * 100

# Replace NaN values with 0 (for cases where the true label count is 0)
percentages = np.nan_to_num(percentages)

In [None]:
# Row-normalised confusion matrix (percentages per true label).
# NOTE(review): assumes label ids 0-3 map to these names in this order --
# confirm against the dataset.
class_names = ["Neutral", "Positive", "Negative", "Mixed"]

sns.heatmap(
    percentages,
    annot=True,
    fmt=".1f",  # one decimal; plain "f" prints six decimals in every cell
    cmap="Blues",
    cbar=False,  # draw no colorbar instead of creating one and removing it
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

In [None]:


import plotly.express as px
import pandas as pd
# Per-epoch metrics of the most recent training run, one row per completed epoch.
df = pd.DataFrame(
    {
        "f1": list_f1,
        "accuracy": list_accuracy,
        "train_loss": list_train_loss,
        "eval_loss": list_eval_loss,
    }
)
# Number the epochs from the rows actually recorded: early stopping can end a
# run before all epochs are done, and a fixed range(1, 11) then raises a
# length-mismatch ValueError.
df["epoch"] = range(1, len(df) + 1)


In [None]:

# Macro F1 per epoch. The figure is bound to `fig` rather than `plt`, so the
# matplotlib.pyplot alias used by the confusion-matrix cells is not overwritten.
fig = px.line(df, x='epoch', y='f1', title='F1 Macro Score Across Epochs on Validation Set of Mixed-Label Dataset',
              labels={"epoch": "Number of Epochs", "f1": "Macro F1"},
              template='plotly_white')

# NOTE(review): with mode='lines' the marker styling is presumably not rendered -- confirm.
fig.update_traces(line=dict(width=2.5, color='darkred'),
                  mode='lines',
                  marker=dict(size=8, color='LightSkyBlue', line=dict(width=2, color='DarkSlateGrey')))
fig.update_layout(title_font_size=24, title_x=0.5,
                  xaxis_title_font=dict(size=18), yaxis_title_font=dict(size=18),
                  xaxis_gridcolor='gray', yaxis_gridcolor='gray')

fig.show()


In [None]:
# Accuracy per epoch. The figure is bound to `fig` rather than `plt`, so the
# matplotlib.pyplot alias used by the confusion-matrix cells is not overwritten.
fig = px.line(df, x='epoch', y='accuracy', title='Accuracy  Across Epochs on Validation Set of Mixed-Label Dataset',
              labels={"epoch": "Number of Epochs", "accuracy": "Accuracy"},
              template='plotly_white')

# NOTE(review): with mode='lines' the marker styling is presumably not rendered -- confirm.
fig.update_traces(line=dict(width=2.5, color='darkred'),
                  mode='lines',
                  marker=dict(size=8, color='LightSkyBlue', line=dict(width=2, color='DarkSlateGrey')))
fig.update_layout(title_font_size=24, title_x=0.5,
                  xaxis_title_font=dict(size=18), yaxis_title_font=dict(size=18),
                  xaxis_gridcolor='gray', yaxis_gridcolor='gray')

fig.show()

In [None]:
# Training loss per epoch. The figure is bound to `fig` rather than `plt`, so the
# matplotlib.pyplot alias used by the confusion-matrix cells is not overwritten.
fig = px.line(df, x='epoch', y='train_loss', title='Loss Across Epochs on Train Set of Mixed-Label Dataset',
              labels={"epoch": "Number of Epochs", "train_loss": "Training Loss"},
              template='plotly_white')

# NOTE(review): with mode='lines' the marker styling is presumably not rendered -- confirm.
fig.update_traces(line=dict(width=2.5, color='darkred'),
                  mode='lines',
                  marker=dict(size=8, color='LightSkyBlue', line=dict(width=2, color='DarkSlateGrey')))
fig.update_layout(title_font_size=24, title_x=0.5,
                  xaxis_title_font=dict(size=18), yaxis_title_font=dict(size=18),
                  xaxis_gridcolor='gray', yaxis_gridcolor='gray')

fig.show()

In [None]:


# Validation loss per epoch. The figure is bound to `fig` rather than `plt`, so the
# matplotlib.pyplot alias used by the confusion-matrix cells is not overwritten.
fig = px.line(df, x='epoch', y='eval_loss', title='Evaluation Loss Across Epochs on Validation Set of Mixed-Label Dataset',
              labels={"epoch": "Number of Epochs", "eval_loss": "Evaluation Loss"},
              template='plotly_white')

# NOTE(review): with mode='lines' the marker styling is presumably not rendered -- confirm.
fig.update_traces(line=dict(width=2.5, color='darkred'),
                  mode='lines',
                  marker=dict(size=8, color='LightSkyBlue', line=dict(width=2, color='DarkSlateGrey')))
fig.update_layout(title_font_size=24, title_x=0.5,
                  xaxis_title_font=dict(size=18), yaxis_title_font=dict(size=18),
                  xaxis_gridcolor='gray', yaxis_gridcolor='gray')

fig.show()