# bcLSTM Student Model Code Implementation

This notebook shows how we implemented the student model using a bidirectional LSTM (BiLSTM).

## Library installation

In [None]:
!pip install datasets
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import RobertaModel, RobertaTokenizer, AdamW, RobertaTokenizerFast
import pandas as pd
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

Ensuring the utilization of CUDA

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f'Using device: {device}')

Using device: cuda


Downloading the Data from huggingface

link: https://huggingface.co/datasets/roskoN/dailydialog

In [None]:
# Fetch the DailyDialog dataset from the Hugging Face Hub.
DAILYDIALOG_REPO = "roskoN/dailydialog"
ds = load_dataset(DAILYDIALOG_REPO)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/581 [00:00<?, ?B/s]

dailydialog.py:   0%|          | 0.00/4.59k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/3.67M [00:00<?, ?B/s]

full/validation/0000.parquet:   0%|          | 0.00/340k [00:00<?, ?B/s]

full/test/0000.parquet:   0%|          | 0.00/337k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11118 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

## Parsing Data

Parsing the data into Train, Validation and Testing.

In [None]:
# Bind each split of the loaded dataset to its own name.
DD_train_data, DD_val_data, DD_test_data = (
    ds[split_name] for split_name in ('train', 'validation', 'test')
)

# Data Processing Stage

## Training Data Processing

In this step we segment each dialogue into utterances with their respective emotions and associate a speaker token with each utterance for the training data.

In [None]:
# Accumulator for the per-utterance records built later in this cell
data = list()

# Lookup table from the integer emotion id to its readable name
emotion_labels = dict(enumerate([
    'no_emotion',
    'anger',
    'disgust',
    'fear',
    'happiness',
    'sadness',
    'surprise',
]))

def assign_speakers(utterances):
    """Label each utterance with a strictly alternating speaker name.

    The first utterance is attributed to 'Speaker A', the second to
    'Speaker B', the third to 'Speaker A' again, and so on.

    Args:
        utterances: Sized collection of utterances; only its length is used.

    Returns:
        A list of speaker names, one per utterance.
    """
    turn_order = ('Speaker A', 'Speaker B')
    return [turn_order[position % 2] for position in range(len(utterances))]

# Flatten every training dialogue into one record per utterance
for conversation in DD_train_data:
    turns, turn_emotions = conversation['utterances'], conversation['emotions']
    conversation_id = conversation['id']

    # Only dialogues with exactly one emotion per utterance are kept;
    # anything else is reported and skipped.
    if len(turns) == len(turn_emotions):
        turn_speakers = assign_speakers(turns)
        data.extend(
            {
                'dialogue_id': conversation_id,
                'utterance': text,
                'emotion': emotion_id,
                'emotion_label': emotion_labels.get(emotion_id, 'unknown'),
                'speaker': who,
            }
            for text, emotion_id, who in zip(turns, turn_emotions, turn_speakers)
        )
    else:
        print(f"Length mismatch in dialogue {conversation_id}")

# Build the frame from the collected records and discard the dialogue id column
df_train = pd.DataFrame(data).drop(columns=['dialogue_id'])

# Show the first 100 rows
df_train.head(100)

Unnamed: 0,utterance,emotion,emotion_label,speaker
0,"Say , Jim , how about going for a few beers af...",0,no_emotion,Speaker A
1,You know that is tempting but is really not go...,0,no_emotion,Speaker B
2,What do you mean ? It will help us to relax .,0,no_emotion,Speaker A
3,Do you really think so ? I don't . It will jus...,0,no_emotion,Speaker B
4,I guess you are right.But what shall we do ? I...,0,no_emotion,Speaker A
...,...,...,...,...
95,You look so tan and healthy !,4,happiness,Speaker A
96,Thanks . I just got back from summer camp .,4,happiness,Speaker B
97,How was it ?,0,no_emotion,Speaker A
98,Great . I got to try so many things for the fi...,4,happiness,Speaker B


## Validation Data Processing

In this step we segment each dialogue into utterances with their respective emotions and associate a speaker token with each utterance for the validation data.

In [None]:
# Fresh accumulator for the validation records
data = []

# Flatten every validation dialogue into one record per utterance
for conversation in DD_val_data:
    turns, turn_emotions = conversation['utterances'], conversation['emotions']
    conversation_id = conversation['id']

    # Only dialogues with exactly one emotion per utterance are kept;
    # anything else is reported and skipped.
    if len(turns) == len(turn_emotions):
        turn_speakers = assign_speakers(turns)
        data.extend(
            {
                'dialogue_id': conversation_id,
                'utterance': text,
                'emotion': emotion_id,
                'emotion_label': emotion_labels.get(emotion_id, 'unknown'),
                'speaker': who,
            }
            for text, emotion_id, who in zip(turns, turn_emotions, turn_speakers)
        )
    else:
        print(f"Length mismatch in dialogue {conversation_id}")

# Build the frame from the collected records and discard the dialogue id column
df_val = pd.DataFrame(data).drop(columns=['dialogue_id'])
# Show the first 100 rows
df_val.head(100)

Unnamed: 0,utterance,emotion,emotion_label,speaker
0,"Good morning , sir . Is there a bank near here ?",0,no_emotion,Speaker A
1,There is one . 5 blocks away from here ?,0,no_emotion,Speaker B
2,"Well , that's too far.Can you change some mone...",0,no_emotion,Speaker A
3,"Surely , of course . What kind of currency hav...",0,no_emotion,Speaker B
4,RIB .,0,no_emotion,Speaker A
...,...,...,...,...
95,That's him !,0,no_emotion,Speaker B
96,I'll call him and tell him you're here .,0,no_emotion,Speaker A
97,I appreciate your help .,0,no_emotion,Speaker B
98,Would you like to have a seat over there ? It'...,0,no_emotion,Speaker A


## Testing Data Processing

In this step we segment each dialogue into utterances with their respective emotions and associate a speaker token with each utterance for the testing data.

In [None]:
# Fresh accumulator for the test records
data = []

# Flatten every test dialogue into one record per utterance
for conversation in DD_test_data:
    turns, turn_emotions = conversation['utterances'], conversation['emotions']
    conversation_id = conversation['id']

    # Only dialogues with exactly one emotion per utterance are kept;
    # anything else is reported and skipped.
    if len(turns) == len(turn_emotions):
        turn_speakers = assign_speakers(turns)
        data.extend(
            {
                'dialogue_id': conversation_id,
                'utterance': text,
                'emotion': emotion_id,
                'emotion_label': emotion_labels.get(emotion_id, 'unknown'),
                'speaker': who,
            }
            for text, emotion_id, who in zip(turns, turn_emotions, turn_speakers)
        )
    else:
        print(f"Length mismatch in dialogue {conversation_id}")

# Build the frame from the collected records and discard the dialogue id column
df_test = pd.DataFrame(data).drop(columns=['dialogue_id'])
# Show the first 100 rows
df_test.head(100)

Unnamed: 0,utterance,emotion,emotion_label,speaker
0,"Hey man , you wanna buy some weed ?",0,no_emotion,Speaker A
1,Some what ?,6,surprise,Speaker B
2,"Weed ! You know ? Pot , Ganja , Mary Jane some...",0,no_emotion,Speaker A
3,"Oh , umm , no thanks .",0,no_emotion,Speaker B
4,I also have blow if you prefer to do a few lin...,0,no_emotion,Speaker A
...,...,...,...,...
95,I can't really deal with any distractions righ...,0,no_emotion,Speaker B
96,Sun-set hotel . May I help you ?,0,no_emotion,Speaker A
97,"Yes , I have booked a room for 24th . It's a d...",0,no_emotion,Speaker B
98,"Hold on , please . Let me check it for you . Y...",0,no_emotion,Speaker A


## Formatting the input text

### Train input text

In [None]:
def prepare_input_text(utterance, speaker):
    """Wrap an utterance in the speaker-aware prompt template.

    Args:
        utterance: Text of the utterance.
        speaker: Speaker name placed in front of the `<mask>` token.

    Returns:
        The string "<s> {speaker} <mask> says: {utterance} </s>".
    """
    template = "<s> {} <mask> says: {} </s>"
    return template.format(speaker, utterance)

# Render the prompt for every training row from its utterance and speaker columns
df_train['input_text'] = [
    prepare_input_text(text, who)
    for text, who in zip(df_train['utterance'], df_train['speaker'])
]
df_train.head()

Unnamed: 0,utterance,emotion,emotion_label,speaker,input_text
0,"Say , Jim , how about going for a few beers af...",0,no_emotion,Speaker A,"<s> Speaker A <mask> says: Say , Jim , how abo..."
1,You know that is tempting but is really not go...,0,no_emotion,Speaker B,<s> Speaker B <mask> says: You know that is te...
2,What do you mean ? It will help us to relax .,0,no_emotion,Speaker A,<s> Speaker A <mask> says: What do you mean ? ...
3,Do you really think so ? I don't . It will jus...,0,no_emotion,Speaker B,<s> Speaker B <mask> says: Do you really think...
4,I guess you are right.But what shall we do ? I...,0,no_emotion,Speaker A,<s> Speaker A <mask> says: I guess you are rig...


### Validation input text

In [None]:
# Render the prompt for every validation row from its utterance and speaker columns
df_val['input_text'] = [
    prepare_input_text(text, who)
    for text, who in zip(df_val['utterance'], df_val['speaker'])
]
df_val.head()

Unnamed: 0,utterance,emotion,emotion_label,speaker,input_text
0,"Good morning , sir . Is there a bank near here ?",0,no_emotion,Speaker A,"<s> Speaker A <mask> says: Good morning , sir ..."
1,There is one . 5 blocks away from here ?,0,no_emotion,Speaker B,<s> Speaker B <mask> says: There is one . 5 bl...
2,"Well , that's too far.Can you change some mone...",0,no_emotion,Speaker A,"<s> Speaker A <mask> says: Well , that's too f..."
3,"Surely , of course . What kind of currency hav...",0,no_emotion,Speaker B,"<s> Speaker B <mask> says: Surely , of course ..."
4,RIB .,0,no_emotion,Speaker A,<s> Speaker A <mask> says: RIB . </s>


### Test input text

In [None]:
# Render the prompt for every test row from its utterance and speaker columns
df_test['input_text'] = [
    prepare_input_text(text, who)
    for text, who in zip(df_test['utterance'], df_test['speaker'])
]
df_test.head()

Unnamed: 0,utterance,emotion,emotion_label,speaker,input_text
0,"Hey man , you wanna buy some weed ?",0,no_emotion,Speaker A,"<s> Speaker A <mask> says: Hey man , you wanna..."
1,Some what ?,6,surprise,Speaker B,<s> Speaker B <mask> says: Some what ? </s>
2,"Weed ! You know ? Pot , Ganja , Mary Jane some...",0,no_emotion,Speaker A,<s> Speaker A <mask> says: Weed ! You know ? P...
3,"Oh , umm , no thanks .",0,no_emotion,Speaker B,"<s> Speaker B <mask> says: Oh , umm , no thank..."
4,I also have blow if you prefer to do a few lin...,0,no_emotion,Speaker A,<s> Speaker A <mask> says: I also have blow if...


## Tokenization and Attention Mask

### Train and Validation Data Tokenization

In [None]:
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-large')

# Every example is padded / truncated to this many tokens.
MAX_SEQ_LENGTH = 128


def tokenize_function(examples):
    """Tokenize `examples['input_text']` into fixed-length PyTorch tensors.

    Args:
        examples: Mapping with an 'input_text' entry holding the text to encode.

    Returns:
        The tokenizer output (including 'input_ids' and 'attention_mask'),
        padded/truncated to MAX_SEQ_LENGTH tokens and returned as tensors.
    """
    # NOTE(review): `input_text` already contains literal "<s>" / "</s>" and the
    # tokenizer is called with its default special-token handling; the test-split
    # output in this notebook shows sequences starting with two 0 ids, i.e. a
    # duplicated "<s>". Confirm this matches the format the teacher model used
    # before changing it.
    return tokenizer(examples['input_text'], truncation=True, padding='max_length',
                     max_length=MAX_SEQ_LENGTH, return_tensors='pt')


def _add_token_columns(frame):
    """Add 'tokenized', 'input_ids' and 'attention_mask' columns to `frame` in place.

    Args:
        frame: DataFrame with an 'input_text' column.
    """
    frame['tokenized'] = frame['input_text'].apply(lambda text: tokenize_function({'input_text': text}))
    # Rows are tokenized one at a time, so each tensor carries a leading batch
    # dimension of size 1; squeeze it away and store plain Python lists.
    frame['input_ids'] = frame['tokenized'].apply(lambda enc: enc['input_ids'].squeeze().tolist())
    frame['attention_mask'] = frame['tokenized'].apply(lambda enc: enc['attention_mask'].squeeze().tolist())


_add_token_columns(df_train)
_add_token_columns(df_val)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

#### Test Data Tokenization

In [None]:
# Tokenize the test split the same way as the train/validation splits: each row
# is tokenized on its own, so the tensors carry a leading batch dimension of
# size 1. Squeeze it away and store plain Python lists; otherwise the dataset
# wraps tensors in torch.tensor(...) (UserWarning) and batches come out as
# [batch, 1, seq_len] instead of [batch, seq_len].
df_test['tokenized'] = df_test['input_text'].apply(lambda x: tokenize_function({'input_text': x}))
df_test['input_ids'] = df_test['tokenized'].apply(lambda x: x['input_ids'].squeeze().tolist())
df_test['attention_mask'] = df_test['tokenized'].apply(lambda x: x['attention_mask'].squeeze().tolist())
df_test.head()

Unnamed: 0,utterance,emotion,emotion_label,speaker,input_text,tokenized,input_ids,attention_mask
0,"Hey man , you wanna buy some weed ?",0,no_emotion,Speaker A,"<s> Speaker A <mask> says: Hey man , you wanna...","[input_ids, attention_mask]","[[tensor(0), tensor(0), tensor(6358), tensor(8...","[[tensor(1), tensor(1), tensor(1), tensor(1), ..."
1,Some what ?,6,surprise,Speaker B,<s> Speaker B <mask> says: Some what ? </s>,"[input_ids, attention_mask]","[[tensor(0), tensor(0), tensor(6358), tensor(1...","[[tensor(1), tensor(1), tensor(1), tensor(1), ..."
2,"Weed ! You know ? Pot , Ganja , Mary Jane some...",0,no_emotion,Speaker A,<s> Speaker A <mask> says: Weed ! You know ? P...,"[input_ids, attention_mask]","[[tensor(0), tensor(0), tensor(6358), tensor(8...","[[tensor(1), tensor(1), tensor(1), tensor(1), ..."
3,"Oh , umm , no thanks .",0,no_emotion,Speaker B,"<s> Speaker B <mask> says: Oh , umm , no thank...","[input_ids, attention_mask]","[[tensor(0), tensor(0), tensor(6358), tensor(1...","[[tensor(1), tensor(1), tensor(1), tensor(1), ..."
4,I also have blow if you prefer to do a few lin...,0,no_emotion,Speaker A,<s> Speaker A <mask> says: I also have blow if...,"[input_ids, attention_mask]","[[tensor(0), tensor(0), tensor(6358), tensor(8...","[[tensor(1), tensor(1), tensor(1), tensor(1), ..."


# bcLSTM Model Architecture

In [None]:
class BiLSTMStudentModel(nn.Module):
    """Token-embedding + single-layer bidirectional LSTM classifier.

    Token ids are embedded, run through a BiLSTM, mean-pooled over the
    sequence dimension, passed through dropout and projected to class logits.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        lstm_hidden_size: Embedding dimension and per-direction LSTM hidden size.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, lstm_hidden_size, num_classes):
        super().__init__()
        # Embedding layer to map input IDs to embeddings
        self.embedding = nn.Embedding(input_size, lstm_hidden_size)
        self.lstm = nn.LSTM(input_size=lstm_hidden_size, hidden_size=lstm_hidden_size,
                            num_layers=1, bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(lstm_hidden_size * 2, num_classes)  # *2 for bidirectional

    def forward(self, input_ids, attention_mask=None):
        """Compute class logits for a batch of token-id sequences.

        Args:
            input_ids: Integer tensor of shape [batch_size, seq_len].
            attention_mask: Optional tensor of shape [batch_size, seq_len] with
                1 for real tokens and 0 for padding. When given, padded
                positions are excluded from the mean pooling. When omitted,
                every position is averaged (the original behaviour).

        Returns:
            Tensor of shape [batch_size, num_classes] with unnormalised logits.
        """
        # Embed input IDs
        embeddings = self.embedding(input_ids)
        # Pass embeddings through BiLSTM
        lstm_out, _ = self.lstm(embeddings)
        if attention_mask is None:
            pooled_output = torch.mean(lstm_out, dim=1)  # Mean pooling over time
        else:
            # Masked mean: zero out padded positions and divide by the number
            # of real tokens (clamped so an all-padding row cannot divide by 0).
            mask = attention_mask.unsqueeze(-1).to(lstm_out.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            pooled_output = (lstm_out * mask).sum(dim=1) / token_counts
        logits = self.fc(self.dropout(pooled_output))  # Shape: [batch_size, num_classes]
        return logits


'\nclass BiLSTMStudentModel(nn.Module):\n    def __init__(self, input_size, lstm_hidden_size, num_classes):\n        super(BiLSTMStudentModel, self).__init__()\n        self.lstm = nn.LSTM(input_size=input_size, hidden_size=lstm_hidden_size,\n                            num_layers=1, bidirectional=True, batch_first=True)\n        self.dropout = nn.Dropout(0.3)\n        self.fc = nn.Linear(lstm_hidden_size * 2, num_classes)  # *2 for bidirectional\n\n    def forward(self, teacher_embeddings):\n        # Pass teacher embeddings through BiLSTM\n        lstm_out, _ = self.lstm(teacher_embeddings)  # Shape: [batch_size, seq_len, lstm_hidden_size * 2]\n        pooled_output = torch.mean(lstm_out, dim=1)  # Mean pooling over time\n        logits = self.fc(self.dropout(pooled_output))  # Shape: [batch_size, num_classes]\n        return logits\n'

# Data Loading

In [None]:
class EmotionDataset(Dataset):
    """Dataset exposing tokenized utterances and their emotion labels.

    Args:
        dataframe: Table-like object indexable by column name that provides
            'input_ids', 'attention_mask' and 'emotion' columns of equal length.
    """

    def __init__(self, dataframe):
        self.input_ids = list(dataframe['input_ids'])
        self.attention_masks = list(dataframe['attention_mask'])
        self.labels = list(dataframe['emotion'])  # integer emotion ids

    def __len__(self):
        """Return the number of examples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example `idx` as a dict of long tensors.

        Keys are 'input_ids', 'attention_mask' and 'labels'.
        """
        # torch.as_tensor accepts both plain lists and entries that are already
        # tensors; torch.tensor(existing_tensor) would emit a UserWarning and
        # make an extra copy.
        return {
            'input_ids': torch.as_tensor(self.input_ids[idx], dtype=torch.long),
            'attention_mask': torch.as_tensor(self.attention_masks[idx], dtype=torch.long),
            'labels': torch.as_tensor(self.labels[idx], dtype=torch.long)
        }


# Wrap the tokenized train and validation frames in datasets
train_dataset, val_dataset = EmotionDataset(df_train), EmotionDataset(df_val)

# Batches of 32; only the training batches are shuffled, validation keeps frame order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)

# Saving Checkpoints

(in case of crashes)

In [None]:
#NEW

import os

# Directory to save checkpoints.
# NOTE(review): the path looks like a Google Drive mount inside Colab, but no
# mount step is visible in this notebook — confirm Drive is mounted first,
# otherwise this creates an ordinary local folder at that path.
checkpoint_dir = "/content/drive/My Drive/colab_checkpoints_student_model"
# exist_ok=True keeps re-running this cell harmless once the folder exists.
os.makedirs(checkpoint_dir, exist_ok=True)

def save_checkpoint(epoch, model, optimizer, loss, student_logits_storage, path="checkpoint.pth", accuracy=None):
    """Serialize the training state to `path` with torch.save.

    Args:
        epoch: Zero-based index of the epoch that just finished.
        model: Module whose state_dict is stored.
        optimizer: Optimizer whose state_dict is stored.
        loss: Loss value to record for this epoch.
        student_logits_storage: Object stored as-is under 'student_logits_storage'.
        path: Destination file. Missing parent directories are created.
        accuracy: Optional accuracy to record for this epoch (stored under
            'accuracy'; None when not supplied).
    """
    # Create the destination folder so a path inside a not-yet-existing
    # directory does not make torch.save fail.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        'accuracy': accuracy,
        'student_logits_storage': student_logits_storage
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved at {path} (Epoch {epoch + 1})")

def load_checkpoint(path, model, optimizer=None, map_location=None):
    """Restore training state from a checkpoint written by `save_checkpoint`.

    Args:
        path: Checkpoint file to read.
        model: Module that receives the stored 'model_state_dict'.
        optimizer: Optional optimizer that receives 'optimizer_state_dict'.
        map_location: Optional device remapping forwarded to torch.load
            (e.g. 'cpu' to open a GPU-saved checkpoint on a CPU-only machine).

    Returns:
        A tuple (start_epoch, loss, accuracy, student_logits_storage):
        start_epoch is the stored epoch + 1; loss and accuracy are the stored
        values, or None when the checkpoint does not contain them;
        student_logits_storage is the stored object, or [] when absent.
        When `path` does not exist the result is (0, None, None, []).
    """
    if not os.path.exists(path):
        print(f"No checkpoint found at {path}. Starting from scratch.")
        return 0, None, None, []

    print(f"Loading checkpoint from {path}...")
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:  # Load optimizer state if provided
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1  # Resume from the next epoch
    print(f"Checkpoint loaded. Resuming from epoch {start_epoch}")
    # Read with .get(): the checkpoint stores 'loss' (there is no 'avg_loss'
    # key) and older files carry no 'accuracy' entry at all.
    return (
        start_epoch,
        checkpoint.get('loss'),
        checkpoint.get('accuracy'),
        checkpoint.get('student_logits_storage') or [],
    )



# Training

In [None]:
#NEW

# Initialize the student model
input_size = tokenizer.vocab_size  # embedding table size = tokenizer vocabulary size
hidden_size = 256  # Define hidden layer size
num_classes = 7  # Number of emotion classes
student_model = BiLSTMStudentModel(input_size, hidden_size, num_classes).to(device)

# Optimizer and loss (built once instead of once per batch)
optimizer = torch.optim.AdamW(student_model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()


def checkpoint_path_for(epoch_number):
    """Return the checkpoint file inside `checkpoint_dir` for a 1-based epoch number."""
    return os.path.join(checkpoint_dir, f"student_model_epoch_{epoch_number}.pth")


# Check for an existing checkpoint. Loading and saving share
# `checkpoint_path_for`, so the file looked up here is one this loop writes
# (previously it searched for "*.pt" in checkpoint_dir while saving "*.pth"
# into the working directory, so resuming could never find a file).
RESUME_FROM_EPOCH = 3  # Change as per latest epoch
checkpoint_path = checkpoint_path_for(RESUME_FROM_EPOCH)

start_epoch, last_loss, prev_accuracy, student_logits_storage = load_checkpoint(checkpoint_path, student_model, optimizer)


# Ensure logits storage exists if starting from scratch
if not student_logits_storage:
    student_logits_storage = []


# Training loop
epochs = 10
student_model.train()

for epoch in range(start_epoch, epochs):
    total_loss = 0
    total_correct = 0
    total_samples = 0

    epoch_student_logits = []

    for batch_idx, batch in enumerate(train_loader):
        input_ids = batch['input_ids'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass
        logits = student_model(input_ids=input_ids)

        # Loss computation
        loss = criterion(logits, labels)
        total_loss += loss.item()

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep this batch's logits (detached, on CPU) so the per-epoch storage
        # written to the checkpoint is no longer an empty list.
        # NOTE(review): train_loader shuffles, so the stored order differs from
        # the DataFrame row order — confirm how these logits are consumed.
        epoch_student_logits.append(logits.detach().cpu())

        # Accuracy computation
        predicted_labels = torch.argmax(logits, dim=1)
        total_correct += (predicted_labels == labels).sum().item()
        total_samples += labels.size(0)

        # Print progress every 500 batches
        if (batch_idx + 1) % 500 == 0:
            print(f"Processed {batch_idx + 1}/{len(train_loader)} batches.")

    # Ensure the last batch is printed
    print(f"Processed {len(train_loader)}/{len(train_loader)} batches (last batch).")

    student_logits_storage.append(epoch_student_logits)

    avg_loss = total_loss / len(train_loader)
    accuracy = total_correct / total_samples
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

    save_checkpoint(epoch, student_model, optimizer, avg_loss, student_logits_storage=student_logits_storage, path=checkpoint_path_for(epoch + 1))

No checkpoint found at /content/drive/My Drive/colab_checkpoints_student_model/student_model_epoch_3.pt. Starting from scratch.
Processed 500/2725 batches.
Processed 1000/2725 batches.
Processed 1500/2725 batches.
Processed 2000/2725 batches.
Processed 2500/2725 batches.
Processed 2725/2725 batches (last batch).
Epoch 1/10, Loss: 0.5880, Accuracy: 0.8283
Checkpoint saved at student_model_epoch_1.pth (Epoch 1)
Processed 500/2725 batches.
Processed 1000/2725 batches.
Processed 1500/2725 batches.
Processed 2000/2725 batches.
Processed 2500/2725 batches.
Processed 2725/2725 batches (last batch).
Epoch 2/10, Loss: 0.4970, Accuracy: 0.8431
Checkpoint saved at student_model_epoch_2.pth (Epoch 2)
Processed 500/2725 batches.
Processed 1000/2725 batches.
Processed 1500/2725 batches.
Processed 2000/2725 batches.
Processed 2500/2725 batches.
Processed 2725/2725 batches (last batch).
Epoch 3/10, Loss: 0.4499, Accuracy: 0.8548
Checkpoint saved at student_model_epoch_3.pth (Epoch 3)
Processed 500/272

# Testing

In [None]:
# Test DataLoader
test_dataset = EmotionDataset(df_test)  # df_test must already carry input_ids / attention_mask columns
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluation
student_model.eval()
correct = 0
total = 0
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        # Flatten to [batch_size, seq_len]. This covers batches that arrive as
        # [batch_size, 1, seq_len] (test rows stored with a leading singleton
        # dimension) as well as batches that are already two-dimensional.
        input_ids = input_ids.reshape(input_ids.size(0), -1)
        labels = batch['labels'].to(device)

        # Forward pass
        logits = student_model(input_ids=input_ids)
        preds = torch.argmax(logits, dim=1)

        # Collect predictions and labels for metrics
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # Compute accuracy
        correct += (preds == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Test Accuracy: 84.42%
