# Model training: BERT (multitasking)

---

## Table of Contents

1. [Imports](#imports)
2. [Data loading and splitting](#data-loading-and-splitting)
3. [Setting training parameters](#setting-training-parameters)
4. [Model training](#model-training)
5. [Model evaluation](#model-evaluation)
6. [Summary](#summary)
7. [Model serialization](#model-serialization)

# Imports

In [1]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import torch
from transformers import BertTokenizer, BertModel
from torch.utils.data import DataLoader, Dataset
from sklearn.utils.class_weight import compute_class_weight
from transformers import AdamW
from torch.optim import lr_scheduler
import torch.nn as nn

# Data loading and splitting

In [2]:
base_dir = os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), '..'))

In [3]:
train_file_sentiment = os.path.join(base_dir, 'train_sentiment.csv')
val_file_sentiment = os.path.join(base_dir, 'val_sentiment.csv')
test_file_sentiment = os.path.join(base_dir, 'test_sentiment.csv')

if not all([os.path.exists(train_file_sentiment), os.path.exists(val_file_sentiment), os.path.exists(test_file_sentiment)]):
    sentiment_df = pd.read_parquet('../../data/sentiment_without_outliers/sentiment_without_outliers.parquet')
    sentiment_df = sentiment_df.drop(columns=['text_length'])
    
    train_data_sentiment, temp_data = train_test_split(sentiment_df, test_size=0.3, stratify=sentiment_df['label'], random_state=42)
    val_data_sentiment, test_data_sentiment = train_test_split(temp_data, test_size=0.5, stratify=temp_data['label'], random_state=42)

    train_data_sentiment.to_csv(train_file_sentiment, index=False)
    val_data_sentiment.to_csv(val_file_sentiment, index=False)
    test_data_sentiment.to_csv(test_file_sentiment, index=False)
else:
    train_data_sentiment = pd.read_csv(train_file_sentiment)
    val_data_sentiment = pd.read_csv(val_file_sentiment)
    test_data_sentiment = pd.read_csv(test_file_sentiment)

In [4]:
train_file_emotion = os.path.join(base_dir, 'train_emotion.csv')
val_file_emotion = os.path.join(base_dir, 'val_emotion.csv')
test_file_emotion = os.path.join(base_dir, 'test_emotion.csv')

if not all([os.path.exists(train_file_emotion), os.path.exists(val_file_emotion), os.path.exists(test_file_emotion)]):
    emotion_df = pd.read_parquet('../../data/emotion_without_outliers/emotion_without_outliers.parquet')
    emotion_df = emotion_df.drop(columns=['text_length'])
    
    target_samples_per_class = 16_667  # 100k / 6 classes of emotions
    
    balanced_data = emotion_df.groupby('label', group_keys=False).apply(
        lambda x: x.sample(n=min(len(x), target_samples_per_class), random_state=42)
    )
    
    train_data_emotion, temp_data = train_test_split(balanced_data, test_size=0.3, stratify=balanced_data['label'], random_state=42)
    val_data_emotion, test_data_emotion = train_test_split(temp_data, test_size=0.5, stratify=temp_data['label'], random_state=42)

    train_data_emotion.to_csv(train_file_emotion, index=False)
    val_data_emotion.to_csv(val_file_emotion, index=False)
    test_data_emotion.to_csv(test_file_emotion, index=False)
else:
    train_data_emotion = pd.read_csv(train_file_emotion)
    val_data_emotion = pd.read_csv(val_file_emotion)
    test_data_emotion = pd.read_csv(test_file_emotion)

# Setting training parameters

Due to the uneven distribution of classes in the dataset, the classes will be weighted.

In [5]:
class_weights_sentiment = compute_class_weight('balanced', classes=pd.unique(train_data_sentiment['label']), y=train_data_sentiment['label'])
class_weights_sentiment = torch.tensor(class_weights_sentiment, dtype=torch.float)

In [6]:
class_weights_emotion = compute_class_weight('balanced', classes=pd.unique(train_data_emotion['label']), y=train_data_emotion['label'])
class_weights_emotion = torch.tensor(class_weights_emotion, dtype=torch.float)

In [7]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [8]:
class SentimentDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.texts = data['text']
        self.labels = data['label']
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts.iloc[idx]
        label = self.labels.iloc[idx]

        encoding = self.tokenizer(
            text,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'label_sentiment': torch.tensor(label, dtype=torch.long)
        }


class EmotionDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.texts = data['text']
        self.labels = data['label']
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts.iloc[idx]
        label = self.labels.iloc[idx]

        encoding = self.tokenizer(
            text,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'label_emotion': torch.tensor(label, dtype=torch.long)
        }


In [9]:
def create_data_loader(dataset_class, data, tokenizer, max_len, batch_size):
    dataset = dataset_class(data, tokenizer, max_len)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [10]:
train_loader_sentiment = create_data_loader(SentimentDataset, train_data_sentiment, tokenizer, max_len=256, batch_size=16)
val_loader_sentiment = create_data_loader(SentimentDataset, val_data_sentiment, tokenizer, max_len=256, batch_size=16)
test_loader_sentiment = create_data_loader(SentimentDataset, test_data_sentiment, tokenizer, max_len=256, batch_size=16)

In [11]:
train_loader_emotion = create_data_loader(EmotionDataset, train_data_emotion, tokenizer, max_len=256, batch_size=16)
val_loader_emotion = create_data_loader(EmotionDataset, val_data_emotion, tokenizer, max_len=256, batch_size=16)
test_loader_emotion = create_data_loader(EmotionDataset, test_data_emotion, tokenizer, max_len=256, batch_size=16)

# Model training

In [12]:
device = 'cuda'

In [13]:
class MultiTaskBERT(nn.Module):
    def __init__(self, num_labels_sentiment, num_labels_emotion):
        super(MultiTaskBERT, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.classifier_sentiment = nn.Linear(self.bert.config.hidden_size, num_labels_sentiment)
        self.classifier_emotion = nn.Linear(self.bert.config.hidden_size, num_labels_emotion)

    def forward(self, input_ids, attention_mask, task='sentiment'):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        
        if task == 'sentiment':
            return self.classifier_sentiment(pooled_output)
        else:
            return self.classifier_emotion(pooled_output)

model = MultiTaskBERT(num_labels_sentiment=3, num_labels_emotion=6).to(device)

In [14]:
loss_fn_sentiment = nn.CrossEntropyLoss(weight=class_weights_sentiment.to(device))
loss_fn_emotion = nn.CrossEntropyLoss(weight=class_weights_emotion.to(device))

In [15]:
optimizer = AdamW(model.parameters(), lr=2e-5)
scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)



In [16]:
def train_epoch(model, sentiment_loader, emotion_loader, loss_fn_sentiment, loss_fn_emotion, optimizer, device, epoch):
    model.train()
    total_loss = 0
    correct_sentiment = 0
    correct_emotion = 0
    total_sentiment_samples = 0
    total_emotion_samples = 0

    for batch_idx, (batch_sentiment, batch_emotion) in enumerate(zip(sentiment_loader, emotion_loader)):
        optimizer.zero_grad()

        # === === === === SENTIMENT === === === ===
        input_ids_sent = batch_sentiment['input_ids'].to(device)
        attention_mask_sent = batch_sentiment['attention_mask'].to(device)
        labels_sent = batch_sentiment['label_sentiment'].to(device)

        logits_sentiment = model(input_ids_sent, attention_mask_sent, task='sentiment')
        loss_sentiment = loss_fn_sentiment(logits_sentiment, labels_sent)
        loss_sentiment.backward()
        optimizer.step()
        
        _, preds_sentiment = logits_sentiment.max(1)
        correct_sentiment += (preds_sentiment == labels_sent).sum().item()
        total_sentiment_samples += labels_sent.size(0)

        # === === === === EMOTION === === === ===
        input_ids_emot = batch_emotion['input_ids'].to(device)
        attention_mask_emot = batch_emotion['attention_mask'].to(device)
        labels_emot = batch_emotion['label_emotion'].to(device)

        logits_emotion = model(input_ids_emot, attention_mask_emot, task='emotion')
        loss_emotion = loss_fn_emotion(logits_emotion, labels_emot)
        loss_emotion.backward()
        optimizer.step() 
        
        _, preds_emotion = logits_emotion.max(1)
        correct_emotion += (preds_emotion == labels_emot).sum().item()
        total_emotion_samples += labels_emot.size(0)

        total_loss += loss_sentiment.item() + loss_emotion.item()
        
        if batch_idx % 10 == 0:
            avg_loss = total_loss / (batch_idx + 1)
            acc_sent = 100. * correct_sentiment / total_sentiment_samples
            acc_emot = 100. * correct_emotion / total_emotion_samples
            print(f"Epoch {epoch}. Batch {batch_idx}/{min(len(sentiment_loader), len(emotion_loader))}: "
                  f"AvgLoss: {avg_loss:.2f}, S.Acc: {acc_sent:.2f}%, E.Acc: {acc_emot:.2f}%")

    avg_loss = total_loss / min(len(sentiment_loader), len(emotion_loader))
    accuracy_sentiment = 100. * correct_sentiment / total_sentiment_samples
    accuracy_emotion = 100. * correct_emotion / total_emotion_samples

    return avg_loss, accuracy_sentiment, accuracy_emotion

In [17]:
def eval_model(model, sentiment_loader, emotion_loader, device):
    model.eval()
    correct_sentiment = 0
    correct_emotion = 0
    total_sentiment_samples = 0
    total_emotion_samples = 0

    with torch.no_grad():
        for batch_sentiment, batch_emotion in zip(sentiment_loader, emotion_loader):
            # === === === === SENTIMENT === === === ===
            input_ids_sent = batch_sentiment['input_ids'].to(device)
            attention_mask_sent = batch_sentiment['attention_mask'].to(device)
            labels_sent = batch_sentiment['label_sentiment'].to(device)

            logits_sentiment = model(input_ids_sent, attention_mask_sent, task='sentiment')
            _, preds_sentiment = torch.max(logits_sentiment, dim=1)
            correct_sentiment += torch.sum(preds_sentiment == labels_sent).item()
            total_sentiment_samples += labels_sent.size(0)

            # === === === === EMOTION === === === ===
            input_ids_emot = batch_emotion['input_ids'].to(device)
            attention_mask_emot = batch_emotion['attention_mask'].to(device)
            labels_emot = batch_emotion['label_emotion'].to(device)

            logits_emotion = model(input_ids_emot, attention_mask_emot, task='emotion')
            _, preds_emotion = torch.max(logits_emotion, dim=1)
            correct_emotion += torch.sum(preds_emotion == labels_emot).item()
            total_emotion_samples += labels_emot.size(0)

    accuracy_sentiment = 100. * correct_sentiment / total_sentiment_samples
    accuracy_emotion = 100. * correct_emotion / total_emotion_samples

    return accuracy_sentiment, accuracy_emotion

In [18]:
for epoch in range(5):
    train_loss, train_acc_sent, train_acc_emot = train_epoch(
        model, train_loader_sentiment, train_loader_emotion,
        loss_fn_sentiment, loss_fn_emotion, optimizer, device, epoch
    )
    print(f"Train Loss: {train_loss:.4f}, "
          f"Train Sentiment Accuracy: {train_acc_sent:.2f}%, "
          f"Train Emotion Accuracy: {train_acc_emot:.2f}%")
    val_acc_sent, val_acc_emot = eval_model(model, val_loader_sentiment, val_loader_emotion, device)
    print(f"Validation Sentiment Accuracy: {val_acc_sent:.2f}%, Validation Emotion Accuracy: {val_acc_emot:.2f}%")

Epoch 0. Batch 0/4290: AvgLoss: 2.83, S.Acc: 56.25%, E.Acc: 18.75%
Epoch 0. Batch 10/4290: AvgLoss: 2.91, S.Acc: 46.59%, E.Acc: 19.89%
Epoch 0. Batch 20/4290: AvgLoss: 2.88, S.Acc: 48.51%, E.Acc: 20.54%
Epoch 0. Batch 30/4290: AvgLoss: 2.84, S.Acc: 48.99%, E.Acc: 22.98%
Epoch 0. Batch 40/4290: AvgLoss: 2.79, S.Acc: 49.70%, E.Acc: 25.61%
Epoch 0. Batch 50/4290: AvgLoss: 2.75, S.Acc: 50.49%, E.Acc: 25.86%
Epoch 0. Batch 60/4290: AvgLoss: 2.67, S.Acc: 52.87%, E.Acc: 27.87%
Epoch 0. Batch 70/4290: AvgLoss: 2.62, S.Acc: 53.96%, E.Acc: 29.58%
Epoch 0. Batch 80/4290: AvgLoss: 2.58, S.Acc: 54.86%, E.Acc: 31.94%
Epoch 0. Batch 90/4290: AvgLoss: 2.54, S.Acc: 56.18%, E.Acc: 33.10%
Epoch 0. Batch 100/4290: AvgLoss: 2.50, S.Acc: 56.56%, E.Acc: 35.02%
Epoch 0. Batch 110/4290: AvgLoss: 2.46, S.Acc: 57.38%, E.Acc: 36.94%
Epoch 0. Batch 120/4290: AvgLoss: 2.41, S.Acc: 57.80%, E.Acc: 38.84%
Epoch 0. Batch 130/4290: AvgLoss: 2.35, S.Acc: 58.83%, E.Acc: 41.32%
Epoch 0. Batch 140/4290: AvgLoss: 2.31, S.Acc

# Model evaluation

In [19]:
test_acc_sent, test_acc_emot = eval_model(model, test_loader_sentiment, test_loader_emotion, device)

print(f"Test Sentiment Accuracy: {test_acc_sent:.2f}%")
print(f"Test Emotion Accuracy: {test_acc_emot:.2f}%")

Test Sentiment Accuracy: 70.89%
Test Emotion Accuracy: 95.17%


# Summary

| Epoch        | Train Accuracy Sentiment | Validation Accuracy Sentiment | Train Accuracy Emotion | Validation Accuracy Emotion |
|--------------|--------------------------|-------------------------------|------------------------|-----------------------------|
| **Epoch 1**  | 69.85%                   | 72.86%                        | 91.19%                 | 94.99%                      |
| **Epoch 2**  | 77.60%                   | 72.24%                        | 94.85%                 | 94.89%                      |
| **Epoch 3**  | 85.17%                   | 72.13%                        | 95.17%                 | 95.11%                      |
| **Epoch 4**  | 90.85%                   | 71.64%                        | 95.24%                 | 95.05%                      |
| **Epoch 5**  | 93.71%                   | 71.26%                        | 95.36%                 | 95.23%                      |

### Observation
* The training accuracy for both sentiment and emotion classification steadily improves over epochs. However, validation accuracy stagnates around 72% for sentiment and 95% for emotion after the first epoch.
* Training accuracy for sentiment rises (from 69.85% in Epoch 1 to 93.71% in Epoch 5), but validation accuracy decreases slightly (from 72.86% to 71.26%). This indicates overfitting.
* The model achieves high emotion classification accuracy early (91.19% in Epoch 1) and remains stable (~95%) across all epochs. Unlike sentiment classification, overfitting is minimal for emotion detection.

# Model serialization

In [None]:
torch.save(model.state_dict(), './bert_multitasking_model_overfitted/bert_multitasking_model.pth')
tokenizer.save_pretrained('./bert_multitasking_model_overfitted')

('./bert_multitasking_model\\tokenizer_config.json',
 './bert_multitasking_model\\special_tokens_map.json',
 './bert_multitasking_model\\vocab.txt',
 './bert_multitasking_model\\added_tokens.json')

In [None]:
torch.save(model, './bert_multitasking_model_full_overfitted/bert_multitasking_model_full.pth')
tokenizer.save_pretrained('./bert_multitasking_model_full_overfitted')

('./bert_multitasking_model_full\\tokenizer_config.json',
 './bert_multitasking_model_full\\special_tokens_map.json',
 './bert_multitasking_model_full\\vocab.txt',
 './bert_multitasking_model_full\\added_tokens.json')