In [2]:
from datasets import load_dataset

from transformers import (
    AutoTokenizer,
    BertModel, BertConfig,
   )

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm

from peft import LoraConfig
import evaluate
import torch
import numpy as np

In [3]:
# Run on the first CUDA device when torch reports one as available; otherwise use the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")


In [4]:

# Load metric objects through the `evaluate` library.
# `accuracy` is the metric used by `compute_metrics`.
accuracy = evaluate.load("accuracy")
# NOTE(review): this variable is named `rmse`, but the metric requested is 'mse'.
# Nothing visible in this notebook uses it -- confirm which metric is intended.
rmse = evaluate.load('mse')

# Metric callback meant to be handed to a trainer later on.
def compute_metrics(p):
    """Score a ``(predictions, labels)`` pair with the module-level ``accuracy`` metric.

    Args:
        p: A two-item iterable that is unpacked as ``(predictions, labels)``.
            The predictions are reduced with ``np.argmax(..., axis=1)``, so
            they are presumably per-class scores laid out as
            (n_samples, n_classes) -- TODO confirm against the caller.

    Returns:
        dict: ``{"accuracy": <value returned by accuracy.compute(...)>}``.
        NOTE(review): per the `evaluate` docs, ``compute`` returns a dict of
        its own, which would make this result nested -- confirm the consumer
        expects that shape.
    """
    scores, references = p
    # Pick the highest-scoring class index for every row.
    predicted_classes = np.argmax(scores, axis=1)
    metric_result = accuracy.compute(predictions=predicted_classes, references=references)
    return {"accuracy": metric_result}

In [5]:
# Pretrained checkpoint identifier; passed to the task data loader and to UnifiedModel below.
base_model_name = "bert-base-uncased"

In [6]:
from dataloaders.task1 import dataLoader as dataLoader1
from dataloaders.task2 import dataLoader as dataLoader2
from dataloaders.task3 import dataLoader as dataLoader3
from dataloaders.task4 import dataLoader as dataLoader4

# Batch size handed to the task data loader(s) built in this cell.
batch_size = 50

# Only task 1 is active. dataLoader1 returns three objects, unpacked here as the
# train / validation / test loaders.
task1_train_dataloader,task1_val_dataloader,task1_test_dataloader = dataLoader1(base_model_name,batch_size=batch_size)
# Loaders for tasks 2-4 are currently disabled (note that task 3 unpacks only two loaders):
# task2_train_dataloader,task2_val_dataloader,task2_test_dataloader = dataLoader2(base_model_name,batch_size=batch_size)
# task3_train_dataloader,task3_val_dataloader = dataLoader3(base_model_name,batch_size=batch_size)
# task4_train_dataloader,task4_val_dataloader,task4_test_dataloader = dataLoader4(base_model_name,batch_size=batch_size)



In [7]:
class BertForSEQCLF(nn.Module):
    """Sequence-classification head: one linear layer applied to the first position's hidden state."""

    def __init__(self, hidden_size, num_labels):
        """Build the head.

        Args:
            hidden_size: Width of the incoming hidden states.
            num_labels: Number of logits to produce.
        """
        super().__init__()
        self.classifier = nn.Linear(in_features=hidden_size, out_features=num_labels)

    def forward(self, sequence_output):
        """Return logits computed from index 0 along dim 1 of ``sequence_output``.

        Args:
            sequence_output: Encoder hidden states; index 0 of dim 1 is taken
                as the [CLS] token's state.

        Returns:
            The output of the linear classifier for that slice.
        """
        cls_state = sequence_output[:, 0]  # [CLS] token's hidden state
        return self.classifier(cls_state)

class BertForTextSummarization(nn.Module):
    """Placeholder summarization head: a single hidden-size to hidden-size linear projection."""

    def __init__(self, hidden_size):
        """Build the head.

        Args:
            hidden_size: Width of both the input and the output of the projection.
        """
        super().__init__()
        # A plain linear layer stands in for the decoder; a more sophisticated
        # decoder may be wanted here.
        self.decoder = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self, sequence_output):
        """Apply the linear ``decoder`` to ``sequence_output`` and return the result."""
        projected = self.decoder(sequence_output)
        return projected
    
import torch.nn.functional as F

class BertForSTS(nn.Module):
    """Similarity-score regression head.

    A single linear unit maps the pooled vector to one value, which is then
    hard-limited to the closed range ``[0, max_score]``.
    """

    def __init__(self, hidden_size, max_score=5.0):
        """Build the head.

        Args:
            hidden_size: Width of the pooled input vector.
            max_score: Upper bound of the emitted score. Defaults to 5.0, the
                bound this head has always used.

        Raises:
            ValueError: If ``max_score`` is not strictly positive.
        """
        super().__init__()
        if not max_score > 0:
            raise ValueError(f"max_score must be positive, got {max_score!r}")
        self.dense = nn.Linear(hidden_size, 1)
        self.max_score = float(max_score)

    def forward(self, pooled_output):
        """Return scores in ``[0, max_score]`` with a trailing dimension of size 1.

        Args:
            pooled_output: Pooled representation whose last dimension is
                ``hidden_size``.
        """
        logits = self.dense(pooled_output)
        # Clamp directly instead of computing relu(x) - relu(x - max): the
        # subtraction form is only approximately bounded in floating point
        # (a float32 logit of 1e8 yields 8.0) and turns +inf into NaN.
        return torch.clamp(logits, min=0.0, max=self.max_score)
        
class BertForQuestionAnswering(nn.Module):
    """Span-prediction head that emits a start score and an end score for every position."""

    def __init__(self, hidden_size):
        """Build the head.

        Args:
            hidden_size: Width of the incoming hidden states.
        """
        super().__init__()
        # Two output channels: index 0 -> start, index 1 -> end.
        self.qa_outputs = nn.Linear(in_features=hidden_size, out_features=2)

    def forward(self, sequence_output):
        """Project hidden states to two channels and return them separately.

        Args:
            sequence_output: Hidden states whose last dimension is ``hidden_size``.

        Returns:
            dict: ``'start_logits'`` and ``"end_logits"``, each the matching
            output channel with the trailing channel dimension removed.
        """
        span_scores = self.qa_outputs(sequence_output)
        return {
            'start_logits': span_scores[..., 0],
            "end_logits": span_scores[..., 1],
        }


In [8]:
class UnifiedModel(nn.Module):
    """Pretrained BERT encoder combined with the task-1 classification head.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, base_model_name):
        """Load the pretrained encoder and attach a 2-label classification head.

        Args:
            base_model_name: Checkpoint name or path understood by
                ``BertConfig.from_pretrained`` / ``BertModel.from_pretrained``.
        """
        super().__init__()
        config = BertConfig.from_pretrained(base_model_name)
        # Shared encoder used for every forward pass.
        self.base_model = BertModel.from_pretrained(base_model_name, config=config)
        # Task-1 head sized from the encoder's hidden width, with 2 output labels.
        self.task1_head = BertForSEQCLF(self.base_model.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, task="task1"):
        """Encode the inputs and return the task-1 head's logits.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            token_type_ids: Optional segment ids. They are forwarded to the
                encoder so sentence-pair inputs keep their segment
                information; ``None`` leaves the encoder's own default in place.
            task: Accepted for interface compatibility. Only the task-1 head
                exists, so every call is routed through it whatever this
                value is.

        Returns:
            The logits produced by ``task1_head`` from the encoder's first output.
        """
        base_outputs = self.base_model(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # The first element of the encoder output is the per-token hidden states.
        sequence_output = base_outputs[0]
        return self.task1_head(sequence_output)

    def unfreeze_parameters(self):
        """Mark every parameter of the encoder and of the task head as trainable."""
        for module in (self.base_model, self.task1_head):
            module.requires_grad_(True)


In [9]:
from train_val_test.task1_train_test_val import train as task1_train,validate as task1_validate


In [10]:
# Loss shared by the validation call below and the later training call.
loss_fn = nn.CrossEntropyLoss()

# Build the model, make every parameter trainable, and move it to the selected device.
unified_model = UnifiedModel(base_model_name)
unified_model.unfreeze_parameters()
unified_model.to(device)

# Baseline evaluation on the task-1 validation loader before any training in this notebook.
# `z` holds whatever task1_validate returns.
z = task1_validate(unified_model,task1_val_dataloader,loss_fn,'task1')

Validation: 100%|██████████| 18/18 [00:03<00:00,  5.78it/s]

Validation Loss: 0.7318 Accuracy : {'accuracy': 0.49311926605504586}





In [11]:
def print_trainable_parameters(model):
    """Print how many parameter elements of ``model`` are trainable, and the overall total.

    Args:
        model: Any object exposing ``parameters()`` whose items provide
            ``numel()`` and ``requires_grad``.

    Returns:
        None. Two lines are written to stdout, with thousands separators.
    """
    trainable_params = 0
    total_params = 0
    for param in model.parameters():
        count = param.numel()
        total_params += count
        if param.requires_grad:
            trainable_params += count
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Total parameters: {total_params:,}")

# Report parameter counts for the model after unfreeze_parameters() was applied.
print_trainable_parameters(unified_model)

Trainable parameters: 109,483,778
Total parameters: 109,483,778


In [13]:
# Fine-tune `unified_model`, the only model constructed in this notebook, for two epochs;
# the object returned by task1_train is kept as `model1`.
# NOTE(review): no cell in this notebook defines `model1` before this point (In [12] is
# missing) -- confirm that no wrapping step was lost with it.
model1 = task1_train(
    unified_model,
    task1_train_dataloader,
    task1_val_dataloader,
    loss_fn,
    num_epochs=2,
    learning_rate=5e-5,
    task='task1',
)

Training Epoch 1: 100%|██████████| 1347/1347 [10:20<00:00,  2.17it/s]


Training Loss: 0.1237


Validation: 100%|██████████| 18/18 [00:02<00:00,  6.44it/s]


Validation Loss: 0.2132 Accuracy : {'accuracy': 0.9220183486238532}
New best model saved with validation loss: 0.2132


Training Epoch 2: 100%|██████████| 1347/1347 [10:20<00:00,  2.17it/s]


Training Loss: 0.0570


Validation: 100%|██████████| 18/18 [00:02<00:00,  6.45it/s]

Validation Loss: 0.2215 Accuracy : {'accuracy': 0.926605504587156}



