In [1]:

from datasets import load_dataset

from transformers import (
    AutoTokenizer,
    BertModel, BertConfig,
   )

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm

from peft import LoraConfig
import evaluate
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


### Evaluation

In [3]:

# Load the accuracy metric from the `evaluate` library.
accuracy = evaluate.load("accuracy")
# NOTE(review): the variable is named `rmse` but the metric loaded is 'mse'
# (no square root is applied here) — confirm which one is intended.
rmse = evaluate.load('mse')

# Metric callback meant to be handed to a trainer later.
def compute_metrics(p):
    """Compute accuracy from a ``(predictions, labels)`` pair.

    Args:
        p: Two-item iterable; the first item holds per-class scores and the
            second the reference labels.

    Returns:
        ``{"accuracy": <result of accuracy.compute(...)>}``.
    """
    scores, references = p
    # The predicted class is the index of the highest score along axis 1.
    predicted_ids = np.argmax(scores, axis=1)
    accuracy_result = accuracy.compute(predictions=predicted_ids, references=references)

    # NOTE(review): the value stored under "accuracy" is whatever
    # accuracy.compute returns; if that is itself a dict the result is nested —
    # confirm the consumer expects that shape.
    return {"accuracy": accuracy_result}

### Apply untrained model on text

In [4]:
# Checkpoint identifier passed to the dataloader factories and model constructors below.
base_model_name = "bert-base-uncased"

In [None]:
from dataloaders.task1 import dataLoader as dataLoader1
from dataloaders.task2 import dataLoader as dataLoader2
from dataloaders.task3 import dataLoader as dataLoader3
from dataloaders.task4 import dataLoader as dataLoader4

# Batch size for the dataloader calls below; later task-specific cells reassign it to 64.
batch_size = 16

# task1_train_dataloader,task1_val_dataloader,task1_test_dataloader = dataLoader1(base_model_name,batch_size=batch_size)
# task2_train_dataloader,task2_val_dataloader,task2_test_dataloader = dataLoader2(base_model_name,batch_size=batch_size)
# task3_train_dataloader,task3_val_dataloader = dataLoader3(base_model_name,batch_size=batch_size)
# task4_train_dataloader,task4_val_dataloader,task4_test_dataloader = dataLoader4(base_model_name,batch_size=batch_size)

In [6]:
class BertForSEQCLF(nn.Module):
    """Sequence-classification head: one linear layer over the first token's hidden state.

    Args:
        hidden_size: Size of the last dimension of the encoder output.
        num_labels: Number of logits produced per example.
    """

    def __init__(self, hidden_size, num_labels):
        super().__init__()
        self.classifier = nn.Linear(in_features=hidden_size, out_features=num_labels)

    def forward(self, sequence_output):
        """Return the classifier logits for ``sequence_output[:, 0]``."""
        # Position 0 along dim 1 is the [CLS] token's hidden state.
        cls_state = sequence_output[:, 0]
        return self.classifier(cls_state)

class BertForTextSummarization(nn.Module):
    """Placeholder summarization head: a single square linear projection.

    Args:
        hidden_size: Size of the last dimension of the encoder output; the
            projection maps ``hidden_size -> hidden_size``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # A plain linear layer stands in for the decoder; a more sophisticated
        # decoder may be wanted here.
        self.decoder = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self, sequence_output):
        """Apply the linear decoder to ``sequence_output`` and return the result."""
        projected = self.decoder(sequence_output)
        return projected
    
import torch.nn.functional as F

class BertForSTS(nn.Module):
    """Regression head producing one similarity score limited to the range [0, 5].

    A linear layer maps the input to a single raw value. Instead of the
    ``5 * sigmoid(x)`` scaling that was tried earlier, the raw value is
    squashed with a piecewise-linear stand-in built from two ReLUs.

    Args:
        hidden_size: Size of the last dimension of the pooled encoder output.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.dense = nn.Linear(hidden_size, 1)

    def forward(self, pooled_output):
        """Return ``relu(x) - relu(x - 5)`` where ``x = dense(pooled_output)``."""
        raw_score = self.dense(pooled_output)
        # relu(x) - relu(x - 5) is 0 for x <= 0, x for 0 < x < 5, and 5 for x >= 5.
        lower_ramp = F.relu(raw_score)
        upper_ramp = F.relu(raw_score - 5)
        return lower_ramp - upper_ramp
        
class BertForQuestionAnswering(nn.Module):
    """Extractive-QA head: two logits (span start / span end) per sequence position.

    Args:
        hidden_size: Size of the last dimension of the encoder output.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.qa_outputs = nn.Linear(hidden_size, 2)

    def forward(self, sequence_output):
        """Return a dict with ``'start_logits'`` and ``'end_logits'``.

        Each entry is one of the two channels of ``qa_outputs(sequence_output)``
        with the trailing size-2 dimension removed.
        """
        span_logits = self.qa_outputs(sequence_output)
        # Channel 0 scores span starts, channel 1 scores span ends.
        start_logits, end_logits = span_logits.unbind(dim=-1)
        return {'start_logits': start_logits, "end_logits": end_logits}


In [7]:
class UnifiedModel(nn.Module):
    """One shared BERT encoder with a LoRA adapter and an output head per task.

    Tasks:
        * ``"task1"`` — ``adapter_task1`` + ``BertForSEQCLF`` (2 labels), fed
          the encoder's sequence output.
        * ``"task2"`` — ``adapter_task2`` + ``BertForSTS``, fed the encoder's
          ``pooler_output``.
        * ``"task3"`` — ``adapter_task3`` + ``BertForQuestionAnswering``, fed
          the encoder's sequence output.

    Args:
        base_model_name: Checkpoint name/path given to ``BertConfig`` and
            ``BertModel.from_pretrained``.
    """

    # task name -> (adapter name, LoraConfig task_type). Insertion order is the
    # order in which the adapters are added to the encoder.
    _TASK_ADAPTERS = {
        "task1": ("adapter_task1", "SEQ_CLS"),
        "task2": ("adapter_task2", "SEQ_CLS"),
        "task3": ("adapter_task3", "SEQ2SEQ_LM"),
    }

    def __init__(self, base_model_name):
        super().__init__()
        config = BertConfig.from_pretrained(base_model_name)
        self.base_model = BertModel.from_pretrained(base_model_name, config=config)

        # All adapters share the same LoRA hyper-parameters; only the declared
        # task_type and the adapter name differ.
        for adapter_name, task_type in self._TASK_ADAPTERS.values():
            lora_config = LoraConfig(
                task_type=task_type,
                r=4,
                lora_alpha=32,
                lora_dropout=0.01,
                target_modules=['query', 'value'],
            )
            self.base_model.add_adapter(lora_config, adapter_name=adapter_name)

        hidden_size = self.base_model.config.hidden_size
        self.task1_head = BertForSEQCLF(hidden_size, 2)
        self.task2_head = BertForSTS(hidden_size)
        self.task3_head = BertForQuestionAnswering(hidden_size)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, task="task1"):
        """Run the encoder with the adapter for ``task`` and apply that task's head.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            token_type_ids: Optional segment ids. They are now forwarded to the
                encoder for every task; previously they were silently dropped
                for ``"task1"`` and ``"task3"``. Leaving them ``None`` keeps
                the earlier behaviour.
            task: One of ``"task1"``, ``"task2"``, ``"task3"``.

        Returns:
            The selected head's output: logits for task1, the clamped score for
            task2, or a dict with ``'start_logits'`` / ``'end_logits'`` for task3.

        Raises:
            ValueError: If ``task`` is not a known task name.
        """
        if task not in self._TASK_ADAPTERS:
            raise ValueError(f"Unknown task: {task}")

        adapter_name, _ = self._TASK_ADAPTERS[task]
        self.base_model.set_adapter(adapter_name)
        base_outputs = self.base_model(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        if task == "task1":
            return self.task1_head(base_outputs[0])
        if task == "task2":
            # The STS head consumes the pooled representation, not per-token states.
            return self.task2_head(base_outputs.pooler_output)
        return self.task3_head(base_outputs[0])

        

In [12]:
import torch
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from tqdm import tqdm

# Define device. This rebinds the `device` set in an earlier cell, here
# requesting 'cuda' (no explicit index) when CUDA is available, else the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Corrected train function
def train(model, train_loader, val_loader, loss_fn, num_epochs=3, learning_rate=5e-5, task='task1',
          save_directory=None):
    """Fine-tune ``model`` on span start/end targets and keep the best checkpoint.

    Each batch must provide ``'input_ids'``, ``'attention_mask'``,
    ``'answer_start'`` and ``'answer_end'``. The loss is
    ``loss_fn(start_logits, answer_start) + loss_fn(end_logits, answer_end)``.
    After every epoch ``validate`` is run; whenever the validation loss
    improves, the model's ``state_dict`` is written to
    ``<save_directory>/best_model.pt``.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=..., task=...)``
            and returning a mapping with ``'start_logits'`` and ``'end_logits'``.
        train_loader: Iterable of training batches (must support ``len``).
        val_loader: Validation batches, forwarded to ``validate``.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: AdamW learning rate.
        task: Task name forwarded to the model and to ``validate``.
        save_directory: Where ``best_model.pt`` is written. When ``None``, a
            notebook-level ``save_directory`` variable is used if one exists,
            otherwise the current working directory.

    Returns:
        ``model`` itself, with the weights of the best-scoring epoch restored
        (previously the last-epoch weights were returned because
        ``best_model`` was only an alias of ``model``).
    """
    # `os` is not imported anywhere at notebook level, so import it locally.
    import os

    if save_directory is None:
        # Stay compatible with sessions that defined a global `save_directory`.
        save_directory = globals().get('save_directory', '.')
    if save_directory:
        os.makedirs(save_directory, exist_ok=True)
    checkpoint_path = os.path.join(save_directory, 'best_model.pt')

    best_loss = float('inf')
    checkpoint_written = False

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    for epoch in range(num_epochs):
        # validate() switches the model to eval mode, so re-enable training mode each epoch.
        model.train()
        total_train_loss = 0

        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch + 1}"):
            optimizer.zero_grad()

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            start_positions = batch['answer_start'].to(device)
            end_positions = batch['answer_end'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, task=task)
            start_logits, end_logits = outputs['start_logits'], outputs['end_logits']
            start_logits = start_logits.squeeze(-1)
            end_logits = end_logits.squeeze(-1)

            start_loss = loss_fn(start_logits, start_positions)
            end_loss = loss_fn(end_logits, end_positions)
            loss = start_loss + end_loss
            total_train_loss += loss.item()

            loss.backward()
            optimizer.step()
            scheduler.step()  # the linear schedule advances once per optimizer step

        avg_train_loss = total_train_loss / len(train_loader)
        print(f"Training Loss: {avg_train_loss:.4f}")

        cur_loss, _ = validate(model, val_loader, loss_fn, task)

        if cur_loss < best_loss:
            best_loss = cur_loss
            # Save the best model
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True
            print(f"New best model saved with validation loss: {cur_loss:.4f}")

    if checkpoint_written:
        # Later epochs may have made the weights worse; restore the best snapshot.
        best_state = torch.load(checkpoint_path, map_location='cpu', weights_only=True)
        model.load_state_dict(best_state)

    return model

from tqdm import tqdm
import torch
import evaluate  # Assuming you have the `evaluate` library installed

# Accuracy metric used by validate() below (the same metric is also loaded in the earlier evaluation cell).
accuracy = evaluate.load("accuracy")

def validate(model, val_loader, loss_fn, task='task1'):
    """Evaluate ``model`` on ``val_loader`` and report loss plus start/end accuracy.

    Each batch must provide ``'input_ids'``, ``'attention_mask'``,
    ``'answer_start'`` and ``'answer_end'``. Predictions are the argmax of the
    start/end logits over the last dimension; references are the argmax of the
    target tensors over their last dimension.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=..., task=...)``
            and returning a mapping with ``'start_logits'`` and ``'end_logits'``.
        val_loader: Iterable of validation batches (must support ``len``).
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.
        task: Task name forwarded to the model.

    Returns:
        ``(avg_val_loss, (val_pred_start, val_pred_end))`` — the mean per-batch
        loss and the two lists of predicted positions.
    """
    model.eval()
    running_loss = 0
    val_pred_start, val_pred_end = [], []
    val_y_start, val_y_end = [], []

    def _argmax_to_numpy(tensor):
        # Index of the maximum along the last dimension, moved to host memory.
        return tensor.argmax(dim=-1).cpu().numpy()

    batch_keys = ('input_ids', 'attention_mask', 'answer_start', 'answer_end')

    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validation"):
            token_ids, mask, gold_start, gold_end = (
                batch[key].to(device) for key in batch_keys
            )

            outputs = model(input_ids=token_ids, attention_mask=mask, task=task)
            raw_start, raw_end = outputs['start_logits'], outputs['end_logits']
            pred_start_logits = raw_start.squeeze(-1)
            pred_end_logits = raw_end.squeeze(-1)

            # Total loss is the sum of the start-position and end-position losses.
            batch_loss = loss_fn(pred_start_logits, gold_start) + loss_fn(pred_end_logits, gold_end)
            running_loss += batch_loss.item()

            val_pred_start.extend(_argmax_to_numpy(pred_start_logits))
            val_pred_end.extend(_argmax_to_numpy(pred_end_logits))
            val_y_start.extend(_argmax_to_numpy(gold_start))
            val_y_end.extend(_argmax_to_numpy(gold_end))

    avg_val_loss = running_loss / len(val_loader)

    accuracy_start = accuracy.compute(predictions=val_pred_start, references=val_y_start)
    accuracy_end = accuracy.compute(predictions=val_pred_end, references=val_y_end)

    print(f'Validation Loss: {avg_val_loss:.4f} Start Accuracy: {accuracy_start["accuracy"]:.4f} End Accuracy: {accuracy_end["accuracy"]:.4f}')

    return avg_val_loss, (val_pred_start, val_pred_end)


In [9]:
from train_val_test.task3_train_test_val import train as task3_train,validate as task3_validate
from architecture import Model

In [10]:
# loss_fn = nn.CrossEntropyLoss()

# unified_model = Model(base_model_name)
# unified_model.to(device)
# z = task3_validate(unified_model,task3_val_dataloader,loss_fn,'task3')

Validation: 100%|██████████| 661/661 [00:40<00:00, 16.40it/s]

Validation Loss: 12.4112 Start Accuracy: 0.0011 End Accuracy: 0.0065





In [11]:
# model = task3_train(unified_model, task3_train_dataloader, task3_val_dataloader,loss_fn, num_epochs=15, learning_rate=5e-5,task = 'task3')

Training Epoch 1: 100%|██████████| 5475/5475 [10:47<00:00,  8.46it/s]


Training Loss: 5.2118


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.43it/s]


Validation Loss: 3.2308 Start Accuracy: 0.5465 End Accuracy: 0.5876
New best model saved with validation loss: 3.2308


Training Epoch 2: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 3.1951


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.44it/s]


Validation Loss: 2.7060 Start Accuracy: 0.6073 End Accuracy: 0.6495
New best model saved with validation loss: 2.7060


Training Epoch 3: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.8225


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.39it/s]


Validation Loss: 2.5278 Start Accuracy: 0.6339 End Accuracy: 0.6698
New best model saved with validation loss: 2.5278


Training Epoch 4: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.6331


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.37it/s]


Validation Loss: 2.4158 Start Accuracy: 0.6465 End Accuracy: 0.6847
New best model saved with validation loss: 2.4158


Training Epoch 5: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.5227


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.42it/s]


Validation Loss: 2.3372 Start Accuracy: 0.6513 End Accuracy: 0.6922
New best model saved with validation loss: 2.3372


Training Epoch 6: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.4388


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.41it/s]


Validation Loss: 2.2793 Start Accuracy: 0.6550 End Accuracy: 0.6961
New best model saved with validation loss: 2.2793


Training Epoch 7: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.3746


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.42it/s]


Validation Loss: 2.2633 Start Accuracy: 0.6631 End Accuracy: 0.7021
New best model saved with validation loss: 2.2633


Training Epoch 8: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.3269


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.45it/s]


Validation Loss: 2.2390 Start Accuracy: 0.6631 End Accuracy: 0.7029
New best model saved with validation loss: 2.2390


Training Epoch 9: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.2901


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.44it/s]


Validation Loss: 2.2203 Start Accuracy: 0.6654 End Accuracy: 0.7070
New best model saved with validation loss: 2.2203


Training Epoch 10: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.2582


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.42it/s]


Validation Loss: 2.1933 Start Accuracy: 0.6692 End Accuracy: 0.7082
New best model saved with validation loss: 2.1933


Training Epoch 11: 100%|██████████| 5475/5475 [10:48<00:00,  8.45it/s]


Training Loss: 2.2370


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.43it/s]


Validation Loss: 2.1848 Start Accuracy: 0.6677 End Accuracy: 0.7077
New best model saved with validation loss: 2.1848


Training Epoch 12: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.2127


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.43it/s]


Validation Loss: 2.1787 Start Accuracy: 0.6684 End Accuracy: 0.7105
New best model saved with validation loss: 2.1787


Training Epoch 13: 100%|██████████| 5475/5475 [10:48<00:00,  8.45it/s]


Training Loss: 2.1906


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.43it/s]


Validation Loss: 2.1785 Start Accuracy: 0.6713 End Accuracy: 0.7099
New best model saved with validation loss: 2.1785


Training Epoch 14: 100%|██████████| 5475/5475 [10:48<00:00,  8.45it/s]


Training Loss: 2.1808


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.42it/s]


Validation Loss: 2.1701 Start Accuracy: 0.6710 End Accuracy: 0.7117
New best model saved with validation loss: 2.1701


Training Epoch 15: 100%|██████████| 5475/5475 [10:48<00:00,  8.44it/s]


Training Loss: 2.1685


Validation: 100%|██████████| 661/661 [00:40<00:00, 16.42it/s]


Validation Loss: 2.1686 Start Accuracy: 0.6712 End Accuracy: 0.7105
New best model saved with validation loss: 2.1686


# Task 1

In [11]:
from dataloaders.task1 import dataLoader as dataLoader1

# Task-1 batches use a larger batch size than the notebook-wide default set earlier.
batch_size = 64

# The task-1 loader factory returns the train / validation / test dataloaders.
(
    task1_train_dataloader,
    task1_val_dataloader,
    task1_test_dataloader,
) = dataLoader1(base_model_name, batch_size=batch_size)



In [14]:
from train_val_test.task1_train_test_val import train as task1_train,validate as task1_validate
from architecture import Model


In [7]:
# Classification loss for task 1.
loss_fn = nn.CrossEntropyLoss()

# Build a fresh model and initialise it from a previously saved checkpoint.
# NOTE(review): the checkpoint path is absolute and machine-specific; consider
# deriving it from a configurable directory.
unified_model = Model(base_model_name)
# map_location lets a checkpoint saved on a GPU load on whatever `device` is
# (including CPU-only machines); weights_only=True restricts unpickling to
# tensors/containers and avoids the torch.load FutureWarning.
load_dict = torch.load(
    '/home/jyotish/isro/MYProjects/model_checkpoints/best_model.pt',
    map_location=device,
    weights_only=True,
)
unified_model.load_state_dict(load_dict)
unified_model.to(device)

# Baseline: task-1 validation before any task-1 training.
z = task1_validate(unified_model,task1_val_dataloader,loss_fn,'task1')

  load_dict = torch.load('/home/jyotish/isro/MYProjects/model_checkpoints/best_model.pt')
Validation: 100%|██████████| 14/14 [00:03<00:00,  4.35it/s]

Validation Loss: 0.7505 Accuracy : {'accuracy': 0.4908256880733945}





In [8]:
# First task-1 fine-tuning run (5 epochs) starting from `unified_model`.
model1 = task1_train(
    unified_model,
    task1_train_dataloader,
    task1_val_dataloader,
    loss_fn,
    num_epochs=5,
    learning_rate=5e-5,
    task='task1',
)

Training Epoch 1: 100%|██████████| 1053/1053 [08:27<00:00,  2.07it/s]


Training Loss: 0.3276


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2521 Accuracy : {'accuracy': 0.9048165137614679}
New best model saved with validation loss: 0.2521


Training Epoch 2: 100%|██████████| 1053/1053 [08:27<00:00,  2.07it/s]


Training Loss: 0.2463


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2529 Accuracy : {'accuracy': 0.9002293577981652}


Training Epoch 3: 100%|██████████| 1053/1053 [08:28<00:00,  2.07it/s]


Training Loss: 0.2319


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2423 Accuracy : {'accuracy': 0.9048165137614679}
New best model saved with validation loss: 0.2423


Training Epoch 4: 100%|██████████| 1053/1053 [08:28<00:00,  2.07it/s]


Training Loss: 0.2184


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.57it/s]


Validation Loss: 0.2430 Accuracy : {'accuracy': 0.9105504587155964}


Training Epoch 5: 100%|██████████| 1053/1053 [08:27<00:00,  2.07it/s]


Training Loss: 0.2130


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2412 Accuracy : {'accuracy': 0.9128440366972477}
New best model saved with validation loss: 0.2412


In [9]:
# Second task-1 run: 5 more epochs, continuing from `model1`.
model2 = task1_train(
    model1,
    task1_train_dataloader,
    task1_val_dataloader,
    loss_fn,
    num_epochs=5,
    learning_rate=5e-5,
    task='task1',
)

Training Epoch 1: 100%|██████████| 1053/1053 [08:27<00:00,  2.07it/s]


Training Loss: 0.2116


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2393 Accuracy : {'accuracy': 0.9139908256880734}
New best model saved with validation loss: 0.2393


Training Epoch 2: 100%|██████████| 1053/1053 [08:27<00:00,  2.07it/s]


Training Loss: 0.1997


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2403 Accuracy : {'accuracy': 0.908256880733945}


Training Epoch 3: 100%|██████████| 1053/1053 [08:28<00:00,  2.07it/s]


Training Loss: 0.1895


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2555 Accuracy : {'accuracy': 0.9094036697247706}


Training Epoch 4: 100%|██████████| 1053/1053 [08:27<00:00,  2.07it/s]


Training Loss: 0.1831


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.56it/s]


Validation Loss: 0.2462 Accuracy : {'accuracy': 0.911697247706422}


Training Epoch 5: 100%|██████████| 1053/1053 [08:28<00:00,  2.07it/s]


Training Loss: 0.1798


Validation: 100%|██████████| 14/14 [00:03<00:00,  4.54it/s]

Validation Loss: 0.2493 Accuracy : {'accuracy': 0.908256880733945}





In [13]:
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable, and the total count.

    Args:
        model: Any object exposing ``parameters()`` that yields tensors.
    """
    trainable_count = 0
    total_count = 0
    for param in model.parameters():
        size = param.numel()
        total_count += size
        # Only parameters that will receive gradients count as trainable.
        if param.requires_grad:
            trainable_count += size
    print(f"Trainable parameters: {trainable_count:,}")
    print(f"Total parameters: {total_count:,}")

# Report parameter counts for `model2`, the model returned by the second task-1 training run.
print_trainable_parameters(model2)

Trainable parameters: 151,301
Total parameters: 109,928,453


In [5]:
from train_val_test.task2_train_test_val import train as task2_train,validate as task2_validate
from architecture import Model

In [6]:
from dataloaders.task2 import dataLoader as dataLoader2

# Batch size used for the task-2 dataloaders.
batch_size = 64

# The task-2 loader factory returns the train / validation / test dataloaders.
(
    task2_train_dataloader,
    task2_val_dataloader,
    task2_test_dataloader,
) = dataLoader2(base_model_name, batch_size=batch_size)



In [7]:
# Regression loss for task 2.
loss_fn2 = nn.MSELoss()

# Build a fresh model and initialise it from a previously saved checkpoint.
# NOTE(review): the checkpoint path is absolute and machine-specific; consider
# deriving it from a configurable directory.
unified_model = Model(base_model_name)
# map_location lets a checkpoint saved on a GPU load on whatever `device` is
# (including CPU-only machines); weights_only=True restricts unpickling to
# tensors/containers and avoids the torch.load FutureWarning.
load_dict = torch.load(
    '/home/jyotish/isro/MYProjects/model_checkpoints/task1e10_best_model.pt',
    map_location=device,
    weights_only=True,
)
unified_model.load_state_dict(load_dict)
unified_model.to(device)

# Baseline: task-2 validation before any task-2 training.
z = task2_validate(unified_model,task2_val_dataloader,loss_fn2,'task2')

  load_dict = torch.load('/home/jyotish/isro/MYProjects/model_checkpoints/task1e10_best_model.pt')
Validation: 100%|██████████| 24/24 [00:05<00:00,  4.38it/s]

RMSE : {'mse': 2.1515507550761046}
Validation Loss: 4.6633





In [10]:
# `model3` is only ever bound by this statement, so on a fresh kernel it does
# not exist yet. Continue from an existing `model3` when there is one (re-running
# the cell), otherwise start from the `unified_model` loaded above.
task2_start_model = model3 if 'model3' in globals() else unified_model
model3 = task2_train(
    task2_start_model,
    task2_train_dataloader,
    task2_val_dataloader,
    loss_fn2,
    num_epochs=10,
    learning_rate=5e-5,
    task='task2',
)

Training Epoch 1: 100%|██████████| 90/90 [00:43<00:00,  2.09it/s]


Training Loss: 0.5996


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.43it/s]


RMSE : {'mse': 0.7376636581532429}
Validation Loss: 0.5540
New best model saved with validation loss: 0.5540


Training Epoch 2: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.5700


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.43it/s]


RMSE : {'mse': 0.7284092391372092}
Validation Loss: 0.5402
New best model saved with validation loss: 0.5402


Training Epoch 3: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.5456


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.44it/s]


RMSE : {'mse': 0.7490308413405967}
Validation Loss: 0.5715


Training Epoch 4: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.5258


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.44it/s]


RMSE : {'mse': 0.716831657601152}
Validation Loss: 0.5235
New best model saved with validation loss: 0.5235


Training Epoch 5: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.5272


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.44it/s]


RMSE : {'mse': 0.7133034954094484}
Validation Loss: 0.5185
New best model saved with validation loss: 0.5185


Training Epoch 6: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.5063


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.43it/s]


RMSE : {'mse': 0.7137926175937406}
Validation Loss: 0.5190


Training Epoch 7: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.4946


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.44it/s]


RMSE : {'mse': 0.7262863006212984}
Validation Loss: 0.5378


Training Epoch 8: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.4931


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.43it/s]


RMSE : {'mse': 0.7198553904376429}
Validation Loss: 0.5283


Training Epoch 9: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.4790


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.44it/s]


RMSE : {'mse': 0.7222075276532882}
Validation Loss: 0.5319


Training Epoch 10: 100%|██████████| 90/90 [00:43<00:00,  2.08it/s]


Training Loss: 0.4735


Validation: 100%|██████████| 24/24 [00:05<00:00,  4.44it/s]

RMSE : {'mse': 0.720309877978332}
Validation Loss: 0.5290
Early stopping at epoch 10 due to no improvement for 5 consecutive epochs.





In [15]:
# Check the same model on both tasks: task 2 with the MSE loss, then task 1
# with a cross-entropy loss.
loss_fn = nn.CrossEntropyLoss()
z = task2_validate(model3, task2_val_dataloader, loss_fn2, 'task2')
z = task1_validate(model3, task1_val_dataloader, loss_fn, 'task1')

Validation: 100%|██████████| 24/24 [00:05<00:00,  4.64it/s]


RMSE : {'mse': 0.720309877978332}
Validation Loss: 0.5290


Validation: 100%|██████████| 14/14 [00:02<00:00,  4.69it/s]

Validation Loss: 0.2393 Accuracy : {'accuracy': 0.9139908256880734}





In [None]:
# Run task-2 inference on the CPU, one sentence pair at a time.
unified_model.to('cpu')
unified_model.eval()  # disable dropout so the printed scores are deterministic
tokenizer = AutoTokenizer.from_pretrained(base_model_name, add_prefix_space=True)

val_preds = []
val_truth = []
print("UnTrained model predictions: on validation set")
print("--------------------------")
# NOTE(review): `test_dataset` is not defined in any cell shown here — it must
# be created before this cell can run.
with torch.no_grad():  # inference only; no autograd graph needed
    for inputs in test_dataset:
        x = tokenizer(inputs['sentence1'],inputs['sentence2'], truncation=True, padding='max_length', return_tensors='pt')

        # Call the model that was just moved to the CPU (the original called an
        # unrelated `model` variable).
        logits = unified_model(input_ids=x['input_ids'], attention_mask=x['attention_mask'], token_type_ids = x['token_type_ids'], task='task2')
        # Double-quoted f-string so the single-quoted keys parse on Python < 3.12.
        print(f"Sentence 1 : {inputs['sentence1']} ; Sentence 2 : {inputs['sentence2']} ; TestScore {logits} ; ActualScore {inputs['score']}")
    
    # for j in range(len(inputs)):
    #     print(f'Sentence 1 : {test_dataset['sentence1'][j]} ; Sentence 2 : {test_dataset['sentence2'][j]} ; TestScore {logits[j]} ; ActualScore {test_dataset['score'][j]}')
    # break
    
# print(f'Accuracy on Validation Data : {accuracy.compute(predictions=val_preds, references=val_truth)}')

In [49]:
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of ``model``.

    Re-definition of the helper declared in an earlier cell, with the same
    printed output.

    Args:
        model: Any object exposing ``parameters()`` that yields tensors.
    """
    # Collect (element count, trainable?) once, then sum over the pairs.
    size_and_flag = [(p.numel(), p.requires_grad) for p in model.parameters()]
    total_params = sum(size for size, _ in size_and_flag)
    trainable_params = sum(size for size, is_trainable in size_and_flag if is_trainable)
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Total parameters: {total_params:,}")

# Report parameter counts for `unified_model`.
print_trainable_parameters(unified_model)

Trainable parameters: 1,330,947
Total parameters: 111,255,555


In [108]:
def print_trainable_parameters(model):
    """Print the number of trainable parameters and of all parameters in ``model``.

    Third definition of this helper in the notebook; the printed output is the
    same as for the earlier ones.

    Args:
        model: Any object exposing ``parameters()`` that yields tensors.
    """
    all_params = list(model.parameters())
    # Parameters with requires_grad=False are frozen and excluded from the first count.
    unfrozen = filter(lambda param: param.requires_grad, all_params)
    trainable_params = sum(param.numel() for param in unfrozen)
    total_params = sum(param.numel() for param in all_params)
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Total parameters: {total_params:,}")

# NOTE(review): `model` is not assigned in any cell shown in this notebook; this
# call relies on a variable left over from an earlier session — confirm which
# model is meant.
print_trainable_parameters(model)


Trainable parameters: 147,456
Total parameters: 124,794,626


In [110]:
# NOTE(review): `model`, `text_list`, `tokenizer` usage and `id2label` come from
# cells not shown in this notebook; `model` is expected to return an object
# with a `.logits` attribute.
model.to('cpu')
model.eval()  # disable dropout so predictions are deterministic

print("Trained model predictions:")
print("--------------------------")
with torch.no_grad():  # inference only; no autograd graph needed
    for text in text_list:
        inputs = tokenizer.encode(text, return_tensors="pt").to("cpu")

        logits = model(inputs).logits
        # Index of the highest logit along dim 1 is the predicted class id.
        predictions = torch.max(logits,1).indices

        print(text + " - " + id2label[predictions.tolist()[0]])

Trained model predictions:
--------------------------
a feel-good picture in the best sense of the term . - Positive
resourceful and ingenious entertainment . - Positive
it 's just incredibly dull . - Negative
the movie 's biggest offense is its complete and utter lack of tension . - Negative
impresses you with its open-endedness and surprises . - Positive
unless you are in dire need of a diesel fix , there is no real reason to see it . - Negative


In [89]:
# NOTE(review): `model` and `tokenized_dataset` come from cells not shown in
# this notebook; `model` is expected to return an object with `.logits`.
model.to('cpu')
model.eval()  # disable dropout so predictions are deterministic

# Read both columns once instead of re-indexing the label column on every
# iteration (presumably each column access rebuilds the full column — verify).
validation_split = tokenized_dataset['validation']
validation_texts = validation_split['sentence']
validation_labels = validation_split['label']

val_preds = []
val_truth = []
print("Trained model predictions: on validation set")
print("--------------------------")
with torch.no_grad():  # inference only; no autograd graph needed
    for text, label in zip(validation_texts, validation_labels):
        inputs = tokenizer.encode(text, return_tensors="pt").to("cpu")

        logits = model(inputs).logits
        # Index of the highest logit along dim 1 is the predicted class id.
        predictions = torch.max(logits,1).indices
        val_preds.append(predictions.tolist()[0])
        val_truth.append(label)
        # print(text + " - " + id2label[predictions.tolist()[0]])

print(f'Accuracy on Validation Data : {accuracy.compute(predictions=val_preds, references=val_truth)}')

Trained model predictions: on validation set
--------------------------
Accuracy on Validation Data : {'accuracy': 0.9197247706422018}
