In [None]:
# Mount Google Drive into the Colab filesystem. The dataset files and the
# checkpoint directory referenced later in this notebook live under
# /content/drive, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from transformers import AutoTokenizer, BertModel, AdamW;

import torch;
import torch.nn as nn;
import torch.nn.functional as F;
import torch.optim as optim;
from torch.utils.data import Dataset, DataLoader;

In [None]:
import os;
import os.path;

In [None]:
import numpy as np;
import matplotlib.pyplot as plt;

import statistics;

In [None]:
# Pick the compute device once: use CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU. Every tensor and the model are moved here.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
class BERTClassifier(nn.Module):
    """Use a frozen BERT encoder for a downstream classification task.

    The pretrained encoder is used purely as a fixed feature extractor: all of
    its parameters have ``requires_grad = False`` and it is kept in eval mode,
    so only the dropout + linear head defined here is trained.

    Args:
        model_name: Name or path passed to ``BertModel.from_pretrained``.
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, model_name, num_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)

        # Freeze the encoder's parameters; gradients only flow into the head.
        for param in self.bert.parameters():
            param.requires_grad = False
        # A frozen feature extractor should be deterministic, so put it in
        # eval mode right away (see ``train`` below for why this must be kept).
        self.bert.eval()

        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def train(self, mode=True):
        """Switch the head between train/eval mode, keeping the encoder in eval.

        ``nn.Module.train`` recurses into every submodule, which would
        re-enable any dropout layers inside the frozen encoder and add noise to
        features that cannot adapt to it. Re-applying ``eval()`` on the encoder
        keeps its output stable while the head still trains with dropout.

        Args:
            mode: ``True`` for training mode, ``False`` for evaluation mode.

        Returns:
            ``self``, matching the ``nn.Module.train`` contract.
        """
        super().train(mode)
        self.bert.eval()
        return self

    def forward(self, input_ids, attention_mask):
        """Compute class logits for a batch of tokenized inputs.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            The output of the linear head applied to the encoder's
            ``pooler_output`` (one row of ``num_classes`` logits per input).
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.dropout(encoded.pooler_output)
        return self.fc(pooled)

In [None]:
# Run configuration shared by the cells below.
bert_model_name = 'bert-base-uncased'  # checkpoint used for both tokenizer and encoder
num_classes = 3                        # size of the classifier's output layer
max_length = 512                       # every example is padded/truncated to this many tokens
batch_size = 16                        # batch size for both data loaders
epochs = 3                             # NOTE: reassigned to 20 in the training cell
learning_rate = 1e-3                   # learning rate handed to the optimizer

In [None]:
# Load the tokenizer that matches the pretrained checkpoint; the dataset class
# below uses this module-level object to encode each text line.
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Line-aligned text classification dataset backed by two text files.

    Line ``i`` of ``path_x`` is an input text and line ``i`` of ``path_y`` is
    its integer class label. Both files are read fully into memory at
    construction time; tokenization happens lazily in ``__getitem__``.

    This is a map-style dataset, so it derives from
    ``torch.utils.data.Dataset`` (it holds no parameters and is not an
    ``nn.Module``). The class keeps the name ``Dataset`` so existing callers
    continue to work, which is why the base class is spelled out in full.

    Args:
        path_x: Path to the file with one input text per line.
        path_y: Path to the file with one integer label per line.
        max_length: Length every example is padded or truncated to.
        text_tokenizer: Optional tokenizer callable. When omitted, the
            module-level ``tokenizer`` is used, as before.

    Raises:
        FileNotFoundError: If ``path_x`` or ``path_y`` does not exist.
        ValueError: If the two files have a different number of lines.
    """

    def __init__(self, path_x, path_y, max_length, text_tokenizer=None):
        super().__init__()

        self.max_length = max_length
        # Bind the tokenizer once instead of reaching for a global on every
        # item; the global is only looked up when no tokenizer is passed in.
        self.tokenizer = text_tokenizer if text_tokenizer is not None else tokenizer

        for path in (path_x, path_y):
            if not os.path.exists(path):
                raise FileNotFoundError(path)

        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(path_x, 'r', encoding='utf-8') as f:
            self.x = f.readlines()

        with open(path_y, 'r', encoding='utf-8') as f:
            self.y = f.readlines()

        if len(self.x) != len(self.y):
            raise ValueError("x and y must have same length")

    def __len__(self):
        """Return the number of (text, label) pairs."""
        return len(self.x)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return it with its label.

        Returns:
            A dict with the flattened ``input_ids`` and ``attention_mask``
            produced by the tokenizer, and ``label`` as an integer tensor
            parsed from the matching line of the label file.
        """
        # readlines() keeps the line terminator; drop it before tokenizing.
        text = self.x[idx].rstrip('\r\n')
        encoded = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        return {
            # The tokenizer returns a leading batch dimension of 1; flatten it
            # away so the DataLoader can stack examples into a batch.
            'input_ids': encoded['input_ids'].flatten(),
            'attention_mask': encoded['attention_mask'].flatten(),
            # int() tolerates the trailing newline left on each label line.
            'label': torch.tensor(int(self.y[idx]), dtype=torch.long),
        }

In [None]:
# Build the training and validation splits from their text/label file pairs,
# then wrap each one in a shuffling DataLoader.
train_dataset = Dataset(
    path_x='/content/drive/MyDrive/Data/DATA/trainset_cmb.txt',
    path_y='/content/drive/MyDrive/Data/DATA/y_train.txt',
    max_length=max_length,
)
val_dataset = Dataset(
    path_x='/content/drive/MyDrive/Data/DATA/valset_cmb.txt',
    path_y='/content/drive/MyDrive/Data/DATA/y_val.txt',
    max_length=max_length,
)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Instantiate the classifier and move it to the selected device.
model = BERTClassifier(bert_model_name, num_classes).to(device)

# Only hand the optimizer parameters that can actually receive gradients; the
# encoder is frozen, so this is just the classification head.
trainable_params = [p for p in model.parameters() if p.requires_grad]

# Use PyTorch's AdamW: the AdamW class exported by `transformers` is
# deprecated in favour of torch.optim.AdamW. `eps` and `weight_decay` are
# pinned to the values the deprecated class defaulted to, so the update rule
# stays the same as before (torch's own defaults differ).
optimizer = optim.AdamW(trainable_params, lr=learning_rate, eps=1e-6, weight_decay=0.0)

# Multi-class classification loss applied to the raw logits.
criterion = nn.CrossEntropyLoss()

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]



In [None]:
# Directory (on the mounted Drive) where the best checkpoint is written.
# The trailing slash matters: the training cell appends the file name to it.
store_best_path = "/content/drive/MyDrive/PARAM/"

In [None]:
epochs = 20


def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Both metrics are averaged per sample rather than per batch, so a smaller
    final batch does not get the same weight as a full one.

    Args:
        loader: Iterable of batches with ``input_ids``, ``attention_mask`` and
            ``label`` entries.
        training: When ``True`` the model is put in train mode and the
            optimizer is stepped on every batch; when ``False`` the model is
            put in eval mode and gradients are disabled.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(training):
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            y = batch['label'].to(device)

            if training:
                optimizer.zero_grad()

            output = model(input_ids, attention_mask)
            loss = criterion(output, y)

            if training:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean; scale it back up so batches of
            # different sizes are weighted correctly in the epoch average.
            n_samples = y.size(0)
            loss_sum += loss.item() * n_samples
            predictions = torch.argmax(output, dim=-1).view(-1)
            correct += (predictions == y.view(-1)).sum().item()
            seen += n_samples

    if seen == 0:
        raise ValueError("loader produced no samples")

    return loss_sum / seen, correct / seen


# Per-epoch history, kept at module level for later inspection/plotting.
train_loss = []
val_loss = []
train_acc = []
val_acc = []

# Create the checkpoint directory up front so a missing folder fails fast
# instead of after the first full epoch of training.
os.makedirs(store_best_path, exist_ok=True)
best_model_path = os.path.join(store_best_path, 'bert_wth_rouge_embed.pt')

for epoch in range(epochs):
    epoch_train_loss, epoch_train_acc = _run_epoch(train_loader, training=True)
    epoch_val_loss, epoch_val_acc = _run_epoch(val_loader, training=False)

    print(
        f"Epoch[{epoch}][{epochs}] : Training Loss :{epoch_train_loss}, "
        f"Validation Loss :{epoch_val_loss}, "
        f"Training Accuracy :{epoch_train_acc}, "
        f"Validation Accuracy :{epoch_val_acc}"
    )

    # Checkpoint whenever this epoch beats every previous validation loss.
    # val_loss does not yet include the current epoch at this point.
    if not val_loss or epoch_val_loss < min(val_loss):
        torch.save(model.state_dict(), best_model_path)

    train_loss.append(epoch_train_loss)
    val_loss.append(epoch_val_loss)
    train_acc.append(epoch_train_acc)
    val_acc.append(epoch_val_acc)

Epoch[0][20] : Training Loss :1.0550477057695389, Validation Loss :1.0404236912727356, Training Accuracy :0.4271875, Validation Accuracy :0.43875
Epoch[1][20] : Training Loss :1.0393070521950722, Validation Loss :1.0268273210525514, Training Accuracy :0.4540625, Validation Accuracy :0.42375
Epoch[2][20] : Training Loss :1.040709409713745, Validation Loss :0.999090369939804, Training Accuracy :0.438125, Validation Accuracy :0.46
Epoch[3][20] : Training Loss :1.0394719383120536, Validation Loss :1.0085554385185242, Training Accuracy :0.4534375, Validation Accuracy :0.46625
Epoch[4][20] : Training Loss :1.015251940190792, Validation Loss :0.9922445082664489, Training Accuracy :0.473125, Validation Accuracy :0.4775
Epoch[5][20] : Training Loss :1.019345808327198, Validation Loss :1.0224869048595429, Training Accuracy :0.4659375, Validation Accuracy :0.45375
Epoch[6][20] : Training Loss :1.027528101503849, Validation Loss :0.9914641535282135, Training Accuracy :0.460625, Validation Accuracy