<a href="https://colab.research.google.com/github/Sghosh32/NCG-Task/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/ncg-task/training-data.git
!git clone https://github.com/ncg-task/test-data.git

Cloning into 'training-data'...
remote: Enumerating objects: 6864, done.[K
remote: Counting objects: 100% (3083/3083), done.[K
remote: Compressing objects: 100% (2728/2728), done.[K
remote: Total 6864 (delta 567), reused 2504 (delta 279), pack-reused 3781[K
Receiving objects: 100% (6864/6864), 157.36 MiB | 28.79 MiB/s, done.
Resolving deltas: 100% (660/660), done.
Updating files: 100% (3286/3286), done.
Cloning into 'test-data'...
remote: Enumerating objects: 2508, done.[K
remote: Total 2508 (delta 0), reused 0 (delta 0), pack-reused 2508[K
Receiving objects: 100% (2508/2508), 215.28 MiB | 18.97 MiB/s, done.
Resolving deltas: 100% (54/54), done.
Updating files: 100% (2060/2060), done.


In [None]:
!pip install torchdata
!pip install sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchdata
  Downloading torchdata-0.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m85.1 MB/s[0m eta [36m0:00:00[0m
Collecting portalocker>=2.0.0
  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: portalocker, torchdata
Successfully installed portalocker-2.7.0 torchdata-0.5.1


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import numpy as np
import random
from torch.utils.data import Dataset
import torch.optim as optim
from torchdata import DataLoader
from sentence_transformers import SentenceTransformer
from transformers import BertTokenizer
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [None]:
# Pick the accelerator once; later cells read this module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Notebook is running on", device)

SEED = 4444

# Seed every random number generator the notebook uses so runs are repeatable.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# manual_seed_all seeds every visible GPU (not just the current one) and is a
# no-op when CUDA is unavailable.
torch.cuda.manual_seed_all(SEED)
# Deterministic cuDNN kernels, and no autotuner picking algorithms per run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one sentence per item.

    Args:
        sentences: Indexable, sized collection; each element is converted
            with ``str`` before tokenization.
        tokenizer: Callable tokenizer instance accepting the keyword
            arguments used in ``__getitem__`` and returning a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors (the Hugging Face
            tokenizer ``__call__`` protocol).
        max_len: Length every encoded sentence is padded or truncated to.
    """

    def __init__(self, sentences, tokenizer, max_len):
        self.sentences = sentences
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of sentences."""
        return len(self.sentences)

    def __getitem__(self, item):
        """Tokenize the sentence at index ``item``.

        Returns:
            dict with the raw ``'sentence'`` string, flattened ``'input_ids'``
            and ``'attention_mask'`` tensors, and a ``'labels'`` tensor.
        """
        sentence = str(self.sentences[item])
        # Only the single sentence is encoded. The previous call also passed
        # the whole `self.sentences` collection as the second positional
        # argument, which tokenizers interpret as the paired text.
        encoding = self.tokenizer(
            sentence,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            # Replaces the deprecated `pad_to_max_length=True`; truncation
            # keeps over-long sentences at exactly `max_len` tokens.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'sentence': sentence,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            # NOTE(review): every item gets the constant label 0 — looks like
            # a placeholder; confirm where real labels should come from.
            'labels': torch.tensor(0, dtype=torch.long)
        }


In [None]:
def create_data_loader(df, tokenizer, max_len, batch_size):
    """Build an unshuffled ``DataLoader`` over a sentence ``Dataset``.

    Args:
        df: Sentence collection handed to ``Dataset`` as ``sentences``.
        tokenizer: Tokenizer forwarded to ``Dataset``.
        max_len: Maximum encoded length forwarded to ``Dataset``.
        batch_size: Number of items per batch.

    Returns:
        ``torch.utils.data.DataLoader`` iterating the dataset in index order.
    """
    dataset = Dataset(sentences=df, tokenizer=tokenizer, max_len=max_len)
    loader_options = {"batch_size": batch_size, "shuffle": False}
    return torch.utils.data.DataLoader(dataset, **loader_options)
train_path = '/content/training-data/'
test_path = '/content/test-data/'

# The loaders need a tokenizer *instance*; passing the BertTokenizer class
# itself leaves the dataset with nothing it can call to encode text.
# NOTE(review): 'bert-base-uncased' is an assumed checkpoint — confirm which
# vocabulary the model is meant to use.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# NOTE(review): the directory path strings are passed as the sentence
# collection, so the dataset indexes the characters of the path rather than
# sentences read from disk. Load the sentences from these directories and pass
# that list instead once the on-disk layout is confirmed.
train_loader = create_data_loader(train_path, tokenizer, 512, 1)
test_loader = create_data_loader(test_path, tokenizer, 512, 1)

In [None]:
class MultiHeadAttentionLayer(nn.Module):
    """Scaled dot-product attention split across ``n_heads`` heads.

    Args:
        hidden_dimension: Size of the last axis of query/key/value and of the
            output; must be divisible by ``n_heads``.
        n_heads: Number of attention heads.
        dropout: Dropout probability applied to the attention weights.

    Raises:
        ValueError: If ``hidden_dimension`` is not a multiple of ``n_heads``.
    """

    def __init__(self, hidden_dimension, n_heads, dropout):
        super(MultiHeadAttentionLayer, self).__init__()
        # Without this check the per-head reshape in forward() would either
        # fail with an opaque view error or silently mix positions.
        if hidden_dimension % n_heads != 0:
            raise ValueError(
                f"hidden_dimension ({hidden_dimension}) must be divisible by n_heads ({n_heads})"
            )
        self.hidden_dimension = hidden_dimension
        self.n_heads = n_heads
        self.head_dimension = hidden_dimension // n_heads
        self.fc_Q = nn.Linear(hidden_dimension, hidden_dimension)
        self.fc_K = nn.Linear(hidden_dimension, hidden_dimension)
        self.fc_V = nn.Linear(hidden_dimension, hidden_dimension)
        self.fc_O = nn.Linear(hidden_dimension, hidden_dimension)
        self.scale = math.sqrt(self.head_dimension)
        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, mask = None):
        """Attend from ``query`` over ``key``/``value``.

        Args:
            query, key, value: Tensors whose first axis is the batch and whose
                last axis has size ``hidden_dimension``.
            mask: Optional tensor broadcastable to the attention scores
                (batch, heads, query_len, key_len); positions where it equals
                0 receive no attention weight.

        Returns:
            Tensor reshaped to (batch, -1, hidden_dimension).
        """
        batch_size = query.shape[0]
        Q = self.fc_Q(query)
        K = self.fc_K(key)
        V = self.fc_V(value)
        # Split the hidden axis into heads: (batch, heads, length, head_dim).
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dimension).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dimension).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dimension).permute(0, 2, 1, 3)
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        if mask is not None:
            # Use the most negative value representable in the scores' dtype:
            # a hard-coded -1e10 cannot be stored in half precision.
            energy = energy.masked_fill(mask == 0, torch.finfo(energy.dtype).min)
        attention = torch.softmax(energy, dim = -1)
        x = torch.matmul(self.dropout(attention), V)
        # Merge the heads back into a single hidden axis.
        x = x.permute(0, 2, 1, 3).contiguous()
        output = self.fc_O(x.view(batch_size, -1, self.hidden_dimension))
        return output

In [None]:
class PositionwiseFeedforwardLayer(nn.Module):
    """Feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    Args:
        hidden_dimension: Size of the input and output feature axis.
        pf_dimension: Size of the intermediate feature axis.
        dropout: Dropout probability applied after the activation.
    """

    def __init__(self, hidden_dimension, pf_dimension, dropout):
        super().__init__()
        self.fc_1 = nn.Linear(hidden_dimension, pf_dimension)
        self.fc_2 = nn.Linear(pf_dimension, hidden_dimension)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Project up, activate, drop out, and project back down."""
        expanded = self.fc_1(x)
        activated = self.dropout(expanded.relu())
        return self.fc_2(activated)

In [None]:
class EncoderLayer(nn.Module):
    """One encoder block: self-attention then feed-forward, each followed by
    dropout, a residual connection and layer normalisation.

    Args:
        hidden_dimension: Feature size of the layer's input and output.
        n_heads: Number of attention heads.
        pf_dimension: Inner size of the feed-forward block.
        dropout: Dropout probability.
        device: Accepted for signature compatibility; not used by this layer.
    """

    def __init__(self, hidden_dimension, n_heads, pf_dimension, dropout, device):
        super(EncoderLayer, self).__init__()
        self.self_attention_layer_norm = nn.LayerNorm(hidden_dimension)
        self.positionwise_feedforward_layer_norm = nn.LayerNorm(hidden_dimension)
        self.self_attention = MultiHeadAttentionLayer(hidden_dimension, n_heads, dropout)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hidden_dimension, pf_dimension, dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask):
        """Run the block on ``src`` using ``src_mask`` for self-attention.

        Returns:
            Tensor with the same shape as ``src``.
        """
        # MultiHeadAttentionLayer.forward returns a single tensor, so the
        # result must not be tuple-unpacked (that would split it along the
        # batch axis or raise, depending on the batch size).
        _src = self.self_attention(src, src, src, src_mask)
        src = self.self_attention_layer_norm(src + self.dropout(_src))
        _src = self.positionwise_feedforward(src)
        src = self.positionwise_feedforward_layer_norm(src + self.dropout(_src))
        return src

In [None]:
class Encoder(nn.Module):
    """Stack of ``EncoderLayer`` blocks over token + learned position embeddings.

    Args:
        input_dimension: Number of rows in the token embedding table.
        hidden_dimension: Embedding / feature size.
        n_layers: Number of encoder layers.
        n_heads: Attention heads per layer.
        pf_dimension: Inner size of each layer's feed-forward block.
        dropout: Dropout probability.
        device: Stored on the instance and forwarded to each layer.
        max_length: Number of rows in the position embedding table, i.e. the
            longest sequence the encoder accepts.
    """

    def __init__(self, input_dimension, hidden_dimension, n_layers, n_heads, pf_dimension, dropout, device, max_length = 100):
        super(Encoder, self).__init__()
        self.device = device
        self.tok_embedding = nn.Embedding(input_dimension, hidden_dimension)
        self.pos_embedding = nn.Embedding(max_length, hidden_dimension)
        self.layers = nn.ModuleList([EncoderLayer(hidden_dimension, n_heads, pf_dimension, dropout, device) for _ in range(n_layers)])
        self.dropout = nn.Dropout(dropout)
        # A plain float (as Decoder uses) instead of a tensor pinned to
        # `device` at construction time: an unregistered tensor attribute
        # does not follow the module through a later .to(...).
        self.scale = math.sqrt(hidden_dimension)

    def forward(self, src, src_mask):
        """Embed ``src`` token ids and pass them through every layer.

        Args:
            src: Integer tensor indexed as (batch, length).
            src_mask: Mask forwarded unchanged to each layer.

        Returns:
            Output of the last layer (the embedded input when there are no
            layers).

        Raises:
            IndexError: If the sequence is longer than the position table.
        """
        batch_size = src.shape[0]
        src_len = src.shape[1]
        max_positions = self.pos_embedding.num_embeddings
        if src_len > max_positions:
            # Fail with a readable message rather than an embedding lookup
            # error (a device-side assert on CUDA).
            raise IndexError(
                f"sequence length {src_len} exceeds the position table size {max_positions}"
            )
        # Build positions on the same device as the input so the module works
        # wherever it has been moved, regardless of the stored `self.device`.
        pos = torch.arange(0, src_len, device=src.device).unsqueeze(0).expand(batch_size, -1)
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        for layer in self.layers:
            src = layer(src, src_mask)
        return src

In [None]:
class Decoder_Layer(nn.Module):
    """One decoder block: self-attention, cross-attention over the encoder
    output, then feed-forward; each step is followed by dropout, a residual
    connection and layer normalisation.

    Args:
        hidden_dimension: Feature size of the layer's input and output.
        n_heads: Number of attention heads in both attention sub-layers.
        pff_dimension: Inner size of the feed-forward block.
        dropout: Dropout probability.
    """

    def __init__(self, hidden_dimension, n_heads, pff_dimension, dropout):
        super().__init__()
        self.self_attention = MultiHeadAttentionLayer(hidden_dimension, n_heads, dropout)
        self.cross_attention = MultiHeadAttentionLayer(hidden_dimension, n_heads, dropout)
        self.pff = PositionwiseFeedforwardLayer(hidden_dimension, pff_dimension, dropout)
        self.attention_norm1 = nn.LayerNorm(hidden_dimension)
        self.attention_norm2 = nn.LayerNorm(hidden_dimension)
        self.pff_normalized = nn.LayerNorm(hidden_dimension)
        self.dropout = nn.Dropout(dropout)

    def forward(self, target, target_mask, encoder_output, source_mask):
        """Apply the three sub-layers in order and return the final state."""
        # Sub-layer 1: the target attends to itself under `target_mask`.
        attended = self.self_attention(target, target, target, target_mask)
        state = self.attention_norm1(self.dropout(attended) + target)

        # Sub-layer 2: the running state queries the encoder output.
        attended = self.cross_attention(state, encoder_output, encoder_output, source_mask)
        state = self.attention_norm2(self.dropout(attended) + state)

        # Sub-layer 3: feed-forward block.
        transformed = self.pff(state)
        return self.pff_normalized(self.dropout(transformed) + state)

In [None]:
class Decoder(nn.Module):
    """Stack of ``Decoder_Layer`` blocks followed by a projection to the vocabulary.

    Args:
        token_vocab_size: Rows in the token embedding table and size of the
            output projection.
        positional_vocab_size: Rows in the position embedding table.
        hidden_dimension: Embedding / feature size.
        decoder_heads: Attention heads per layer.
        decoder_pff_dimension: Inner size of each layer's feed-forward block.
        num_layers: Number of decoder layers.
        decoder_dropout: Dropout probability.
    """

    def __init__(self, token_vocab_size, positional_vocab_size, hidden_dimension, decoder_heads, decoder_pff_dimension, num_layers, decoder_dropout):
        super(Decoder, self).__init__()
        self.token_embedding = nn.Embedding(token_vocab_size, hidden_dimension)
        self.positional_embedding = nn.Embedding(positional_vocab_size, hidden_dimension)
        self.decoder_layers = nn.ModuleList([Decoder_Layer(hidden_dimension, decoder_heads, decoder_pff_dimension, decoder_dropout) for i in range(num_layers)])
        self.fc = nn.Linear(hidden_dimension, token_vocab_size)
        self.scale = math.sqrt(hidden_dimension)
        self.dropout = nn.Dropout(decoder_dropout)

    def forward(self, target, target_mask, encoder_output, source_mask):
        """Decode ``target`` token ids against ``encoder_output``.

        Args:
            target: Integer tensor indexed as (batch, length).
            target_mask: Mask for the layers' self-attention.
            encoder_output: Encoder states attended to by every layer.
            source_mask: Mask for the layers' cross-attention.

        Returns:
            Tensor whose last axis has size ``token_vocab_size``.
        """
        batch_size = target.shape[0]
        target_length = target.shape[1]
        token_embedding = self.token_embedding(target)
        # Positions are created on the input's own device; relying on a
        # notebook-level `device` global breaks the module whenever it is
        # used outside that cell order or moved to another device.
        positional_tensor = torch.arange(0, target_length, device=target.device).unsqueeze(0).expand(batch_size, -1)
        positional_embedding = self.positional_embedding(positional_tensor)
        decoder_embedding = self.dropout(token_embedding * self.scale + positional_embedding)
        decoder_state = decoder_embedding
        for decoder_layer in self.decoder_layers:
            decoder_state = decoder_layer(decoder_state, target_mask, encoder_output, source_mask)
        output = self.fc(decoder_state)
        return output

In [None]:
class Transformer(nn.Module):
    """Encoder-decoder wrapper that builds the attention masks.

    Args:
        encoder: Callable as ``encoder(source, source_mask)``.
        decoder: Callable as
            ``decoder(target, target_mask, encoder_output, source_mask)``.
        source_padding_index: Token id treated as padding in the source.
        target_padding_index: Token id treated as padding in the target.
    """

    def __init__(self, encoder, decoder, source_padding_index, target_padding_index):
        super(Transformer, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.source_padding_index = source_padding_index
        self.target_padding_index = target_padding_index

    def make_source_mask(self, src):
        """Return a boolean mask that is False at source padding positions.

        Two singleton axes are inserted after the batch axis so the mask
        broadcasts over heads and query positions.
        """
        # Comparison results already live on src's device; no reliance on a
        # notebook-level `device` global.
        source_mask = (src != self.source_padding_index).unsqueeze(1).unsqueeze(2)
        return source_mask

    def make_target_mask(self, trg):
        """Return a boolean mask combining padding and causality.

        A position is True only if the key token is not padding and does not
        lie after the query position (lower-triangular constraint).
        """
        trg_length = trg.shape[1]
        pad_mask = (trg != self.target_padding_index).unsqueeze(1).unsqueeze(2)
        sub_mask = torch.tril(torch.ones((trg_length, trg_length), device = trg.device)).bool()
        target_mask = pad_mask & sub_mask
        return target_mask

    def forward(self, source, target):
        """Encode ``source`` and decode ``target`` against it; returns the decoder output."""
        source_mask = self.make_source_mask(source)
        target_mask = self.make_target_mask(target)
        encoder_output = self.encoder(source, source_mask)
        output = self.decoder(target, target_mask, encoder_output, source_mask)
        return output

In [None]:
class DNN(nn.Module):
    """Three-layer fully connected network with ReLU and dropout.

    Args:
        input_dimension: Size of the input feature axis.
        hidden_dimension: Size of both hidden layers.
        output_dimension: Size of the output axis.
        dropout: Dropout probability applied after each hidden activation.
    """

    # The constructor must be the dunder `__init__`; a method named `init` is
    # never invoked on instantiation, leaving the module without its layers.
    def __init__(self, input_dimension, hidden_dimension, output_dimension, dropout):
        super(DNN, self).__init__()
        self.fc_1 = nn.Linear(input_dimension, hidden_dimension)
        self.fc_2 = nn.Linear(hidden_dimension, hidden_dimension)
        self.fc_3 = nn.Linear(hidden_dimension, output_dimension)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the un-normalised outputs of the final linear layer."""
        x = self.dropout(torch.relu(self.fc_1(x)))
        x = self.dropout(torch.relu(self.fc_2(x)))
        x = self.fc_3(x)
        return x

In [None]:
# Training schedule.
NUM_EPOCHS = 10
LR = 0.0005
CLIP = 1  # max gradient norm passed to clip_grad_norm_

# Encoder / shared model sizes.
# NOTE(review): SOURCE_VOCAB_SIZE must be larger than the biggest token id the
# tokenizer can emit, and MAX_LENGTH at least the padded sequence length used
# by the data loaders — confirm both against the tokenizer and loader settings.
SOURCE_VOCAB_SIZE = 513
HIDDEN_DIMENSION = 256
ENCODER_PFF_DIMENSION = 512
ENCODER_HEADS = 8
ENCODER_DROPOUT = 0.1
ENCODER_NUM_LAYERS = 3
MAX_LENGTH = 256

# Decoder sizes and padding ids are read by the model-construction cell but
# were not defined in any visible cell, which raises NameError there.
# NOTE(review): the values below mirror the encoder settings and assume pad
# id 0 — TODO confirm against the intended model and tokenizer.
TARGET_VOCAB_SIZE = SOURCE_VOCAB_SIZE
DECODER_PFF_DIMENSION = ENCODER_PFF_DIMENSION
DECODER_HEADS = ENCODER_HEADS
DECODER_DROPOUT = ENCODER_DROPOUT
DECODER_NUM_LAYERS = ENCODER_NUM_LAYERS
source_padding_index = 0
target_padding_index = 0

In [None]:
def calculate_metrics(y_true, y_pred):
    """Compute macro-averaged recall, precision and F1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        Tuple ``(recall, precision, f1)``.
    """
    scorers = (recall_score, precision_score, f1_score)
    recall, precision, f1 = (
        scorer(y_true, y_pred, average = 'macro') for scorer in scorers
    )
    return recall, precision, f1

In [None]:
def Train(iterator, model, criterion, optimizer, clip):
    """Run one training epoch with teacher forcing.

    Args:
        iterator: Sized iterable of batches exposing ``.src`` and ``.trg``.
        model: Called as ``model(source, target_without_last_token)``.
        criterion: Loss taking flattened outputs and flattened targets.
        optimizer: Optimizer stepping the model's parameters.
        clip: Maximum gradient norm.

    Returns:
        Mean per-batch loss over the epoch.
    """
    # NOTE(review): batches are read through `.src` / `.trg` attributes, while
    # the `Dataset` in this notebook yields dicts — confirm which iterator is
    # meant to be passed here.
    model.train()
    epoch_loss = 0
    for batch in iterator:
        optimizer.zero_grad()
        source = batch.src
        target = batch.trg
        # Feed every target token except the last; predict every token except the first.
        outputs = model(source, target[:, :-1])
        outputs = outputs.contiguous().view(-1, outputs.shape[-1])
        # Put the labels wherever the model produced its outputs instead of
        # depending on a notebook-level `device` global.
        targets = target[:, 1:].contiguous().view(-1).to(outputs.device)
        batch_loss = criterion(outputs, targets)
        # Back-propagate exactly once; a second call on the undefined name
        # `loss` raised NameError before any optimizer step could happen.
        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += batch_loss.item()
    return epoch_loss / len(iterator)


In [None]:
def Evaluate(iterator, model, criterion):
    """Compute the mean per-batch loss without updating the model.

    Args:
        iterator: Sized iterable of batches exposing ``.src`` and ``.trg``.
        model: Called as ``model(source, target_without_last_token)``.
        criterion: Loss taking flattened outputs and flattened targets.

    Returns:
        Mean per-batch loss over ``iterator``.
    """
    model.eval()
    eval_loss = 0
    with torch.no_grad():
        for batch in iterator:
            source = batch.src
            target = batch.trg
            outputs = model(source, target[:, :-1])
            outputs = outputs.contiguous().view(-1, outputs.shape[-1])
            # Labels follow the outputs' device rather than a notebook-level
            # `device` global, matching what Train does.
            targets = target[:, 1:].contiguous().view(-1).to(outputs.device)
            batch_loss = criterion(outputs, targets)
            eval_loss += batch_loss.item()
    return eval_loss / len(iterator)

In [None]:
# Encoder arguments are passed by keyword: the positional call supplied them
# in an order that does not match Encoder.__init__ (MAX_LENGTH landed in
# hidden_dimension, HIDDEN_DIMENSION in n_layers, ...) and omitted `device`.
encoder = Encoder(
    input_dimension=SOURCE_VOCAB_SIZE,
    hidden_dimension=HIDDEN_DIMENSION,
    n_layers=ENCODER_NUM_LAYERS,
    n_heads=ENCODER_HEADS,
    pf_dimension=ENCODER_PFF_DIMENSION,
    dropout=ENCODER_DROPOUT,
    device=device,
    max_length=MAX_LENGTH,
).to(device)
# NOTE(review): TARGET_VOCAB_SIZE, the DECODER_* constants and the two padding
# indices must be defined before this cell runs.
decoder = Decoder(TARGET_VOCAB_SIZE, MAX_LENGTH, HIDDEN_DIMENSION, DECODER_HEADS, DECODER_PFF_DIMENSION, DECODER_NUM_LAYERS, DECODER_DROPOUT).to(device)
transformer = Transformer(encoder, decoder, source_padding_index, target_padding_index).to(device)
# Removed the unbalanced closing parenthesis (a SyntaxError) and moved the
# classifier to the same device as the other models.
dnn_model = DNN(512, HIDDEN_DIMENSION, 16, 0.2).to(device)
# This optimizer only updates the transformer's parameters.
optimizer = optim.Adam(transformer.parameters(), LR)
# NOTE(review): padding positions in the target currently contribute to the
# loss; consider `ignore_index=target_padding_index` once confirmed.
criterion = nn.CrossEntropyLoss()

In [None]:
def initialize_weights(m):
    """Xavier-uniform initialise ``m.weight`` when it has more than one dimension.

    Intended for use with ``nn.Module.apply``. Modules without a ``weight``
    attribute, with ``weight`` set to ``None`` (e.g. a norm layer created
    without affine parameters), or with a 1-D weight are left untouched.
    """
    weight = getattr(m, 'weight', None)
    # `hasattr` alone is not enough: the attribute can exist and be None, in
    # which case calling .dim() on it would raise AttributeError.
    if weight is not None and weight.dim() > 1:
        nn.init.xavier_uniform_(weight.data)

# Run initialize_weights over the transformer via nn.Module.apply, which calls
# it on the module and each of its submodules. As the last expression of the
# cell, the returned module is also displayed.
transformer.apply(initialize_weights)

In [None]:
print(f"Learning Rate: {LR}, Hidden Dimmensions: {HIDDEN_DIMENSION}")
train_losses = []
valid_losses = []
prev_epoch = 1
# [validation loss, training loss] recorded at the epoch with the lowest validation loss.
min_losses = [float('inf'), float('inf')]
# Report about ten times per run; max(1, ...) keeps the modulus non-zero when
# NUM_EPOCHS is below 10 (int(NUM_EPOCHS / 10) would be 0 and raise
# ZeroDivisionError).
report_every = max(1, NUM_EPOCHS // 10)
for epoch in range(1, NUM_EPOCHS + 1):
    # Train the full transformer: the optimizer holds the transformer's
    # parameters and Train calls model(source, target), which the bare
    # encoder's forward(src, src_mask) does not implement.
    train_loss = Train(train_loader, transformer, criterion, optimizer, CLIP)
    train_losses.append(train_loss)
    # NOTE(review): validation loss is computed on test_loader — confirm that
    # using the test split for model selection is intended.
    valid_loss = Evaluate(test_loader, transformer, criterion)
    valid_losses.append(valid_loss)
    if valid_loss < min_losses[0]:
        min_losses[0] = valid_loss
        min_losses[1] = train_loss
    if epoch % report_every == 0:
        prev_epoch = epoch + 1
        print(f"Training Loss: {train_loss:.4f} | Validation Loss: {valid_loss:.4f}")
        print(f"Training PPL: {math.exp(train_loss):.4f} | Validation PPL: {math.exp(valid_loss):.4f}")

In [None]:
# Final pass over test_loader with the trained transformer; Evaluate returns
# the mean per-batch loss. Evaluate switches the model to eval mode itself, so
# the explicit call below is redundant but harmless.
transformer.eval()
test_loss = Evaluate(test_loader, transformer, criterion)

In [None]:
# Train the DNN classifier for one pass over `outs`.
# NOTE(review): `outs` is not defined in any cell visible here — it must be an
# iterable of batches exposing `.src` (inputs) and `.trg` (labels).
#
# The classifier gets its own optimizer and loss: the notebook-level
# `optimizer` only holds the transformer's parameters, and a tensor built
# from an F1 score is detached from the graph, so back-propagating it never
# produced gradients for dnn_model.
dnn_optimizer = optim.Adam(dnn_model.parameters(), LR)
dnn_criterion = nn.CrossEntropyLoss()

dnn_model.train()
epoch_loss = 0
epoch_f1 = 0
for i, batch in enumerate(outs):
    dnn_optimizer.zero_grad()
    source = batch.src
    target = batch.trg
    # DNN.forward takes a single input tensor.
    outputs = dnn_model(source)
    outputs = outputs.contiguous().view(-1, outputs.shape[-1])
    # assumes one class label per row of `outputs` — TODO confirm the layout
    # of batch.trg (the earlier sequence shift had no counterpart once the
    # second model argument was dropped).
    targets = target.contiguous().view(-1).to(outputs.device)
    loss = dnn_criterion(outputs, targets)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(dnn_model.parameters(), CLIP)
    dnn_optimizer.step()
    epoch_loss += loss.item()
    # F1 is tracked for monitoring only, on predicted class ids rather than raw outputs.
    predictions = outputs.detach().argmax(dim=-1)
    _, _, f1 = calculate_metrics(targets.cpu().numpy(), predictions.cpu().numpy())
    epoch_f1 += f1

In [None]:
# Score the DNN classifier; `eval_loss` accumulates the macro F1 of each batch
# (the third value returned by calculate_metrics).
# NOTE(review): the loop previously iterated over `batch`, the leftover loop
# variable of the training cell. It now iterates `outs`, mirroring that cell;
# swap in a held-out iterable once one exists.
dnn_model.eval()
eval_loss = 0
with torch.no_grad():
    for eval_batch in outs:
        source = eval_batch.src
        target = eval_batch.trg
        # DNN.forward takes a single input tensor.
        outputs = dnn_model(source)
        outputs = outputs.contiguous().view(-1, outputs.shape[-1])
        # assumes one class label per row of `outputs` — TODO confirm the layout of `.trg`
        targets = target.contiguous().view(-1).to(outputs.device)
        # Metrics need class ids, not raw outputs.
        predictions = outputs.argmax(dim=-1)
        _, _, f1 = calculate_metrics(targets.cpu().numpy(), predictions.cpu().numpy())
        eval_loss += float(f1)