<h1 style="font-size:200%; font-family:cursive; color:white;">1. Import Required Libraries & Dataset</h1>

In [2]:
import ast

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
#from sklearn.metrics import classification_report
from transformers import AutoModel, BertTokenizer

# Select the compute device: use the GPU when CUDA is available, otherwise fall
# back to the CPU so the notebook still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Each split is handled in three steps: read the CSV, parse the `points` column
# (stored as text) back into nested Python lists with ast.literal_eval, and
# record the length of the longest `points` entry of that split.

# --- training split ---
df_train = pd.read_csv("train_vids.csv")
df_train['points'] = df_train['points'].apply(ast.literal_eval)
max_len_train = df_train['points'].apply(len).max()

# --- validation split ---
df_val = pd.read_csv("val_vids.csv")
df_val['points'] = df_val['points'].apply(ast.literal_eval)
max_len_val = df_val['points'].apply(len).max()

# --- test split ---
df_test = pd.read_csv("test_vids.csv")
df_test['points'] = df_test['points'].apply(ast.literal_eval)
max_len_test = df_test['points'].apply(len).max()

In [4]:
# Preview the first rows of every split together with the length of the longest
# `points` entry found in each split (shown as one tuple).
df_train.head(), df_val.head(), df_test.head(), max_len_train, max_len_val, max_len_test

(                                              points  \
 0  [[0.0, 1.0, 0.6193, 1.0, 0.0902, 0.6181, 0.947...   
 1  [[1.0, 0.9541, 1.0, 1.0, 0.867, 0.539, 0.9602,...   
 
                                          translation  id  
 0  Okay sausage today we're going to do a creole ...   0  
 1  Today I am going to be showing you how to make...   1  ,
                                               points  \
 0  [[1.0, 0.9541, 1.0, 1.0, 0.867, 0.539, 0.9602,...   
 
                                          translation  id  
 0  Today I am going to be showing you how to make...   0  ,
                                               points  \
 0  [[1.0, 0.9541, 1.0, 1.0, 0.867, 0.539, 0.9602,...   
 
                                          translation  id  
 0  Today I am going to be showing you how to make...   0  ,
 65,
 65,
 65)

In [5]:
def add_padding(max_frames, pointSeries: pd.Series, n_features: int = 300):
    """Pad every frame sequence in ``pointSeries`` to ``max_frames`` rows.

    Each element of ``pointSeries`` is a 2-D sequence (frames x features).
    Sequences shorter than ``max_frames`` get extra rows appended: every value
    in a padding row is ``-1`` except every fourth column starting at column 3,
    which is ``0``. Sequences that already have ``max_frames`` rows or more are
    left at their original length.

    The input Series is not modified; a new Series with the same index and name
    is returned, and every element of it is a float ``numpy.ndarray`` (padded or
    not), so downstream code sees one consistent element type.

    Parameters
    ----------
    max_frames : int
        Target number of rows (frames) per sequence.
    pointSeries : pd.Series
        Series whose elements are 2-D sequences of numbers.
    n_features : int, default 300
        Row width used only for an empty sequence, whose width cannot be read
        from the data. Non-empty sequences are padded to their own width.

    Returns
    -------
    pd.Series
        New Series of 2-D float arrays aligned with ``pointSeries``.
    """
    # Explicit 1-D object array: keeps pandas/numpy from trying to merge the
    # equally-shaped 2-D arrays into a single 3-D block.
    padded = np.empty(len(pointSeries), dtype=object)

    # Iterate by position so the function also works when the Series does not
    # carry a default 0..n-1 index.
    for pos, frames in enumerate(pointSeries):
        arr = np.asarray(frames, dtype=float)
        if arr.size == 0:
            # An empty sequence carries no width information.
            arr = arr.reshape(0, n_features)

        missing = max_frames - arr.shape[0]
        if missing > 0:
            filler = np.full((missing, arr.shape[1]), -1.0)
            # Every fourth column (3, 7, 11, ...) of a padding row is 0, not -1.
            filler[:, 3::4] = 0.0
            arr = np.concatenate((arr, filler), axis=0)

        padded[pos] = arr

    return pd.Series(padded, index=pointSeries.index, name=pointSeries.name)

In [7]:
# Pad the `points` column of every split up to that split's own longest sequence.
df_train["points"], df_val["points"], df_test["points"] = (
    add_padding(max_len_train, df_train["points"]),
    add_padding(max_len_val, df_val["points"]),
    add_padding(max_len_test, df_test["points"]),
)

((2,), (1,), (1,))

<h1 style="font-size:200%; font-family:cursive; color:white;">3. Import BERT (bert-base-uncased)</h1>

In [None]:
# Load the tokenizer and the pretrained encoder from the same
# 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')
bert = AutoModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')

In [None]:
# Tokenize and encode the `translation` text of every split.
# `padding=True` is passed so that the sequences encoded in one call are padded.

# training set
tokens_label_train = tokenizer.batch_encode_plus(list(df_train['translation']), padding=True)

# validation set
tokens_label_val = tokenizer.batch_encode_plus(list(df_val['translation']), padding=True)

# test set
tokens_label_test = tokenizer.batch_encode_plus(list(df_test['translation']), padding=True)

<u><h2 style="font-size:170%; font-family:cursive;">What is the maximum sequence length of the input?</h2></u>

<p style="font-size:150%; font-family:verdana;">The maximum input sequence length supported by bert-base-uncased is 512 tokens.</p>

<h1 style="font-size:200%; font-family:cursive; color:white;">5. List to Tensors</h1>

In [8]:
def create_attention_mask_from_points(seq_tensor):
    """Build a 0/1 mask that marks padding frames in a batch of point sequences.

    A position along the leading axes is treated as padding when its first
    three values on the last axis are all ``-1`` and its fourth value is ``0``.
    Every value of such a position is set to ``0`` in the mask; everything else
    is ``1``.

    Parameters
    ----------
    seq_tensor : torch.Tensor
        Tensor whose last axis holds at least four values per position.

    Returns
    -------
    torch.Tensor
        ``torch.long`` tensor with the same shape and device as ``seq_tensor``.
        NOTE(review): the mask keeps the feature axis of the input rather than
        being reduced to one value per frame - confirm this is what the
        consumer of the mask expects.
    """
    coords_missing = (seq_tensor[..., :3] == -1).all(dim=-1)
    flag_is_zero = seq_tensor[..., 3] == 0
    is_padding = coords_missing & flag_is_zero

    # Allocate the mask on the input's device so that the boolean-index write
    # below also works when seq_tensor does not live on the CPU.
    mask = torch.ones(seq_tensor.shape, dtype=torch.long, device=seq_tensor.device)
    mask[is_padding] = 0
    return mask

In [9]:
# convert lists to tensors
# For every split this produces:
#   *_seq  - the per-video point sequences stacked into one tensor
#   *_mask - the padding mask derived from those points
#   *_y    - the token ids of the tokenized translations, as a tensor
#            (TensorDataset and the loss need tensors, not Python lists)

train_seq = torch.stack([torch.tensor(seq) for seq in df_train['points'].tolist()])
train_mask = create_attention_mask_from_points(train_seq)
train_y = torch.tensor(tokens_label_train['input_ids'])

val_seq = torch.stack([torch.tensor(seq) for seq in df_val['points'].tolist()])
val_mask = create_attention_mask_from_points(val_seq)
val_y = torch.tensor(tokens_label_val['input_ids'])

test_seq = torch.stack([torch.tensor(seq) for seq in df_test['points'].tolist()])
test_mask = create_attention_mask_from_points(test_seq)
test_y = torch.tensor(tokens_label_test['input_ids'])

<h1 style="font-size:200%; font-family:cursive; color:white;">6. Data Loader</h1>

In [None]:

from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# number of examples per mini-batch
batch_size = 32

# --- training split: wrap the tensors, sample with RandomSampler ---
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(data_source=train_data)
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

# --- validation split: wrap the tensors, sample with SequentialSampler ---
val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(data_source=val_data)
val_dataloader = DataLoader(
    dataset=val_data,
    batch_size=batch_size,
    sampler=val_sampler,
)

<h1 style="font-size:200%; font-family:cursive; color:white;">7. Model Architecture</h1>

In [None]:
# Size of the tokenizer vocabulary; BERT_Arch takes this value as the width of
# its output layer.
vocab_size = tokenizer.vocab_size

In [None]:
class BERT_Arch(nn.Module):
    """Per-position prediction head on top of a BERT-style encoder.

    The encoder's last hidden state is passed, position by position, through
    ``Linear -> ReLU -> Dropout -> Linear -> LogSoftmax``, so the module returns
    log-probabilities over ``vocab_size`` classes for every sequence position.

    Parameters
    ----------
    bert : nn.Module
        Encoder called as ``bert(seq_input, attention_mask=mask,
        return_dict=False)``; element 0 of its result is used as the
        per-position hidden states.
    vocab_size : int
        Number of output classes per position.
    """

    def __init__(self, bert, vocab_size):
        super(BERT_Arch, self).__init__()

        self.bert = bert

        # Width of the encoder's hidden states. Read it from the encoder's
        # config when one is exposed, so encoders of other sizes work too;
        # otherwise fall back to 768, the value that was previously hard-coded.
        hidden_size = getattr(getattr(bert, "config", None), "hidden_size", 768)

        # Dropout layer
        self.dropout = nn.Dropout(0.1)

        # ReLU activation function
        self.relu = nn.ReLU()

        # Dense layer 1
        self.fc1 = nn.Linear(hidden_size, 512)

        # Dense layer 2 (Output layer)
        self.fc2 = nn.Linear(512, vocab_size)

        # Log-softmax over the class axis (the output is log-probabilities)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, seq_input, mask):
        """Return log-probabilities of shape (batch, sequence_length, vocab_size).

        NOTE(review): ``seq_input`` is handed to the encoder as its first
        positional argument; for Hugging Face ``BertModel`` that argument is
        ``input_ids`` (integer token ids). Confirm callers do not pass raw
        float features here - those would need a projection and
        ``inputs_embeds`` instead.
        """
        outputs = self.bert(seq_input, attention_mask=mask, return_dict=False)

        # outputs[0] is the last hidden state for each position
        x = self.fc1(outputs[0])  # (batch_size, sequence_length, 512)

        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc2(x)  # (batch_size, sequence_length, vocab_size)

        # Normalize the scores of every position into log-probabilities
        x = self.softmax(x)

        return x


In [None]:
# pass the pre-trained BERT and the output vocabulary size to our architecture
# (BERT_Arch.__init__ requires both arguments)
model = BERT_Arch(bert, vocab_size)

# push the model to the selected device
model = model.to(device)

In [None]:
# Use PyTorch's own AdamW: the AdamW class shipped with Hugging Face
# transformers is deprecated in favour of torch.optim.AdamW.
# eps and weight_decay are set explicitly to the defaults of the former
# transformers implementation so the optimisation behaviour stays the same.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=1e-5,
    eps=1e-6,
    weight_decay=0.0,
)

In [None]:
# Training schedule: number of full passes over the training data.
epochs = 10

# Criterion used to score predictions against labels.
cross_entropy = nn.CrossEntropyLoss()

<h1 style="font-size:200%; font-family:cursive; color:white;">8. Fine-Tune</h1>

In [None]:
def train():
    """Run one training epoch over ``train_dataloader``.

    Uses the notebook-level ``model``, ``optimizer``, ``cross_entropy`` and
    ``device``. Every batch is unpacked as ``(inputs, mask, labels)``.

    Returns
    -------
    tuple
        ``(avg_loss, total_preds)``: the mean batch loss of the epoch and the
        model outputs of *all* batches concatenated along the first axis as a
        NumPy array.

    Raises
    ------
    ValueError
        If ``train_dataloader`` contains no batches.
    """
    num_batches = len(train_dataloader)
    if num_batches == 0:
        raise ValueError("train_dataloader is empty; nothing to train on")

    model.train()
    total_loss = 0.0
    total_preds = []

    # iterate over batches
    for step, batch in enumerate(train_dataloader):

        # progress update after every 50 batches.
        if step % 50 == 0 and step != 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, num_batches))

        # push the batch to the compute device
        sent_id, mask, labels = [r.to(device) for r in batch]

        # clear previously calculated gradients
        model.zero_grad()

        # get model predictions for the current batch
        preds = model(sent_id, mask)

        # The loss expects (N, classes) scores and (N,) targets. Flatten all
        # leading axes so per-position outputs such as (batch, seq, classes)
        # are scored correctly; plain (batch, classes) outputs are unaffected.
        # Assumes the class axis of the model output is the last one.
        loss = cross_entropy(preds.reshape(-1, preds.size(-1)), labels.reshape(-1))

        # add on to the total loss
        total_loss += loss.item()

        # backward pass to calculate the gradients
        loss.backward()

        # clip the gradients to 1.0 to guard against exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # update parameters
        optimizer.step()

        # keep the predictions of *every* batch (detached, on the CPU)
        total_preds.append(preds.detach().cpu().numpy())

    # compute the training loss of the epoch
    avg_loss = total_loss / num_batches

    # merge the per-batch arrays into one array along the sample axis
    total_preds = np.concatenate(total_preds, axis=0)

    # returns the loss and predictions
    return avg_loss, total_preds

In [None]:
# function for evaluating the model
def evaluate():
    """Score the model on ``val_dataloader`` without updating any weights.

    Uses the notebook-level ``model``, ``cross_entropy`` and ``device``. Every
    batch is unpacked as ``(inputs, mask, labels)``.

    Returns
    -------
    tuple
        ``(avg_loss, total_preds)``: the mean batch loss and the model outputs
        of all batches concatenated along the first axis as a NumPy array.

    Raises
    ------
    ValueError
        If ``val_dataloader`` contains no batches.
    """
    print("\nEvaluating...")

    num_batches = len(val_dataloader)
    if num_batches == 0:
        raise ValueError("val_dataloader is empty; nothing to evaluate")

    # deactivate dropout layers
    model.eval()

    total_loss = 0.0

    # list collecting the predictions of every batch
    total_preds = []

    # iterate over batches
    for step, batch in enumerate(val_dataloader):

        # Progress update every 50 batches.
        if step % 50 == 0 and step != 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, num_batches))

        # push the batch to the compute device
        sent_id, mask, labels = [t.to(device) for t in batch]

        # deactivate autograd
        with torch.no_grad():

            # model predictions
            preds = model(sent_id, mask)

            # The loss expects (N, classes) scores and (N,) targets. Flatten
            # all leading axes so per-position outputs are scored correctly.
            # Assumes the class axis of the model output is the last one.
            loss = cross_entropy(preds.reshape(-1, preds.size(-1)), labels.reshape(-1))

            total_loss += loss.item()

            total_preds.append(preds.detach().cpu().numpy())

    # compute the validation loss of the epoch
    avg_loss = total_loss / num_batches

    # merge the per-batch arrays into one array along the sample axis
    total_preds = np.concatenate(total_preds, axis=0)

    return avg_loss, total_preds

In [None]:
# Best validation loss seen so far; starts at infinity so the first epoch
# always counts as an improvement.
best_valid_loss = float('inf')

# per-epoch history of the training and validation loss
train_losses=[]
valid_losses=[]

# run train() and evaluate() once per epoch
for epoch in range(epochs):
     
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
    
    # train for one epoch; the returned predictions are not used here
    train_loss, _ = train()
    
    # score on the validation set; the returned predictions are not used here
    valid_loss, _ = evaluate()
    
    # checkpoint the weights whenever the validation loss improves
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')
    
    # record this epoch's losses
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    
    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')

In [None]:
# load weights of best model
# map_location remaps tensors that were saved from another device (for example
# a checkpoint written on a GPU and read on a CPU-only machine) onto `device`.
path = 'saved_weights.pt'
model.load_state_dict(torch.load(path, map_location=device))

<h1 style="font-size:200%; font-family:cursive; color:white;">9. Make Predictions</h1>

In [None]:
# get predictions for test data
# Switch to inference mode explicitly so dropout is disabled even when this
# cell is run without a preceding call to evaluate().
model.eval()
with torch.no_grad():
    preds = model(test_seq.to(device), test_mask.to(device))
    preds = preds.detach().cpu().numpy()

In [None]:
# model's performance
# Take the argmax over the last axis, where the class scores are expected to
# live (for plain (samples, classes) output this is the same as axis 1).
preds = np.argmax(preds, axis=-1)

# classification_report works on 1-D label arrays, so flatten both sides.
# NOTE(review): classification_report comes from sklearn.metrics and must be
# imported for this cell to run - confirm the import is enabled.
print(classification_report(np.asarray(test_y).reshape(-1), preds.reshape(-1)))