<h1 style="font-size:200%; font-family:cursive; color:white;">1. Import Required Libraries & Dataset</h1>

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from transformers import AutoModel, BertTokenizer
import random

# Select the compute device: prefer the GPU, but fall back to the CPU so the
# notebook can still be executed on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Check if PyTorch is using the GPU

In [2]:
# Summarise the CUDA environment. The per-device queries raise when no GPU is
# present, so they are only evaluated when CUDA is actually available.
cuda_available = torch.cuda.is_available()
(
    cuda_available,
    torch.cuda.device_count(),
    torch.cuda.current_device() if cuda_available else None,
    torch.cuda.device(0) if cuda_available else None,
    torch.cuda.get_device_name(0) if cuda_available else None,
    device,
)

(True,
 1,
 0,
 <torch.cuda.device at 0x7efc1846aa10>,
 'NVIDIA RTX 3500 Ada Generation Laptop GPU',
 device(type='cuda'))

In [3]:
# Load how2sign.csv into a DataFrame; its 'translation' column is used as the
# target text later in the notebook.
df_videos = pd.read_csv("/app/AI-Module/Resources/Datasets/how2sign.csv")
# Alternative location of the same file on a local Windows checkout (kept for reference):
#df_videos = pd.read_csv("C:/Users/48519558/Desktop/SignAI-ML/AI-Module/Resources/Datasets/how2sign.csv")

In [None]:
# Load the pickled point data that is later attached to df_videos as the "points" column.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
point_list = pd.read_pickle("/app/AI-Module/Resources/Datasets/how2sign_points.pkl")
# Alternative location of the same file on a local Windows checkout (kept for reference):
#point_list = pd.read_pickle("C:/Users/48519558/Desktop/SignAI-ML/AI-Module/Resources/Datasets/how2sign_points.pkl")

In [None]:
# Inspect the type of the first entry (the original line was missing the closing
# bracket and could not be parsed).
type(point_list.iloc[0])

In [6]:
# Attach the loaded point data to the metadata frame as a new "points" column.
# NOTE(review): if point_list is a pandas Series it is aligned on its index, not
# by position — confirm that it shares df_videos' index.
df_videos["points"] = point_list

In [None]:
# Largest len() among all entries of the "points" column; used afterwards as the
# common length every sequence is padded to.
max_len = df_videos['points'].map(len).max()
df_videos.head(), max_len

In [13]:
def add_padding(max_frames, pointSeries: pd.Series, num_features=None):
    """Pad every sequence in ``pointSeries`` with filler frames up to ``max_frames``.

    A filler frame holds -1 in every column except every 4th column starting at
    column 3, which holds 0.

    The sequences are visited by iteration instead of ``pointSeries[i]`` label
    lookups, so the function works for any index (for example after rows were
    filtered or shuffled), and the input Series is left untouched.

    Args:
        max_frames: Target number of frames (rows) per sequence.
        pointSeries: Series whose elements are 2-D array-likes of shape
            (frames, features).
        num_features: Width of the filler frames. When omitted it is read from
            the sequence being padded; 2172 is used if that sequence has no
            second dimension to read it from.

    Returns:
        A new object-dtype Series with the same index and name as
        ``pointSeries``. Sequences that already have at least ``max_frames``
        frames are passed through unchanged.
    """
    padded = np.empty(len(pointSeries), dtype=object)

    for position, points in enumerate(pointSeries):
        missing = max_frames - len(points)
        if missing > 0:
            width = num_features
            if width is None:
                shape = np.shape(points)
                width = shape[1] if len(shape) > 1 else 2172
            filler = np.full((missing, width), -1)
            filler[:, 3::4] = 0
            points = np.concatenate((points, filler), axis=0)
        # Element-wise assignment keeps each 2-D array as a single object entry.
        padded[position] = points

    return pd.Series(padded, index=pointSeries.index, name=pointSeries.name)

In [None]:
# Pad every entry of the "points" column to max_len frames and store the result
# back in the same column.
df_videos['points'] = add_padding(max_len, df_videos['points'])

In [None]:
# Hold out 100/500 of all rows for testing, then 100/400 of the remaining rows
# for validation; both splits reuse the same seed.
seed = 31991
test_fraction = 100 / 500
val_fraction = 100 / 400

X_df = df_videos['points'].to_frame()
y_df = df_videos['translation'].to_frame()

X_train, X_test, y_train, y_test = train_test_split(
    X_df, y_df, test_size=test_fraction, random_state=seed
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=val_fraction, random_state=seed
)

In [None]:
# Preview the first rows of the feature and target frame of every split.
X_train.head(), y_train.head(), X_val.head(), y_val.head(), X_test.head(), y_test.head()

In [None]:
# Show the (rows, columns) shape of the feature and target frame of every split.
X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

<h1 style="font-size:200%; font-family:cursive; color:white;">3. Import BERT-base-uncased</h1>

In [None]:
# The pretrained model and its tokenizer are both loaded from the same checkpoint name.
checkpoint = 'bert-base-uncased'
bert = AutoModel.from_pretrained(checkpoint)
tokenizer = BertTokenizer.from_pretrained(checkpoint)

In [16]:
def _encode_translations(frame):
    """Tokenize and encode the 'translation' column of ``frame`` with padding enabled."""
    sentences = frame['translation'].tolist()
    return tokenizer.batch_encode_plus(sentences, padding=True)


# Encode the target sentences of the training, validation and test splits.
tokens_label_train = _encode_translations(y_train)
tokens_label_val = _encode_translations(y_val)
tokens_label_test = _encode_translations(y_test)

<u><h2 style="font-size:170%; font-family:cursive;">What is the maximum sequence length of the input?</h2></u>

<p style="font-size:150%; font-family:verdana;">The maximum input sequence length supported by BERT-base is 512 tokens.</p>

<h1 style="font-size:200%; font-family:cursive; color:white;">5. List to Tensors</h1>

In [8]:
def create_attention_mask_from_points(seq_tensor):
    """Build a 0/1 mask marking which positions of ``seq_tensor`` hold missing data.

    A position along the leading axes counts as missing when the first three
    values of its last axis are all -1 and the fourth value is 0. Every element
    of such a position is set to 0 in the mask; all other elements are 1.

    Args:
        seq_tensor: Tensor whose last axis has at least four entries.

    Returns:
        A ``torch.long`` tensor with the same shape as ``seq_tensor``.

    NOTE(review): the mask keeps the full shape of the input (including the last
    axis) rather than one value per position — confirm this is what the
    consumer of the mask expects.
    """
    coords_missing = seq_tensor[..., :3].eq(-1).all(dim=-1)
    visibility_zero = seq_tensor[..., 3].eq(0)
    is_missing = coords_missing & visibility_zero

    attention = torch.ones(seq_tensor.shape, dtype=torch.long)
    attention[is_missing] = 0
    return attention

In [9]:
# Convert the point sequences and the encoded targets of every split to tensors.

def _points_to_tensor(frame):
    """Stack the per-row arrays of ``frame['points']`` into one tensor along a new first axis."""
    # as_tensor avoids an extra copy of each array; torch.stack allocates the result anyway.
    return torch.stack([torch.as_tensor(seq) for seq in frame['points'].tolist()])


def _token_ids_to_tensor(encoding):
    """Return the 'input_ids' of a tokenizer encoding as a ``torch.long`` tensor."""
    # TensorDataset only accepts tensors, so the plain Python lists returned by
    # the tokenizer have to be converted before they can be wrapped.
    return torch.as_tensor(encoding['input_ids'], dtype=torch.long)


train_seq = _points_to_tensor(X_train)
train_mask = create_attention_mask_from_points(train_seq)
train_y = _token_ids_to_tensor(tokens_label_train)

val_seq = _points_to_tensor(X_val)
val_mask = create_attention_mask_from_points(val_seq)
val_y = _token_ids_to_tensor(tokens_label_val)

test_seq = _points_to_tensor(X_test)
test_mask = create_attention_mask_from_points(test_seq)
test_y = _token_ids_to_tensor(tokens_label_test)

<h1 style="font-size:200%; font-family:cursive; color:white;">6. Data Loader</h1>

In [None]:

from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Number of samples per batch for both loaders.
batch_size = 32

# Training split: samples are drawn in random order.
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

# Validation split: samples are visited in their stored order.
val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(
    dataset=val_data,
    batch_size=batch_size,
    sampler=val_sampler,
)

<h1 style="font-size:200%; font-family:cursive; color:white;">7. Model Architecture</h1>

In [None]:
# Size of the tokenizer vocabulary; used as the width of the model's output layer.
vocab_size = tokenizer.vocab_size
class BERT_Arch(nn.Module):
    """Per-position vocabulary classifier stacked on top of a supplied encoder.

    The first output of the encoder is passed through
    Linear(768 -> 512) -> ReLU -> Dropout(0.1) -> Linear(512 -> vocab_size)
    and normalised with a log-softmax over the last axis.

    Args:
        bert: Encoder module. It is called as
            ``bert(seq_input, attention_mask=mask, return_dict=False)`` and the
            first element of its result must have 768 features on its last axis.
        vocab_size: Number of output classes per position.

    NOTE(review): ``seq_input`` is forwarded unchanged as the encoder's first
    positional argument — confirm the tensors fed in by the notebook match what
    the encoder expects there.
    """

    def __init__(self, bert, vocab_size):
        super().__init__()

        # Encoder supplied by the caller.
        self.bert = bert

        # Regularisation and non-linearity applied between the two dense layers.
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()

        # Projection head: 768 -> 512 -> vocab_size.
        self.fc1 = nn.Linear(768, 512)
        self.fc2 = nn.Linear(512, vocab_size)

        # Log-probabilities over the last (class) axis.
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, seq_input, mask):
        """Return per-position log-probabilities of shape (batch_size, sequence_length, vocab_size)."""
        # Element 0 of the encoder result is the per-position hidden state.
        hidden_states = self.bert(seq_input, attention_mask=mask, return_dict=False)[0]

        projected = self.dropout(self.relu(self.fc1(hidden_states)))
        logits = self.fc2(projected)

        return self.softmax(logits)

In [None]:
def _sentence_bleu(reference, hypothesis, max_n=4):
    """Return the unsmoothed BLEU score (n-grams up to ``max_n``) of one token list against one reference."""
    from collections import Counter
    import math

    if not reference or not hypothesis:
        return 0.0

    log_precision_sum = 0.0
    for n in range(1, max_n + 1):
        hyp_ngrams = Counter(tuple(hypothesis[i:i + n]) for i in range(len(hypothesis) - n + 1))
        ref_ngrams = Counter(tuple(reference[i:i + n]) for i in range(len(reference) - n + 1))
        # Clipped matches: each hypothesis n-gram counts at most as often as it occurs in the reference.
        matches = sum((hyp_ngrams & ref_ngrams).values())
        if matches == 0:
            # Without smoothing a single empty n-gram order makes the geometric mean zero.
            return 0.0
        log_precision_sum += math.log(matches / sum(hyp_ngrams.values()))

    brevity_penalty = 1.0
    if len(hypothesis) < len(reference):
        brevity_penalty = math.exp(1.0 - len(reference) / len(hypothesis))
    return brevity_penalty * math.exp(log_precision_sum / max_n)


def compute_bleu(preds, labels, pad_token_id=0):
    """Return the mean sentence-level BLEU score of a batch of predictions.

    The previous version referenced ``nltk`` without importing it and scored the
    whole batch as if it were a single sentence. BLEU is now computed per
    sample with the standard library only and averaged over the batch.

    Args:
        preds: Scores of shape (batch, sequence, vocab), or (sequence, vocab)
            for a single sample; the predicted token is the argmax over the
            last axis.
        labels: Reference token ids of shape (batch, sequence), or (sequence,)
            for a single sample.
        pad_token_id: Token id removed from both predictions and references
            before scoring. Defaults to 0.

    Returns:
        The average BLEU score as a float in [0, 1]; 0.0 for an empty batch.
    """
    pred_ids = torch.argmax(preds, dim=-1)
    label_ids = torch.as_tensor(labels)

    # Promote a single sample to a batch of one so both cases share one code path.
    if pred_ids.dim() == 1:
        pred_ids = pred_ids.unsqueeze(0)
        label_ids = label_ids.reshape(1, -1)

    scores = []
    for pred_row, label_row in zip(pred_ids.tolist(), label_ids.tolist()):
        hypothesis = [token for token in pred_row if token != pad_token_id]
        reference = [token for token in label_row if token != pad_token_id]
        scores.append(_sentence_bleu(reference, hypothesis))

    return sum(scores) / len(scores) if scores else 0.0

In [None]:
# Wrap the pretrained encoder in the per-token classification head.
model = BERT_Arch(bert, vocab_size)

# Move the model's parameters to the selected device.
model = model.to(device)

# `transformers.AdamW` is deprecated and no longer shipped by recent transformers
# releases; torch.optim.AdamW is the supported replacement. eps and weight_decay
# are pinned to the defaults of the old transformers implementation so the
# optimisation behaves as before.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)

# Loss function applied to the flattened per-token outputs.
cross_entropy = nn.CrossEntropyLoss()

# Number of training epochs.
epochs = 10

<h1 style="font-size:200%; font-family:cursive; color:white;">8. Fine-Tune</h1>

In [None]:
def perturb_hyperparameters(hyperparameters):
    """Return a copy of ``hyperparameters`` whose learning rate is randomly rescaled.

    The 'learning_rate' entry is multiplied by a factor drawn uniformly from
    [0.8, 1.2]; every other entry is copied unchanged and the input mapping is
    not modified.
    """
    perturbed = hyperparameters.copy()
    base_lr = hyperparameters['learning_rate']
    scale = random.uniform(0.8, 1.2)
    perturbed['learning_rate'] = base_lr * scale
    return perturbed


def train(hyperparameters):
    """Train the global ``model`` for one pass over ``train_dataloader``.

    Args:
        hyperparameters: Mapping of trial hyperparameters. When it contains a
            'learning_rate' entry, that value is written to every parameter
            group of the global ``optimizer`` before training starts.

    Returns:
        Tuple ``(avg_loss, avg_bleu)`` averaged over the batches of the epoch.
    """
    # Apply the trial's learning rate; previously the argument was ignored, so a
    # perturbed learning rate never reached the optimizer.
    if hyperparameters and 'learning_rate' in hyperparameters:
        for param_group in optimizer.param_groups:
            param_group['lr'] = hyperparameters['learning_rate']

    model.train()  # Set the model to training mode
    total_loss = 0.0
    total_bleu = 0.0

    # Loop through the batches of training data
    for batch in train_dataloader:
        optimizer.zero_grad()  # Reset gradients

        input_ids, attention_mask, labels = batch  # Extract inputs, mask and labels
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(input_ids, attention_mask)
        loss = cross_entropy(outputs.view(-1, vocab_size), labels.view(-1))

        # Backward pass
        loss.backward()
        optimizer.step()

        # Accumulate loss and BLEU score for the batch; the outputs are detached
        # because the metric does not need the autograd graph.
        total_loss += loss.item()
        bleu_score = compute_bleu(outputs.detach(), labels)
        total_bleu += bleu_score

    # Calculate average loss and BLEU score for the epoch
    avg_loss = total_loss / len(train_dataloader)
    avg_bleu = total_bleu / len(train_dataloader)

    return avg_loss, avg_bleu


def evaluate(hyperparameters):
    """Evaluate the global ``model`` on ``val_dataloader`` without updating weights.

    The previous version iterated over ``valid_dataloader``, a name the notebook
    never defines; the validation loader is called ``val_dataloader``.

    Args:
        hyperparameters: Mapping of trial hyperparameters. Accepted for symmetry
            with ``train``; it is not used during evaluation.

    Returns:
        Tuple ``(avg_loss, avg_bleu)`` averaged over the validation batches.
    """
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_bleu = 0.0

    with torch.no_grad():
        for batch in val_dataloader:
            input_ids, attention_mask, labels = batch
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask)
            loss = cross_entropy(outputs.view(-1, vocab_size), labels.view(-1))

            # Accumulate loss and BLEU score
            total_loss += loss.item()
            bleu_score = compute_bleu(outputs, labels)
            total_bleu += bleu_score

    # Calculate average loss and BLEU score for the validation set
    avg_loss = total_loss / len(val_dataloader)
    avg_bleu = total_bleu / len(val_dataloader)

    return avg_loss, avg_bleu


In [None]:
# Initial bookkeeping
best_valid_loss = float('inf')
best_bleu_score = 0
train_losses, valid_losses = [], []
# Each list needs its own literal: unpacking a single empty list into two names raises.
train_bleu_scores, valid_bleu_scores = [], []

# Starting hyperparameters shared by every trial (this name was previously
# undefined); 1e-5 is the learning rate the notebook's optimizer is created with.
hyperparameters = {'learning_rate': 1e-5}


def _snapshot_state(module):
    """Return an independent CPU copy of ``module``'s state dict.

    ``state_dict().copy()`` only copies the dictionary: its tensors still alias
    the live parameters, so such a "snapshot" changes whenever the model trains.
    """
    return {name: tensor.detach().cpu().clone() for name, tensor in module.state_dict().items()}


# Simulated Population Based Training with several trials.
# NOTE(review): all trials share one model and one optimizer, so optimizer
# state carries over from trial to trial — confirm this is acceptable.
num_trials = 8
trials = [
    {'hyperparameters': hyperparameters.copy(), 'state': None, 'valid_loss': float('inf')}
    for _ in range(num_trials)
]

for epoch in range(epochs):

    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    for trial_id, trial in enumerate(trials):
        # Restore this trial's weights if it has been trained before
        if trial['state']:
            model.load_state_dict(trial['state'])

        # Train and evaluate the model with the trial's current hyperparameters
        train_loss, train_bleu = train(trial['hyperparameters'])
        valid_loss, valid_bleu = evaluate(trial['hyperparameters'])

        # Keep the weights of the best model seen so far
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_bleu_score = valid_bleu
            torch.save(model.state_dict(), 'saved_weights.pt')

        # Store the trial's state and results
        trial['state'] = _snapshot_state(model)
        trial['valid_loss'] = valid_loss
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        train_bleu_scores.append(train_bleu)
        valid_bleu_scores.append(valid_bleu)

        # Exploit/explore: an underperforming trial adopts the weights of the
        # best other trial and a perturbed copy of its hyperparameters.
        if valid_loss > best_valid_loss * 1.1:
            # Only trials that have been trained at least once can donate weights;
            # the list may be empty, which random.choice could not handle.
            donors = [t for t in trials if t is not trial and t['state'] is not None]
            good_trial = min(donors, key=lambda t: t['valid_loss']) if donors else None
            if good_trial is not None and good_trial['valid_loss'] < valid_loss:
                print(f'\nTrial {trial_id} underperforming, copying weights from better performing trials...')
                trial['state'] = good_trial['state']
                trial['valid_loss'] = good_trial['valid_loss']
                trial['hyperparameters'] = perturb_hyperparameters(good_trial['hyperparameters'])

        # Print the statistics of this trial
        print(f'Trial {trial_id} | Training Loss: {train_loss:.3f} | Training BLEU: {train_bleu:.3f}')
        print(f'Validation Loss: {valid_loss:.3f} | Validation BLEU: {valid_bleu:.3f}')

In [None]:
# Load the weights of the best model, i.e. the state dict written to
# 'saved_weights.pt' whenever the validation loss improved during fine-tuning.
path = 'saved_weights.pt'
model.load_state_dict(torch.load(path))

<h1 style="font-size:200%; font-family:cursive; color:white;">9. Make Predictions</h1>

In [None]:
# Get predictions for the test data.
# Switch to evaluation mode explicitly so dropout is disabled even when this
# cell is run while the model is still in training mode.
model.eval()
with torch.no_grad():
    preds = model(test_seq.to(device), test_mask.to(device))
    preds = preds.detach().cpu().numpy()

In [None]:
# Model's performance on the test set.
# The model emits (samples, sequence_length, vocab_size) scores, so the predicted
# token id is the argmax over the LAST axis (axis=1 would reduce over the sequence).
# A new name is used so that re-running this cell does not argmax its own output.
pred_token_ids = np.argmax(preds, axis=-1)
true_token_ids = np.asarray(test_y)

if pred_token_ids.shape != true_token_ids.shape:
    raise ValueError(
        f"Predictions {pred_token_ids.shape} and labels {true_token_ids.shape} "
        "must have the same (samples, sequence_length) shape"
    )

# classification_report expects flat 1-D label arrays; boolean indexing flattens
# both arrays and drops the padding positions of the references.
is_real_token = true_token_ids != tokenizer.pad_token_id
print(classification_report(true_token_ids[is_real_token], pred_token_ids[is_real_token], zero_division=0))