In [None]:
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.optim.lr_scheduler import ReduceLROnPlateau

from get_loader import  get_loader, get_pad_index, get_vocab
from utils.utils import weights_matrix
from train import train
from test import evaluate_caps, validate, average_test_BLEU  
from model.model_dropout import EncoderDecoder
from train_test_val_split import train_test_val_split

import matplotlib.pyplot as plt
import wandb
import warnings
warnings.filterwarnings("ignore")
import pickle
import bcolz


In [None]:
# Dataset locations. The data is NOT part of the repository, so each
# contributor keeps an absolute path to a local copy.
main_path_miguel = 'C:/Users/Miguel/OneDrive/Escritorio/2n curs/2n Semestre/Neural Networks and Deep Learning/Project/data_8k'
main_path_pol = r"C:\Users\polme\OneDrive - UAB\Escritorio\Universitat\Deep Learning\FinalProject\data30k"

# Active dataset root: image folder and captions file are resolved from it.
data_dir = main_path_pol + '/Images/'
captions_file = main_path_pol + '/captions.txt'

# Split the captions file into train / validation / test dataframes.
train_df, val_df, test_df = train_test_val_split(captions_file)

# Image preprocessing.
input_size = (224, 224)

# Per-channel normalisation shared by both pipelines (the standard ImageNet
# mean / standard deviation used with torchvision pretrained models).
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: resize, random horizontal flip as augmentation, tensor
# conversion, normalisation.
transform_train = transforms.Compose([
    transforms.Resize(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Validation / test pipeline: same steps without the random flip.
transform_val = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Index of the <PAD> token, taken from the training split.
pad_index = get_pad_index(
    data_dir=data_dir,
    dataframe=train_df,
    transform=transform_train,
)

# Vocabulary of each split.
vocab_train_df = get_vocab(
    data_dir=data_dir,
    dataframe=train_df,
    transform=transform_train,
)
vocab_val_df = get_vocab(
    data_dir=data_dir,
    dataframe=val_df,
    transform=transform_val,
)
vocab_test_df = get_vocab(
    data_dir=data_dir,
    dataframe=test_df,
    transform=transform_val,
)

# Data loaders: batch size 128 (shuffled) for training, 64 for validation and
# 8 for test.
train_dataloader = get_loader(
    data_dir=data_dir,
    dataframe=train_df,
    transform=transform_train,
    batch_size=128,
    num_workers=1,
    shuffle=True,
)
val_dataloader = get_loader(
    data_dir=data_dir,
    dataframe=val_df,
    transform=transform_val,
    batch_size=64,
    num_workers=1,
    shuffle=False,
)
# NOTE(review): no `shuffle` argument is passed here, so the test loader uses
# get_loader's default -- confirm that default keeps the test order fixed.
test_dataloader = get_loader(
    data_dir=data_dir,
    dataframe=test_df,
    transform=transform_val,
    batch_size=8,
    num_workers=1,
)



In [None]:
# Hyperparameters
embed_size = 300    # must match the dimensionality of the pretrained embedding
hidden_size = 512
num_layers = 2      # number of stacked LSTM layers
vocab_size_train = len(vocab_train_df)  # number of entries in the training vocabulary
learning_rate = 0.001
num_epochs = 50

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Load the pretrained embedding and build the embedding weight matrix.
# The bcolz array and the pickles are stored outside the repo; they can be
# generated from the embedding txt file with generate_embedding.py.
miguel_path_glove = r"C:\Users\Miguel\OneDrive\Escritorio\2n curs\2n Semestre\Neural Networks and Deep Learning\Project\300dim_embedding"
pol_path_glove = r"C:\Users\polme\OneDrive - UAB\Escritorio\Universitat\Deep Learning\Embeddings\300dim_embedding"

raw_glove_path = pol_path_glove
processed_glove_path = pol_path_glove

# Materialise the whole on-disk vector table in memory ([:] copies it out).
vectors = bcolz.open(f'{raw_glove_path}/6B.300.dat')[:]

# Read the word list and the word -> row-index map. Context managers make sure
# the file handles are closed (the bare open() calls used before leaked them).
# NOTE: pickle.load can execute arbitrary code -- only load files you generated.
with open(f'{processed_glove_path}/6B.300_words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open(f'{processed_glove_path}/6B.300_idx.pkl', 'rb') as idx_file:
    word2idx = pickle.load(idx_file)

# word -> embedding vector lookup.
glove = {w: vectors[word2idx[w]] for w in words}

# Weight matrix for the training vocabulary. embed_size (300) is used instead
# of a repeated literal so the embedding width is defined in a single place.
weights = weights_matrix(vocab_train_df, embed_size, glove)
weights = torch.Tensor(weights)

In [None]:
# Build the encoder-decoder captioning model, initialised with the pretrained
# embedding weights (fine-tuning of the embedding enabled), and move it to the
# selected device.
model = EncoderDecoder(
    embed_size,
    hidden_size,
    vocab_size_train,
    num_layers,
    weight_matrix=weights,
    finetune_embedding=True,
)
model = model.to(device)

# Cross-entropy loss; positions holding the <PAD> index are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=pad_index)

# Plain Adam. Weight decay (L2 regularisation) was tried to reduce overfitting
# but did not work well; a ReduceLROnPlateau scheduler
# (mode='min', factor=0.1, patience=3) was also tried and is not used.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(model)

In [None]:
# Train for num_epochs epochs, recording the average train / validation loss
# returned by the helpers after every epoch.
losses = {"train": [], "val": []}
for epoch in range(num_epochs):
    # One pass over the training data, then one pass over the validation data.
    train_loss = train(criterion, model, optimizer, train_dataloader, device)
    losses["train"].append(train_loss)

    val_loss = validate(criterion, model, val_dataloader, device)
    losses["val"].append(val_loss)

    # Per-epoch report (epoch numbers are 0-based).
    print("Epoch: {}".format(epoch))
    print("Train set - Average Train Loss: {:.5f}".format(train_loss))
    print("Validation set - Average Validation Loss: {:.5f}".format(val_loss))


# Plot both loss curves against the epoch index.
plt.figure(figsize=(10, 6))
for split, curve_label in (("train", "Training Loss"), ("val", "Validation Loss")):
    plt.plot(losses[split], label=curve_label)
plt.title("Training and Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Run the cells below to save the trained model to a .pkl file so that it can
# be reloaded later for testing.


In [None]:
# Directories where the trained-model pickles are stored. They must live outside
# the git repo: the file is very large and GitHub does not allow uploading it.
# One absolute path per contributor.
miguel_path_pickles = 'C:/Users/Miguel/OneDrive/Escritorio/2n curs/2n Semestre/Neural Networks and Deep Learning/Project/training_models'
pol_path_pickles = r"C:\Users\polme\OneDrive - UAB\Escritorio\Universitat\Deep Learning\FinalProject\pkl files"

In [None]:
# Save the model trained above as a pickle.
# The context manager closes the file, so the data is guaranteed to be flushed
# to disk (the bare open() used before never closed the handle).
with open(pol_path_pickles + '/30k_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Reload the saved model so it can be used for testing.
# The file is read from pol_path_pickles, the directory the save cell writes
# '/30k_model.pkl' to; reading from miguel_path_pickles (as before) pointed at a
# different machine's folder. Switch both cells together when changing user.
# NOTE: pickle.load can execute arbitrary code -- only load pickles that you
# produced yourself.
with open(pol_path_pickles + '/30k_model.pkl', 'rb') as model_file:
    pickled_model_without_reg = pickle.load(model_file)

In [None]:
# Test the reloaded model by analysing the captions it generates on the test set.
# The vocabulary passed in is the one built from the training split (vocab_train_df).
evaluate_caps(pickled_model_without_reg, test_dataloader, test_df, vocab_train_df, device)

In [None]:
# Presumably reports the average BLEU score of the reloaded model over the test
# set (going by the helper's name -- see test.py for the exact metric definition).
average_test_BLEU(pickled_model_without_reg, test_dataloader, test_df, vocab_train_df, device)