In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.optim.lr_scheduler import ReduceLROnPlateau


from get_loader import  get_loader, get_pad_index, get_vocab, show_image

from utils.utils import weights_matrix
from train import train, train_and_visualize_caps


from test import evaluate_caps, validate 
from model.model_dropout import EncoderDecoder_dropout


import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import wandb
import warnings
warnings.filterwarnings("ignore")
import pickle
import bcolz

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# Dataset locations. These are machine-specific absolute paths and must be
# adapted when the notebook is run on another machine.
main_path_miguel = 'C:/Users/Miguel/OneDrive/Escritorio/dlnn-project_ia-group_2/'
data_dir = main_path_miguel + 'data/Images/'
captions_file = main_path_miguel + 'data/captions.txt'

# Target size passed to transforms.Resize for every image.
input_size = (224, 224)

# Per-channel normalization statistics shared by both pipelines below.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: resize -> random horizontal flip (data augmentation)
# -> tensor conversion -> per-channel normalization.
transform_train = transforms.Compose([
    transforms.Resize(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# Validation/test pipeline: identical to the training one but without the
# random flip, so evaluation images are not augmented.
transform_val = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# Split the data at the image level: the unique image names are partitioned
# first, and every caption row then follows its image into the same subset.
df_captions = pd.read_csv(captions_file)
unique_images = df_captions['image'].unique()

# 75% of the images go to training; the held-out 25% is then halved into
# validation and test. A fixed random_state keeps the split reproducible.
train_images, testval_images = train_test_split(
    unique_images, test_size=0.25, random_state=42
)
val_images, test_images = train_test_split(
    testval_images, test_size=0.5, random_state=42
)


def _captions_for(image_names):
    """Return the rows of df_captions whose 'image' value is in image_names."""
    return df_captions[df_captions['image'].isin(image_names)]


train_df = _captions_for(train_images)
val_df = _captions_for(val_images)
test_df = _captions_for(test_images)

# Padding index derived from the training data; it is later handed to the
# loss function as ignore_index.
pad_index = get_pad_index(
    data_dir=data_dir,
    dataframe=train_df,
    transform=transform_train,
)

# One vocabulary per subset. The training vocabulary is the one used to size
# the model and to build the embedding weight matrix.
vocab_train_df = get_vocab(
    data_dir=data_dir,
    dataframe=train_df,
    transform=transform_train,
)
vocab_val_df = get_vocab(
    data_dir=data_dir,
    dataframe=val_df,
    transform=transform_val,
)
vocab_test_df = get_vocab(
    data_dir=data_dir,
    dataframe=test_df,
    transform=transform_val,
)

# Data loaders: training uses batches of 32 and is shuffled; validation uses
# batches of 8 in a fixed order.
train_dataloader = get_loader(
    data_dir=data_dir,
    dataframe=train_df,
    transform=transform_train,
    batch_size=32,
    num_workers=1,
    shuffle=True,
)
val_dataloader = get_loader(
    data_dir=data_dir,
    dataframe=val_df,
    transform=transform_val,
    batch_size=8,
    num_workers=1,
    shuffle=False,
)
# NOTE(review): shuffle is not passed here, so the test loader relies on
# get_loader's default. Confirm that default gives the ordering that the
# evaluation step expects.
test_dataloader = get_loader(
    data_dir=data_dir,
    dataframe=test_df,
    transform=transform_val,
    batch_size=8,
    num_workers=1,
)

In [3]:
# Hyperparameters
embed_size = 300                        # must equal the pretrained embedding size
hidden_size = 512
num_layers = 2
vocab_size_train = len(vocab_train_df)  # number of entries in the training vocabulary
learning_rate = 0.001
num_epochs = 20

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
# Load the pre-processed 300-dimensional GloVe data and build the embedding
# weight matrix for the training vocabulary.
#
# Keep this folder OUTSIDE the repository: otherwise the repository becomes too
# large and we cannot commit. It must be the folder that already contains the
# generated pickles (and the bcolz array).
miguel_path_glove = r"C:\Users\Miguel\OneDrive\Escritorio\2n curs\2n Semestre\Neural Networks and Deep Learning\Project\300dim_embedding"

raw_glove_path = miguel_path_glove
processed_glove_path = miguel_path_glove

# Open the on-disk bcolz array and slice it with [:] to read all vectors.
vectors = bcolz.open(f'{raw_glove_path}/6B.300.dat')[:]

# Use context managers so the pickle file handles are closed deterministically
# instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open(f'{processed_glove_path}/6B.300_words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open(f'{processed_glove_path}/6B.300_idx.pkl', 'rb') as idx_file:
    word2idx = pickle.load(idx_file)

# Map every GloVe word to its vector through the word -> row-index table.
glove = {w: vectors[word2idx[w]] for w in words}

# Build the weight matrix for the training vocabulary. The literal 300 is the
# embedding dimension and has to stay in sync with embed_size and with the
# 6B.300 files loaded above.
weights = weights_matrix(vocab_train_df, 300, glove)
weights = torch.Tensor(weights)

In [5]:
# Encoder-decoder captioning model with dropout, initialised with the
# pretrained embedding weight matrix and moved to the selected device.
model = EncoderDecoder_dropout(
    embed_size,
    hidden_size,
    vocab_size_train,
    num_layers,
    drop_prob=0.3,
    weight_matrix=weights,
).to(device)

# Cross-entropy loss; target positions equal to pad_index are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=pad_index)

# Plain Adam. We also tried weight decay (L2 regularisation) to reduce
# overfitting, but it did not work well. A ReduceLROnPlateau scheduler
# (mode='min', factor=0.1, patience=3) was tried as well and is not used.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(model)

using pretrained embedding
EncoderDecoder_dropout(
  (encoder): EncoderCNN(
    (resnet): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   

In [None]:
# Training driven by the separate train() / validate() helpers. Only the
# per-epoch training and validation loss values are collected for plotting.
losses = {"train": [], "val": []}

for epoch in range(num_epochs):
    # The two return values are reported below as the epoch's average losses.
    train_loss = train(criterion, model, optimizer, train_dataloader, device)
    val_loss = validate(criterion, model, val_dataloader, device)

    print(
        "Epoch: {}".format(epoch),
        "Train set - Average Train Loss: {:.5f}".format(train_loss),
        "Validation set - Average Validation Loss: {:.5f}".format(val_loss),
        sep="\n",
    )

    # Record this epoch's values in the loss history.
    for split_name, epoch_loss in (("train", train_loss), ("val", val_loss)):
        losses[split_name].append(epoch_loss)


# Plot the training and validation loss curves, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 6))
for split_key, curve_label in (("train", "Training Loss"), ("val", "Validation Loss")):
    ax.plot(losses[split_key], label=curve_label)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss")
ax.legend()
plt.show()

# Run the cell below to save the trained model to a .pkl file so that it can
# be loaded later for testing.

In [None]:
# Save the model trained above to disk. The context manager guarantees that
# the file is flushed and closed as soon as the dump finishes, instead of
# relying on garbage collection of an anonymous file handle.
with open('model_with_dropout.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Reload the previously saved model. The file handle is closed explicitly via
# the context manager.
# NOTE: unpickling can execute arbitrary code; only load a .pkl file that was
# produced by this notebook or another trusted source.
with open('model_with_dropout.pkl', 'rb') as model_file:
    pickled_model_with_dropout = pickle.load(model_file)

In [None]:
# Test the reloaded model by analysing the captions it generates for the
# test set, using the training vocabulary.
evaluate_caps(
    pickled_model_with_dropout,
    test_dataloader,
    test_df,
    vocab_train_df,
    device,
)