In [2]:
import os
import random
import wandb

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

from train import *
from test import *
from utils.utils import *
from models.models import *
import multiprocessing



# Global variables
global device

import os

# Setting CUDA ALLOC split size to 256 to avoid running out of memory
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
# Stopping wandb from creating symlinks
os.environ["WANDB_DISABLE_SYMLINKS"] = "true"

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def model_pipeline(cfg: dict):
    # tell wandb to get started
    with wandb.init(project="pytorch-demo", config=cfg):
        # access all HPs through wandb.config, so logging matches execution!
        config = wandb.config

        # execute only once to create the dataset
        # generate_and_dump_dataset(config.root_dir, config.captions_file, config.transforms, cfg.DATA_LOCATION)

        # Generate Dataset
        dataset = make_dataset(config)

        # make the data_loaders, and optimizer
        t0 = time.time()
        train_loader, test_loader = make_dataloaders(config, dataset, 1)
        t1 = time.time()
        print("Preprocessing_time:", t1-t0)

        # Generate vocab
        vocab = dataset.vocab
        config.vocab_size = len(vocab)


        # Get the model
        my_model = make_model(config, device)

        # Make the loss and optimizer
        criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])
        optimizer = torch.optim.Adam(my_model.parameters(), lr=config.learning_rate)

        train_loss_arr_epoch = []  # Mean of the losses of the last epoch
        test_loss_arr_epoch = []
        acc_arr_epoch = []

        train_loss_arr_batch = [] # Losses of the batches
        test_loss_arr_batch = []
        acc_arr_batch = []

        train_execution_times = []
        test_execution_times = []

        for epoch in tqdm(range(1, config.epochs + 1)):
            # Training the model
            t0 = time.time()
            train_loss_arr_aux = train(my_model, train_loader, criterion, optimizer, config, epoch)
            t1 = time.time()

            my_model.eval()
            # Testing
            t2 = time.time()
            acc_arr_aux, test_loss_arr_aux = test(my_model, test_loader, criterion, vocab, config, device)
            t3 = time.time()

            # Check how model performs
            test_model_performance(my_model, test_loader, device, vocab, epoch, config)

            my_model.train()

            # Logging data for vizz
            train_loss_arr_epoch.append(sum(train_loss_arr_aux) / len(train_loss_arr_aux))
            test_loss_arr_epoch.append(sum(test_loss_arr_aux) / len(test_loss_arr_aux))

            train_loss_arr_batch += train_loss_arr_aux
            test_loss_arr_batch += test_loss_arr_aux

            acc_arr_epoch.append(sum(acc_arr_aux) / len(acc_arr_aux))
            acc_arr_batch += acc_arr_aux

            train_execution_times.append(t1-t0)
            test_execution_times.append(t3-t2)

        epoch_df = pd.DataFrame([train_loss_arr_epoch, test_loss_arr_epoch, acc_arr_epoch, train_execution_times,
                                 test_execution_times],
                                columns=['epoch_' + str(i) for i in range(len(train_loss_arr_epoch))],
                                index=['train_loss', 'test_loss' ,'test_acc', 'train_times','test_times'])
        loss_batch_df = pd.DataFrame([train_loss_arr_batch],
                                    columns=['batch_' + str(i) for i in range(len(train_loss_arr_batch))],
                                    index=['train_loss'])
        acc_batch_df = pd.DataFrame([acc_arr_batch, test_loss_arr_batch],
                                    columns=['batch_' + str(i) for i in range(len(acc_arr_batch))],
                                    index=['test_acc', 'test_loss'])

        if config.save:
            epoch_df.to_csv(config.DATA_LOCATION+'/logs'+'/epoch_df.csv')
            loss_batch_df.to_csv(config.DATA_LOCATION+'/logs'+'/loss_batch_df.csv')
            acc_batch_df.to_csv(config.DATA_LOCATION+'/logs'+'/acc_batch_df.csv')
            save_model(my_model, config, config.DATA_LOCATION+'/logs'+'/EncoderDecorder_model.pth')

    return my_model


if __name__ == "__main__":
    wandb.login()

    print("Using: ", device)

    transforms = T.Compose([
        T.Resize(226),
        T.RandomCrop(224),
        T.ToTensor(),
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    DATA_LOCATION = '../data'

    config = dict(
        root_dir=DATA_LOCATION+"/Images",
        captions_file=DATA_LOCATION+"/captions.txt",
        device=device,
        encoder='ResNet50',
        transforms=transforms,
        embed_size=300,
        attention_dim=256,
        encoder_dim=2048,
        decoder_dim=512,
        epochs=20,
        learning_rate=0.0001,
        batch_size=50,
        DATA_LOCATION=DATA_LOCATION,
        train_size=0.8,
        save=True,
        momentum=0.8
    )

    model = model_pipeline(config)


Using:  cuda:0


Preprocessing_time: 53.449002265930176


  0%|          | 0/20 [00:00<?, ?it/s]

Loss after 00050 examples: 8.017
Loss after 00100 examples: 7.989
Loss after 00150 examples: 7.955
Loss after 00200 examples: 7.931
Loss after 00250 examples: 7.891
Loss after 00300 examples: 7.845
Loss after 00350 examples: 7.818
Loss after 00400 examples: 7.755
Loss after 00450 examples: 7.768
Loss after 00500 examples: 7.701
Loss after 00550 examples: 7.619
Loss after 00600 examples: 7.558
Loss after 00650 examples: 7.512
Loss after 00700 examples: 7.488
Loss after 00750 examples: 7.353
Loss after 00800 examples: 7.316
Loss after 00850 examples: 7.200
Loss after 00900 examples: 7.179
Loss after 00950 examples: 7.141
Loss after 01000 examples: 7.010
Loss after 01050 examples: 6.873
Loss after 01100 examples: 6.796
Loss after 01150 examples: 6.746
Loss after 01200 examples: 6.628
Loss after 01250 examples: 6.497
Loss after 01300 examples: 6.479
Loss after 01350 examples: 6.426
Loss after 01400 examples: 6.260
Loss after 01450 examples: 6.288
Loss after 01500 examples: 6.027
Loss after

Loss after 12500 examples: 4.371
Loss after 12550 examples: 4.218
Loss after 12600 examples: 4.453
Loss after 12650 examples: 4.376
Loss after 12700 examples: 4.176
Loss after 12750 examples: 4.474
Loss after 12800 examples: 4.209
Loss after 12850 examples: 4.284
Loss after 12900 examples: 4.360
Loss after 12950 examples: 4.360
Loss after 13000 examples: 4.321
Loss after 13050 examples: 4.187
Loss after 13100 examples: 4.304
Loss after 13150 examples: 4.323
Loss after 13200 examples: 4.343
Loss after 13250 examples: 4.431
Loss after 13300 examples: 4.323
Loss after 13350 examples: 4.266
Loss after 13400 examples: 4.205
Loss after 13450 examples: 4.297
Loss after 13500 examples: 4.322
Loss after 13550 examples: 4.340
Loss after 13600 examples: 4.337
Loss after 13650 examples: 4.217
Loss after 13700 examples: 4.204
Loss after 13750 examples: 4.112
Loss after 13800 examples: 4.356
Loss after 13850 examples: 4.436
Loss after 13900 examples: 4.242
Loss after 13950 examples: 4.316
Loss after

VBox(children=(Label(value='0.001 MB of 0.365 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.003130…

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,█▇▆▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂▂▁▂▁▂▁▁▁▁▁

0,1
epoch,1.0
loss,3.77775


KeyboardInterrupt: 

In [3]:
with wandb.init(project="pytorch-demo", config=config):
    config = wandb.config

    # Generate Dataset
    dataset = make_dataset(config)

    # make the data_loaders, and optimizer
    #train_loader, test_loader = make_dataloaders(config, dataset, 1)

In [4]:
data_list = preprocess_dataset(dataset)

In [5]:
data_list[:3]

[[tensor([[[-0.9019, -0.1315, -0.2341,  ..., -1.3301, -1.5527, -1.2783],
           [-1.0215, -0.1829, -0.2170,  ..., -1.4502, -1.5869, -1.6895],
           [-1.0566, -0.1656, -0.2000,  ..., -1.6211, -1.5527, -1.6387],
           ...,
           [ 1.6670,  1.4951,  0.4680,  ...,  1.5293,  0.6733,  0.7246],
           [ 0.8789,  0.1083,  0.0056,  ...,  1.4951,  0.6733,  0.6904],
           [ 0.6904,  1.5469,  0.9644,  ...,  1.4951,  0.7075,  0.6904]],
  
          [[-0.7925,  0.0301, -0.0049,  ..., -1.2832, -1.5107, -0.9502],
           [-0.9678, -0.0224,  0.0476,  ..., -1.4229, -1.5635, -1.5801],
           [-1.0029, -0.0049,  0.0301,  ..., -1.5459, -1.4404, -1.5459],
           ...,
           [ 1.2207,  0.7656, -0.6177,  ...,  1.8506,  1.2031,  1.1855],
           [-0.2500, -0.4775, -0.4602,  ...,  1.8330,  1.2207,  1.2031],
           [ 0.0476,  1.0459,  0.3103,  ...,  1.8154,  1.2031,  1.1855]],
  
          [[-0.6021,  0.1302,  0.0779,  ..., -1.2812, -1.4902, -1.2637],
           

In [None]:
my_iter = iter(train_loader)
img, cap = next(my_iter)

In [None]:
cap

In [15]:
len(img)

50

In [7]:
img2, cap2 = next(my_iter)

In [8]:
cap2

tensor([[   1,    4,    9,    7,   32,   10,  711,   27,  104, 2409,    5,    2,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,   32,   76,    4,  157, 2409,    5,    2,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    7,  316,   76,    4,  157,   74,    5,    2,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,    8,    4,  195,  151,  316,   76,    4,  157,
            3,    5,    2,    0,    0,    0,    0,    0],
        [   1,    4,   28,    8,    4,  195,  151,   17,   32,   67,    4,  353,
           11,  711,    8,   24,    3,  496,    5,    2],
        [   1,    4,    7,  316,   76,    4,  157,   74,    5,    2,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,   32,   10,  711,   27,  104, 2409,    5,    2,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9, 

In [17]:
image,captions = next(iter(train_loader))

In [21]:
image, captions = image.to(device), captions.to(device)

# Zero the gradients.
optimizer.zero_grad()

# Feed forward
outputs, attentions = my_model(image.to(torch.float32), captions)

# Calculate the batch loss.
targets = captions[:, 1:]
loss = criterion(outputs.view(-1, config.vocab_size), targets.reshape(-1))

# Backward pass.
loss.backward()

# Update the parameters in the optimizer.
optimizer.step()

In [27]:
outputs.view(-1, config.vocab_size)

tensor([[-0.0419, -0.0200, -0.0398,  ..., -0.0310,  0.0729,  0.0062],
        [ 0.1156, -0.0232, -0.1058,  ...,  0.0588,  0.0166, -0.1509],
        [ 0.0336,  0.0681, -0.1028,  ...,  0.0521,  0.0040, -0.1283],
        ...,
        [-0.2936,  0.1309,  0.0571,  ..., -0.1252,  0.2291, -0.0856],
        [-0.2734,  0.2516, -0.0050,  ...,  0.0603, -0.0179, -0.0015],
        [-0.2988,  0.1344, -0.0523,  ...,  0.0208, -0.0471,  0.0206]],
       device='cuda:0', grad_fn=<ViewBackward0>)

In [35]:
captions[:,:].shape

torch.Size([50, 20])

In [39]:
outputs.view(-1, config.vocab_size)

tensor([[-0.0419, -0.0200, -0.0398,  ..., -0.0310,  0.0729,  0.0062],
        [ 0.1156, -0.0232, -0.1058,  ...,  0.0588,  0.0166, -0.1509],
        [ 0.0336,  0.0681, -0.1028,  ...,  0.0521,  0.0040, -0.1283],
        ...,
        [-0.2936,  0.1309,  0.0571,  ..., -0.1252,  0.2291, -0.0856],
        [-0.2734,  0.2516, -0.0050,  ...,  0.0603, -0.0179, -0.0015],
        [-0.2988,  0.1344, -0.0523,  ...,  0.0208, -0.0471,  0.0206]],
       device='cuda:0', grad_fn=<ViewBackward0>)

In [None]:
# Run training and track with wandb
example_ct = 0  # number of examples seen
batch_ct = 0

loss_arr_batch = []  # Losses of the batches

for idx, (image, captions) in enumerate(iter(data_loader)):

    loss = train_batch(image.to(torch.float32), captions, model, config.vocab_size, optimizer, criterion, device=config.device)
    example_ct += len(image)
    batch_ct += 1

    loss_arr_batch.append(loss.tolist())

    # Report metrics every 1th batch
    if ((batch_ct + 1) % 1) == 0 and verbatim:
        train_log(loss, example_ct, epoch)


In [4]:
t0 = time.time()
my_iter = iter(train_loader)
t1 = time.time()
t0-t1
# bs 32 nw all

-5.296547889709473

In [5]:
a, b = next(my_iter)

In [8]:
train_loader.dataset.data[0//5][1][0%5]

tensor([  1,   4,  28,   8,   4, 195, 151,  17,  32,  67,   4, 353,  11, 711,
          8,  24,   3, 496,   5,   2], dtype=torch.int16)

In [11]:
b

tensor([[   1,    4,    9,    7,    8,    4,  195,  151,  316,   76,    4,  157,
            3,    5,    2,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,   32,   10,  711,   27,  104, 2409,    5,    2,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    7,  316,   76,    4,  157,   74,    5,    2,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    7,  316,   76,    4,  157,   74,    5,    2,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,   32,   76,    4,  157, 2409,    5,    2,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,   28,    8,    4,  195,  151,   17,   32,   67,    4,  353,
           11,  711,    8,   24,    3,  496,    5,    2],
        [   1,    4,    9,    7,   32,   76,    4,  157, 2409,    5,    2,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9, 

In [37]:
t0 = time.time()
my_iter = iter(train_loader)
t1 = time.time()
t0-t1
# bs 500 nw 1

-5.776714086532593

In [38]:
for a, b in my_iter:
    print(1)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


In [21]:
iter_2 = deepcopy(my_iter)

NotImplementedError: ('{} cannot be pickled', '_MultiProcessingDataLoaderIter')