In [2]:
import os
import random
import wandb

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

from train import *
from test import *
from utils.utils import *
from models.models import *
import multiprocessing



# Global variables
global device

import os

# Setting CUDA ALLOC split size to 256 to avoid running out of memory
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
# Stopping wandb from creating symlinks
os.environ["WANDB_DISABLE_SYMLINKS"] = "true"

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def model_pipeline(cfg: dict):
    # tell wandb to get started
    with wandb.init(project="pytorch-demo", config=cfg):
        # access all HPs through wandb.config, so logging matches execution!
        config = wandb.config

        # execute only once to create the dataset
        # generate_and_dump_dataset(config.root_dir, config.captions_file, config.transforms, cfg.DATA_LOCATION)

        # Generate Dataset
        dataset = make_dataset(config)

        # make the data_loaders, and optimizer
        t0 = time.time()
        train_loader, test_loader = make_dataloaders(config, dataset, 1)
        t1 = time.time()
        print("Preprocessing_time:", t1-t0)

        # Generate vocab
        vocab = dataset.vocab
        config.vocab_size = len(vocab)


        # Get the model
        my_model = make_model(config, device)

        # Make the loss and optimizer
        criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])
        optimizer = torch.optim.SGD(my_model.parameters(), lr=config.learning_rate, momentum=config.momentum)

        train_loss_arr_epoch = []  # Mean of the losses of the last epoch
        test_loss_arr_epoch = []
        acc_arr_epoch = []

        train_loss_arr_batch = [] # Losses of the batches
        test_loss_arr_batch = []
        acc_arr_batch = []

        train_execution_times = []
        test_execution_times = []

        for epoch in tqdm(range(1, config.epochs + 1)):
            # Training the model
            t0 = time.time()
            train_loss_arr_aux = train(my_model, train_loader, criterion, optimizer, config, epoch)
            t1 = time.time()

            my_model.eval()
            # Testing
            t2 = time.time()
            acc_arr_aux, test_loss_arr_aux = test(my_model, test_loader, criterion, vocab, config, device)
            t3 = time.time()

            # Check how model performs
            test_model_performance(my_model, test_loader, device, vocab, epoch, config)

            my_model.train()

            # Logging data for vizz
            train_loss_arr_epoch.append(sum(train_loss_arr_aux) / len(train_loss_arr_aux))
            test_loss_arr_epoch.append(sum(test_loss_arr_aux) / len(test_loss_arr_aux))

            train_loss_arr_batch += train_loss_arr_aux
            test_loss_arr_batch += test_loss_arr_aux

            acc_arr_epoch.append(sum(acc_arr_aux) / len(acc_arr_aux))
            acc_arr_batch += acc_arr_aux

            train_execution_times.append(t1-t0)
            test_execution_times.append(t3-t2)

        epoch_df = pd.DataFrame([train_loss_arr_epoch, test_loss_arr_epoch, acc_arr_epoch, train_execution_times,
                                 test_execution_times],
                                columns=['epoch_' + str(i) for i in range(len(train_loss_arr_epoch))],
                                index=['train_loss', 'test_loss' ,'test_acc', 'train_times','test_times'])
        loss_batch_df = pd.DataFrame([train_loss_arr_batch],
                                    columns=['batch_' + str(i) for i in range(len(train_loss_arr_batch))],
                                    index=['train_loss'])
        acc_batch_df = pd.DataFrame([acc_arr_batch, test_loss_arr_batch],
                                    columns=['batch_' + str(i) for i in range(len(acc_arr_batch))],
                                    index=['test_acc', 'test_loss'])

        if config.save:
            epoch_df.to_csv(config.DATA_LOCATION+'/logs'+'/epoch_df.csv')
            loss_batch_df.to_csv(config.DATA_LOCATION+'/logs'+'/loss_batch_df.csv')
            acc_batch_df.to_csv(config.DATA_LOCATION+'/logs'+'/acc_batch_df.csv')
            save_model(my_model, config, config.DATA_LOCATION+'/logs'+'/EncoderDecorder_model.pth')

    return my_model


if __name__ == "__main__":
    wandb.login()

    print("Using: ", device)

    transforms = T.Compose([
        T.Resize(226),
        T.RandomCrop(224),
        T.ToTensor(),
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    DATA_LOCATION = '../data'

    config = dict(
        root_dir=DATA_LOCATION+"/Images",
        captions_file=DATA_LOCATION+"/captions.txt",
        device=device,
        encoder='ResNet50',
        transforms=transforms,
        embed_size=300,
        attention_dim=256,
        encoder_dim=2048,
        decoder_dim=512,
        epochs=20,
        learning_rate=0.01,
        batch_size=30,
        DATA_LOCATION=DATA_LOCATION,
        train_size=0.1,
        save=True,
        momentum=0.8
    )

    model = model_pipeline(config)


Using:  cuda:0


Preprocessing_time: 9.421124935150146


  0%|          | 0/20 [00:00<?, ?it/s]

Loss after 00030 examples: 8.016
Loss after 00060 examples: 8.020
Loss after 00090 examples: 8.006
Loss after 00120 examples: 7.980
Loss after 00150 examples: 7.966
Loss after 00180 examples: 7.950
Loss after 00210 examples: 7.923
Loss after 00240 examples: 7.891
Loss after 00270 examples: 7.880
Loss after 00300 examples: 7.844
Loss after 00330 examples: 7.800
Loss after 00360 examples: 7.782
Loss after 00390 examples: 7.740
Loss after 00420 examples: 7.729
Loss after 00450 examples: 7.697
Loss after 00480 examples: 7.657
Loss after 00510 examples: 7.625
Loss after 00540 examples: 7.586
Loss after 00570 examples: 7.552
Loss after 00600 examples: 7.505
Loss after 00630 examples: 7.476
Loss after 00660 examples: 7.460
Loss after 00690 examples: 7.420
Loss after 00720 examples: 7.355
Loss after 00750 examples: 7.331
Loss after 00780 examples: 7.226
Loss after 00810 examples: 7.227
Loss after 00840 examples: 7.232
Loss after 00870 examples: 7.125
Loss after 00900 examples: 7.069
Loss after

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Mean BLEU score of the model on the 100 test images: 6.030654703641959e-155%
count: 2.415217638015747
Loss after 00030 examples: 1.572
Loss after 00060 examples: 1.505
Loss after 00090 examples: 1.460
Loss after 00120 examples: 1.395
Loss after 00150 examples: 1.386
Loss after 00180 examples: 1.405
Loss after 00210 examples: 1.446
Loss after 00240 examples: 1.515
Loss after 00270 examples: 1.421
Loss after 00300 examples: 1.243
Loss after 00330 examples: 1.493
Loss after 00360 examples: 1.356
Loss after 00390 examples: 1.218
Loss after 00420 examples: 1.446
Loss after 00450 examples: 1.384
Loss after 00480 examples: 1.197
Loss after 00510 examples: 1.217
Loss after 00540 examples: 1.167
Loss after 00570 examples: 1.338
Loss after 00600 examples: 1.153
Loss after 00630 examples: 1.095
Loss after 00660 examples: 1.124
Loss after 00690 examples: 1.139
Loss after 00720 examples: 1.201
Loss after 00750 examples: 1.048
Loss after 00780 examples: 1.164
Loss after 00810 examples: 1.274
Loss af

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,██████▇▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
test_mean_bleu,▁

0,1
epoch,1.0
loss,1.5289
test_mean_bleu,0.0


KeyboardInterrupt: 

In [6]:
a = torch.zeros([2,3], dtype=torch.float32)
a.type()

'torch.FloatTensor'

In [15]:
train_loader.dataset.data[1//5][0].to(torch.float64)

tensor([[[ 0.,  0.,  0.,  ..., -1., -1., -1.],
         [-1.,  0.,  0.,  ..., -1., -1., -1.],
         [-1.,  0.,  0.,  ..., -1., -1., -1.],
         ...,
         [ 1.,  1.,  0.,  ...,  1.,  0.,  0.],
         [ 0.,  0.,  0.,  ...,  1.,  0.,  0.],
         [ 0.,  1.,  0.,  ...,  1.,  0.,  0.]],

        [[ 0.,  0.,  0.,  ..., -1., -1.,  0.],
         [ 0.,  0.,  0.,  ..., -1., -1., -1.],
         [-1.,  0.,  0.,  ..., -1., -1., -1.],
         ...,
         [ 1.,  0.,  0.,  ...,  1.,  1.,  1.],
         [ 0.,  0.,  0.,  ...,  1.,  1.,  1.],
         [ 0.,  1.,  0.,  ...,  1.,  1.,  1.]],

        [[ 0.,  0.,  0.,  ..., -1., -1., -1.],
         [ 0.,  0.,  0.,  ..., -1., -1., -1.],
         [ 0.,  0.,  0.,  ..., -1., -1., -1.],
         ...,
         [ 0.,  0., -1.,  ...,  2.,  1.,  1.],
         [-1., -1., -1.,  ...,  2.,  1.,  1.],
         [ 0.,  0.,  0.,  ...,  2.,  1.,  1.]]], dtype=torch.float64)

In [2]:
b

NameError: name 'b' is not defined

In [18]:
train_loader.dataset.data[4//5][1][4%5].to(torch.float64)

tensor([  1.,   4.,   9.,   7.,   8.,   4., 195., 151., 316.,  76.,   4., 157.,
          3.,   5.,   2.], dtype=torch.float64)

In [3]:
import os, psutil; print(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)

1283.12109375


In [3]:
with wandb.init(project="pytorch-demo", config=config):
        # access all HPs through wandb.config, so logging matches execution!
        config = wandb.config

        # execute only once to create the dataset
        # generate_and_dump_dataset(config.root_dir, config.captions_file, config.transforms, cfg.DATA_LOCATION)

        # Generate Dataset
        dataset = make_dataset(config)

        # make the data_loaders, and optimizer
        train_loader, test_loader = make_dataloaders(config, dataset, 1)

In [4]:
t0 = time.time()
my_iter = iter(train_loader)
t1 = time.time()
t0-t1
# bs 32 nw all

-5.296547889709473

In [5]:
a, b = next(my_iter)

In [8]:
train_loader.dataset.data[0//5][1][0%5]

tensor([  1,   4,  28,   8,   4, 195, 151,  17,  32,  67,   4, 353,  11, 711,
          8,  24,   3, 496,   5,   2], dtype=torch.int16)

In [11]:
b

tensor([[   1,    4,    9,    7,    8,    4,  195,  151,  316,   76,    4,  157,
            3,    5,    2,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,   32,   10,  711,   27,  104, 2409,    5,    2,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    7,  316,   76,    4,  157,   74,    5,    2,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    7,  316,   76,    4,  157,   74,    5,    2,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9,    7,   32,   76,    4,  157, 2409,    5,    2,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,   28,    8,    4,  195,  151,   17,   32,   67,    4,  353,
           11,  711,    8,   24,    3,  496,    5,    2],
        [   1,    4,    9,    7,   32,   76,    4,  157, 2409,    5,    2,    0,
            0,    0,    0,    0,    0,    0,    0,    0],
        [   1,    4,    9, 

In [37]:
t0 = time.time()
my_iter = iter(train_loader)
t1 = time.time()
t0-t1
# bs 500 nw 1

-5.776714086532593

In [38]:
for a, b in my_iter:
    print(1)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


In [21]:
iter_2 = deepcopy(my_iter)

NotImplementedError: ('{} cannot be pickled', '_MultiProcessingDataLoaderIter')