In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [1]:
'''Import modules'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models
from collections import Counter
from skimage import io, transform
from torch.nn.utils.rnn import pack_padded_sequence
from torchsummary import summary

import matplotlib.pyplot as plt # for plotting
import numpy as np
from time import time
import collections
import pickle
import os
import gensim
import nltk
from PIL import Image

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device =", device)
print("Using", torch.cuda.device_count(), "GPUs!")
platform = "local" #colab/local
restore = False #Restore Checkpoint
phase = "Train"

Device = cpu
Using 0 GPUs!


In [3]:
# Define a transform to pre-process the training images.
img_transform = transforms.Compose([ 
    transforms.Resize(256),                          # smaller edge of image resized to 256
    transforms.RandomCrop(224),                      # get 224x224 crop from random location
    transforms.RandomHorizontalFlip(),               # horizontally flip image with probability=0.5
    transforms.ToTensor(),                           # convert the PIL Image to a tensor
    transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225))
    ])

In [4]:
class ImageCaptionsDataset(Dataset):

    def __init__(self, img_dir, img_transform=None):
        """
        Args:
            img_dir (string): Directory with all the images.
            captions_dict: Dictionary with captions list keyed by image ids (integers)
            img_transform (callable, optional): Optional transform to be applied
                on the image sample.

            captions_transform: (callable, optional): Optional transform to be applied
                on the caption sample (list).
        """
        self.img_dir = img_dir
        self.img_transform = img_transform
        images = os.listdir(os.path.join(img_dir))
        images = [i.split("_")[1][:-4] for i in images]
        images = [int(i) for i in images]
        images.sort()
        self.image_ids = images

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        #print('IMG No.', self.image_ids[idx])
        img_name = os.path.join(self.img_dir, 'image_{}.jpg'.format(self.image_ids[idx]))
        image = Image.open(img_name).convert('RGB')
        #print("RAW IMG", image.shape)
        if self.img_transform:
            image = self.img_transform(image)
            #print("AFTER img_transform", image.shape)
            #image = image.transpose((2, 0, 1))

            
        sample = {'idx': idx, 'image': image}

        return sample

In [5]:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        resnet50 = models.resnet50(pretrained=True, progress=True)        
        
        for param in resnet50.parameters():
            param.requires_grad = False
        self.resnet50 = resnet50
        print("resnet50 Loaded Successfully..!")

    def forward(self, x):
        x = self.resnet50(x)
        return x


In [14]:
if platform == "colab":
    IMAGE_DIR = '/content/drive/My Drive/train_images/'
else:
    IMAGE_DIR = 'D:/Padhai/IIT Delhi MS(R)/2019-20 Sem II/COL774 Machine Learning/Assignment/Assignment4/train_images/'

train_dataset = ImageCaptionsDataset(
    IMAGE_DIR, img_transform=img_transform
)

BATCH_SIZE = 2
NUM_WORKERS = 0

# Creating the DataLoader for batching purposes
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

train_images_features = {}
encoder = Encoder()
if device == "cuda":
    encoder.cuda()
    print("Encoder to CUDA")

t0 = time()
print("FOM NUM WORKERS =", NUM_WORKERS)
for batch_idx, sample in enumerate(train_loader):
    images_batch = sample["image"]
    images_idx = sample["idx"]
    print(images_idx.tolist())
    images_features = encoder(images_batch)
    
    batch_dict = dict(zip(images_idx.tolist(), images_batch[0]))
    train_images_features.update(batch_dict)
    print(batch_dict)
print("Time Taken by NUM_WORKERS={} ={}Min".format(NUM_WORKERS,round((time()-t0)/60, 3)))


resnet50 Loaded Successfully..!
FOM NUM WORKERS = 0
[0, 1]
{0: tensor([[-1.1247, -0.2171, -1.6384,  ..., -2.0323, -1.8610, -1.9295],
        [-1.8953, -1.1418, -1.8097,  ..., -2.0494, -1.9467, -1.8097],
        [-1.9638, -2.0152, -1.9980,  ..., -1.9980, -2.0665, -1.8953],
        ...,
        [-1.7754, -1.7583, -1.7754,  ..., -2.0152, -1.9295, -1.9980],
        [-1.7412, -1.8782, -1.6727,  ..., -1.9467, -1.9809, -1.9809],
        [-1.7240, -1.6384, -1.1932,  ..., -0.8678, -1.1760, -1.7583]]), 1: tensor([[-0.4426,  0.7829, -0.6527,  ..., -1.9657, -1.5630, -1.6155],
        [-1.5980, -0.4251, -1.2304,  ..., -1.9132, -1.7556, -1.4930],
        [-1.7906, -1.6506, -1.8256,  ..., -1.7556, -1.8782, -1.5980],
        ...,
        [-1.7206, -1.7206, -1.7206,  ..., -1.8431, -1.8081, -1.7731],
        [-1.6506, -1.7906, -1.5980,  ..., -1.6506, -1.7031, -1.6506],
        [-1.6681, -1.5805, -1.1779,  ..., -0.1975, -0.7052, -1.3179]])}
[2, 3]


KeyboardInterrupt: 

In [12]:
if platform == "colab":
    fname = '/content/drive/My Drive/A4/data/train_data.pkl'
else:
    fname = '../data/train_data.pkl'
    
with open(fname, 'wb') as handle:
    pickle.dump(train_images_features, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:

BATCH_SIZE = 100
NUM_WORKERS = 2
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

t0 = time()
print("FOM NUM WORKERS =", NUM_WORKERS)
for batch_idx, sample in enumerate(train_loader):
    images_batch = sample["image"]
print("Time Taken by NUM_WORKERS={} ={}Min".format(NUM_WORKERS,round((time()-t0)/60, 3)))

BATCH_SIZE = 100
NUM_WORKERS = 4
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

t0 = time()
print("FOM NUM WORKERS =", NUM_WORKERS)
for batch_idx, sample in enumerate(train_loader):
    images_batch = sample["image"]
print("Time Taken by NUM_WORKERS={} ={}Min".format(NUM_WORKERS,round((time()-t0)/60, 3)))

BATCH_SIZE = 100
NUM_WORKERS = 8
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

t0 = time()
print("FOM NUM WORKERS =", NUM_WORKERS)
for batch_idx, sample in enumerate(train_loader):
    images_batch = sample["image"]
print("Time Taken by NUM_WORKERS={} ={}Min".format(NUM_WORKERS,round((time()-t0)/60, 3)))

BATCH_SIZE = 100
NUM_WORKERS = 16
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

t0 = time()
print("FOM NUM WORKERS =", NUM_WORKERS)
for batch_idx, sample in enumerate(train_loader):
    images_batch = sample["image"]
print("Time Taken by NUM_WORKERS={} ={}Min".format(NUM_WORKERS,round((time()-t0)/60, 3)))