# In [1]:
from google.colab import drive
import re
from torchtext.vocab import Vocab
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
import torchvision.transforms as T
import pickle as pkl
from PIL import Image
from copy import deepcopy

class TextNumericalizer:
    """Translate between sentences and lists of vocabulary indices.

    The wrapped ``vocab`` object must expose ``stoi`` (token -> index) and
    ``itos`` (index -> token) lookups.
    """

    def __init__(self, vocab, tokenizer):
        # ``tokenizer`` is accepted but never stored or used; splitting is
        # done by ``tokenize`` below.
        self.vocab = vocab

    def tokenize(self, sentence):
        """Return the lowercase tokens of ``sentence`` without punctuation.

        Every character that is neither a word character nor whitespace is
        removed, then the text is split on single spaces. Consecutive spaces
        therefore produce empty-string tokens.
        """
        without_punctuation = re.sub(r'[^\w\s]', '', sentence)
        lowered = without_punctuation.lower()
        return lowered.split(" ")

    def SentenceToVector(self, sentence):
        """Return the ``vocab.stoi`` index of every token in ``sentence``."""
        indices = []
        for token in self.tokenize(sentence):
            # tokenize() already lowercases; the extra lower() is a no-op
            # that is retained from the original implementation.
            indices.append(self.vocab.stoi[token.lower()])
        return indices

    def VectorToSentence(self, vector):
        """Return the ``vocab.itos`` token for every index in ``vector``."""
        tokens = []
        for integer in vector:
            tokens.append(self.vocab.itos[integer])
        return tokens

    def getVocabularyLength(self):
        """Return the number of entries currently in ``vocab.stoi``."""
        known_tokens = self.vocab.stoi.keys()
        return len(known_tokens)

# Mount Google Drive so the dataset files under /content/drive/ are reachable.
drive.mount("/content/drive/")

# Load the pickled numericalizer (used below via TN.vocab.stoi and
# TN.SentenceToVector). The file is opened in a context manager so the
# handle is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
with open("/content/drive/MyDrive/Data/TextNumericalizer.pkl", "rb") as numericalizer_file:
    TN = pkl.load(numericalizer_file)

# Number of samples per batch produced by the DataLoader.
BATCH_SIZE = 1

class ImgCapDataset(Dataset):
    """Dataset that pairs each image with its list of captions.

    Args:
        X: Sequence of images.
        Y: Sequence of caption lists; ``Y[i]`` holds the captions of ``X[i]``.
        TN: Numericalizer exposing ``vocab.stoi`` and ``SentenceToVector``.
        transform: Optional callable applied to an image before it is
            returned. When ``None`` the image is returned unchanged.

    Raises:
        AssertionError: If ``X`` and ``Y`` differ in length.
    """

    def __init__(self, X, Y, TN, transform=None):
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; the exception type is kept for compatibility.
        if len(X) != len(Y):
            raise AssertionError(
                "Data should be of the same length! [Error: X(" + str(len(X))
                + ") != Y(" + str(len(Y)) + ")]"
            )
        self.X = X
        self.Y = Y
        self.TN = TN
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, caption_tensor, all_caption_vectors)`` for ``index``.

        ``caption_tensor`` is a ``torch.long`` tensor holding the first
        caption's indices wrapped in the ``<sos>`` and ``<eos>`` indices.
        ``all_caption_vectors`` holds one index list per caption, without
        the ``<sos>``/``<eos>`` markers.
        """
        # Build cleaned copies so the captions stored in self.Y stay untouched.
        cleaned = [re.sub(r'[^\w\s]', '', caption).lower() for caption in self.Y[index]]
        caption_vectors = [self.TN.SentenceToVector(caption) for caption in cleaned]

        stoi = self.TN.vocab.stoi
        # List concatenation creates a new list, so caption_vectors[0] is not
        # modified by adding the markers.
        first_caption = torch.tensor(
            [stoi["<sos>"]] + caption_vectors[0] + [stoi["<eos>"]],
            dtype=torch.long,
        )

        image = self.X[index]
        if self.transform is not None:
            image = self.transform(image)

        return image, first_caption, caption_vectors

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.X)

class CollateFunction:
    """Callable that merges a list of dataset samples into one batch.

    Each sample is indexed as ``(image, caption, references)``.
    ``padding`` is the value used to fill captions up to a common length.
    """

    def __init__(self, padding):
        self.padding = padding

    def __call__(self, batch):
        """Return ``(images, padded_captions, references)`` for ``batch``.

        Images gain a new leading dimension and are concatenated along it,
        captions are padded batch-first with ``self.padding``, and the
        references are returned as a plain list with one entry per sample.
        """
        expanded_images = [sample[0].unsqueeze(0) for sample in batch]
        images = torch.cat(expanded_images, dim=0)

        captions = [sample[1] for sample in batch]
        padded_captions = pad_sequence(
            captions, batch_first=True, padding_value=self.padding
        )

        references = [sample[2] for sample in batch]
        return images, padded_captions, references

# Load the training captions, indexed by image file name. The context
# manager closes the file handle once loading is done.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
with open("/content/drive/MyDrive/Data/Train/train_captions.pkl", "rb") as captions_file:
    train_data_captions = pkl.load(captions_file)

# Parallel lists: train_images[i] belongs to train_captions[i].
train_images = []
train_captions = []

for img_name in train_data_captions:
    train_captions.append(train_data_captions[img_name])
    image = Image.open("/content/drive/MyDrive/Data/Train/Images/" + str(img_name))
    # Image.open is lazy and keeps the file open until the pixels are read.
    # Loading here lets Pillow release the handle (for single-frame images),
    # so a large image folder does not exhaust the open-file limit.
    image.load()
    train_images.append(image)

# Image preprocessing: resize to 224x224, convert to a tensor, then
# normalize each of the three channels with the given mean and std.
train_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

train_loader = DataLoader(
    ImgCapDataset(train_images, train_captions, TN, transform=train_transform),
    batch_size=BATCH_SIZE,
    # Captions in a batch are padded with the index of the "<pad>" token.
    collate_fn=CollateFunction(TN.vocab.stoi["<pad>"]),
    shuffle=True,
    pin_memory=True,
)

# Persist the loader. The context manager guarantees the output file is
# flushed and closed even if pickling fails part-way through.
with open("/content/drive/MyDrive/Data/Train/eval_train_loader.pkl", "wb") as loader_file:
    pkl.dump(train_loader, loader_file)

# Output: Mounted at /content/drive/
