In [None]:
# python2 and python3 compatibility between loaded modules
from __future__ import print_function

In [None]:
# All imports go here
%matplotlib inline

# Reading files
import os

# Vector manipulations
import numpy as np

# DL framework
# torch
import torch
from torch import nn
from torch.nn import functional as F
import torch.utils.data as td
import torchvision as tv

# Plotting images
from matplotlib import pyplot as plt

# COCO loading captions
from pycocotools.coco import COCO
import skimage.io as io
import pylab
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

# import created vocabulary
from vocab_creator import VocabCreate as vc

# PIL Image
from PIL import Image

# regex for captions
import re

"""
# evaluation metrics on MSCOCO dataset
from pycocoevalcap.eval import COCOEvalCap
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice
"""

# json for dumping stuff onto files as output
import json
from json import encoder
encoder.FLOAT_REPR = lambda o: format(o, '.3f')

In [None]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# data loading
# Root directory that holds one sub-directory of images per split.
dataset_root_dir = '/datasets/COCO-2015/'
# Directory with the caption annotation files. The trailing slash matters:
# annotation paths in this notebook are built by concatenating this string
# directly with "captions_<split>.json".
annotations_root_dir = '../datasets/COCO/annotations/'
# Split names. Each one is used as an image sub-directory name; the train and
# val names are also inserted into the annotation file name.
train_dir = "train2014"
val_dir = "val2014"
test_dir = "test2015"

In [None]:
# training data annotations
# Path is "<annotations_root_dir>captions_<train_dir>.json" (no separator is
# inserted, so annotations_root_dir must end with a slash).
train_ann = "{}captions_{}.json".format(annotations_root_dir, train_dir)
# Load the caption annotation file through the pycocotools COCO helper.
coco_train_caps = COCO(train_ann)

In [None]:
# validation data annotations
# Path is "<annotations_root_dir>captions_<val_dir>.json" (no separator is
# inserted, so annotations_root_dir must end with a slash).
val_ann = "{}captions_{}.json".format(annotations_root_dir, val_dir)
# Load the caption annotation file through the pycocotools COCO helper.
coco_val_caps = COCO(val_ann)

In [None]:
# get all the training image IDs
train_image_ids = coco_train_caps.getImgIds()
# loadImgs() returns one record per ID, not pixel data: later cells index each
# entry with 'file_name' and 'id' and read the actual image from disk.
train_imgs = coco_train_caps.loadImgs(train_image_ids)

In [None]:
# sanity check: container type and number of training image records
print(type(train_imgs), len(train_imgs))

In [None]:
# get all the val image ids
val_image_ids = coco_val_caps.getImgIds()
# loadImgs() returns one record per ID, not pixel data: later cells index each
# entry with 'file_name' and 'id' and read the actual image from disk.
val_imgs = coco_val_caps.loadImgs(val_image_ids)

In [None]:
# Pick one training image record at random (unseeded, so the choice changes
# between runs) and read the corresponding file from disk.
rand_id = np.random.randint(0, len(train_imgs))
rand_img_path = '{}/{}/{}'.format(
    dataset_root_dir,
    train_dir,
    train_imgs[rand_id]['file_name'],
)
rand_img = io.imread(rand_img_path)

In [None]:
# display the randomly chosen training image without axis ticks
plt.axis('off')
plt.imshow(rand_img)
plt.show()

In [None]:
# load the caption annotations for the training image picked above
ann_id = coco_train_caps.getAnnIds(imgIds=train_imgs[rand_id]['id'])
anns = coco_train_caps.loadAnns(ann_id)
# pycocotools helper; presumably prints the caption text for caption
# annotations -- TODO confirm
coco_train_caps.showAnns(anns)
# draw the image again in a fresh figure
f = plt.figure()
plt.imshow(rand_img)
plt.axis("off")
plt.show()

In [None]:
# inspect the annotation records: the full list, how many there are, the first
# record, and the caption text stored under its 'caption' key
print(anns)
print(len(anns))
print(anns[0])
print(anns[0]['caption'])

In [None]:
# Pick one validation image record at random (unseeded, so the choice changes
# between runs) and read the corresponding file from disk.
val_rand_id = np.random.randint(0, len(val_imgs))
val_rand_img_path = '{}/{}/{}'.format(
    dataset_root_dir,
    val_dir,
    val_imgs[val_rand_id]['file_name'],
)
val_rand_img = io.imread(val_rand_img_path)

In [None]:
# display the randomly chosen validation image without axis ticks
plt.axis('off')
plt.imshow(val_rand_img)
plt.show()

In [None]:
# load the caption annotations for the validation image picked above
# NOTE: this rebinds `ann_id` and `anns`, which previously held the training
# image's annotations.
ann_id = coco_val_caps.getAnnIds(imgIds=val_imgs[val_rand_id]['id'])
anns = coco_val_caps.loadAnns(ann_id)
# pycocotools helper; presumably prints the caption text for caption
# annotations -- TODO confirm
coco_val_caps.showAnns(anns)
# draw the image again in a fresh figure
f = plt.figure()
plt.imshow(val_rand_img)
plt.axis("off")
plt.show()

In [None]:
# dataset class
class COCODataset(td.Dataset):
    """Caption-level dataset over one MS-COCO captions split.

    Every item corresponds to one caption annotation, so the dataset length is
    the number of captions and an image with several captions is returned
    several times (once per caption).

    Args:
        dataset_root_dir: directory containing one image sub-directory per split.
        annotations_root_dir: directory containing ``captions_<mode>.json``.
        vocab: object exposing ``one_hot_inds``, a word -> index mapping that
            contains the ``"start_vec"``, ``"end_vec"`` and ``"unk_vec"`` keys.
        mode: split name; used as the image sub-directory name and inside the
            annotation file name.
        image_size: size passed to ``tv.transforms.Resize``.
    """

    def __init__(self, dataset_root_dir, annotations_root_dir, vocab, mode="train2014", image_size=(224, 224)):
        super(COCODataset, self).__init__()
        self.dataset_root_dir = dataset_root_dir
        self.annotations_root_dir = annotations_root_dir
        self.image_size = image_size
        self.mode = mode
        self.vocab = vocab
        # os.path.join works with and without a trailing slash on the root
        self.ann = os.path.join(annotations_root_dir, "captions_{}.json".format(mode))
        self.coco_caps = COCO(self.ann)
        # all image IDs of the split
        self.image_ids = self.coco_caps.getImgIds()
        # one dataset item per caption annotation
        self.ann_ids = list(self.coco_caps.anns.keys())
        # image records for all image IDs
        self.imgs = self.coco_caps.loadImgs(self.image_ids)
        # Build the preprocessing pipeline once instead of once per item.
        # NOTE(review): the random horizontal flip is applied for every mode,
        # including validation/test splits -- confirm this is intended.
        self.transform = tv.transforms.Compose([
            tv.transforms.Resize(self.image_size),
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.ToTensor(),
            tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

    def __len__(self):
        """Number of caption annotations in the split."""
        return len(self.ann_ids)

    def __repr__(self):
        return "COCODataset(mode={}, image_size={})". \
        format(self.mode, self.image_size)

    def _encode_caption(self, cap):
        """Convert a raw caption into a 1-D tensor of vocabulary indices.

        Everything except ASCII letters, digits and spaces is removed, the text
        is lower-cased and split on whitespace, words missing from the
        vocabulary are mapped to ``"unk_vec"``, and the sequence is wrapped in
        the ``"start_vec"`` / ``"end_vec"`` indices.
        """
        # Use the vocabulary given to this instance, not a notebook global.
        word_to_ind = self.vocab.one_hot_inds
        clean_cap = re.sub(r'[^a-zA-Z0-9 ]+', '', str(cap))
        words = [
            w if w in word_to_ind else "unk_vec"
            for w in clean_cap.lower().strip().split()
        ]
        # torch.Tensor yields the default float dtype; kept so existing
        # consumers of the returned caption tensor see the same dtype.
        return torch.Tensor(
            [word_to_ind["start_vec"]]
            + [word_to_ind[w] for w in words]
            + [word_to_ind["end_vec"]]
        )

    def __getitem__(self, idx):
        """Return ``(image_tensor, caption_index_tensor)`` for caption ``idx``."""
        ann = self.coco_caps.anns[self.ann_ids[idx]]
        file_name = self.coco_caps.loadImgs(ann["image_id"])[0]["file_name"]
        img_path = os.path.join(self.dataset_root_dir, self.mode, file_name)

        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the conversion is done
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        x = self.transform(img)

        d = self._encode_caption(ann["caption"])
        return x, d
    

In [None]:
# load the vocabulary
# or Create and save to output
# NOTE(review): whether VocabCreate loads an existing file at dict_path or
# builds one from the training annotations is decided inside vocab_creator,
# which is not visible here -- confirm.
dict_path = "../outputs/vocab.npz"
vocab = vc(train_ann, dict_path)

In [None]:
# size of the vocabulary object, as reported by its __len__
print(len(vocab))

In [None]:
# create an instance of the cocodataset
# mode and image_size are left at their defaults ("train2014", (224, 224))
training_dataset = COCODataset(dataset_root_dir, annotations_root_dir, vocab)

In [None]:
def myimshow(image, ax=plt):
    """Display a normalized image tensor.

    Args:
        image: 3-D torch tensor whose first axis is moved to the end before
            plotting (channels-first input, e.g. a dataset item normalized
            with mean 0.5 / std 0.5, so values are nominally in [-1, 1]).
        ax: object providing ``imshow`` and ``axis``; defaults to pyplot.

    Returns:
        The value returned by ``ax.imshow``.
    """
    # detach() lets tensors that track gradients be converted as well;
    # .numpy() raises on those otherwise
    image = image.detach().to('cpu').numpy()
    # move axis 0 to the end: (C, H, W) -> (H, W, C)
    image = np.moveaxis(image, [0, 1, 2], [2, 0, 1])
    # undo the [-1, 1] normalization and clamp to the displayable [0, 1] range
    image = np.clip((image + 1) / 2, 0, 1)
    h = ax.imshow(image)
    ax.axis('off')
    return h

In [None]:
# Fetch one (image, caption) pair through normal indexing and show both.
img, cap = training_dataset[47]
print(cap)
myimshow(img)

In [None]:
# Convert the caption tensor to an integer array and map every index back to
# its word. NOTE: `cap` is rebound from a tensor to an ndarray here, so this
# cell cannot be re-run without re-running the cell that fetched the item.
cap = cap.numpy().astype(int)
print(cap, cap.dtype)
print(type(list(cap)))
captions = [vocab.dict[word_ind] for word_ind in cap]

In [None]:
# Print every caption index with its type, followed by the word it maps to.
for word_ind in cap:
    print(word_ind, type(word_ind))
    print(vocab.dict[word_ind])

In [None]:
# decoded caption as one space-separated string
print(" ".join(captions))

In [None]:
# Check the unknown-word handling on a made-up word: clean and tokenize the
# caption the same way the dataset does, then replace out-of-vocabulary words.
cap = "hellconvention"
clean_cap = re.sub(r'[^a-zA-Z0-9 ]+', '', cap)
word_list = clean_cap.lower().strip().split()
for i, word in enumerate(word_list):
    if word not in vocab.one_hot_inds:
        word_list[i] = "unk_vec"

print(word_list)