In [1]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.utils import np_utils, generic_utils
from keras.layers import Embedding,GRU,TimeDistributed,RepeatVector,Merge,BatchNormalization
from keras.preprocessing import sequence
from keras import callbacks
import numpy as np
from vgg16 import Vgg16

from os import listdir
from os.path import isfile, join

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import PIL.Image

import json
from tqdm import tqdm

from keras.optimizers import SGD, RMSprop, Adam

from utils import *

import cPickle as pickle
from matplotlib import pyplot as plt

from itertools import compress

import shutil
import string

import collections
import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords

import re
from numpy.random import random, permutation, randn, normal 

import os

import preprocessing as preproc


Using Theano backend.
Using gpu device 0: GeForce GTX 1080 (CNMeM is disabled, cuDNN 5110)


In [2]:

def constructImageEntryArr(imageDataArr):
    image_entry_arr = []

    for imageData in imageDataArr:
        image = imageData.image

        for caption in imageData.captions:
            image_entry_arr.append(ImageEntry(image,caption))

    return image_entry_arr


def get_unique_words(captions):
    unique_words = []
    words = [caption.split() for caption in captions]
   
    for word in words:
        unique_words.extend(word)
        
    unique_words = list(set(unique_words))
    
    return unique_words

def get_index_word_dicts(unique_words):
    word_index = {}
    index_word = {}
    for i,word in enumerate(unique_words):
        word_index[word] = i
        index_word[i] = word
        
    return (word_index,index_word)

def get_train_captions_indexed(captions, word2index, MAX_CAPTION_LEN ):
    
    train_captions_indexed = []
    for caption in captions:
        one = [word2index[caption_word] for caption_word in caption.split()]
        train_captions_indexed.append(one)

    train_captions_indexed = sequence.pad_sequences(train_captions_indexed, maxlen=MAX_CAPTION_LEN,padding='post')
    return train_captions_indexed

        
        
def compute_partial_all_words_2_next_word(captions_indexed,step_size,temp_save_path):
    
    if not os.path.exists(temp_save_path):
        os.makedirs(temp_save_path)
    
    for window_start in tqdm(range(0,len(captions_indexed),step_size)):
    
        captions_indexed_batch = captions_indexed[window_start:window_start+step_size]

        all_words_2_next_word = []

        for caption_indexed in captions_indexed_batch:

            word_2_next_word = []

            enhanced_caption_indexed = np.append(caption_indexed,[word2index["END"]]) #hacky

            for i in xrange(0,len(caption_indexed)):
                caption_word_index = enhanced_caption_indexed[i]
                future_word_index = enhanced_caption_indexed[i+1]
                future_indexes = np.zeros(VOCAB_SIZE)
                future_indexes[future_word_index] = 1

                word_2_next_word.append(future_indexes)

            words_2_next_word = np.vstack(word_2_next_word)

            all_words_2_next_word.append(words_2_next_word)

        save_array(temp_save_path+ 'all_words_2_next_word__'+str(format(window_start, "06"))+'.bc', all_words_2_next_word)
    
def get_future_words(temp_save_path):
    all_words_2_next_word_paths= [f for f in listdir(temp_save_path)]
    all_words_2_next_word_paths.sort()

    all_words_2_next_word = [load_array(temp_save_path + all_words_2_next_word_path) 
                             for all_words_2_next_word_path in all_words_2_next_word_paths ]

    future_words = np.vstack(all_words_2_next_word)
    future_words = np.transpose(future_words,(0,1,2))
    
    return future_words
    
def load_vectors(loc):
    return (load_array(loc+'.dat'),
        pickle.load(open(loc+'_words.pkl','rb')),
        pickle.load(open(loc+'_idx.pkl','rb')))

def create_emb(vecs,words,wordidx,vocab_size):
    n_fact = vecs.shape[1]
    emb = np.zeros((vocab_size, n_fact))

    found = 0
    not_found = 0
    
    exclude = set(string.punctuation)
    for i in range(1,len(emb)):
        word = index2word[i]
        word = ''.join(ch for ch in word if ch not in exclude).lower()
        if word and re.match(r"^[a-zA-Z0-9\-]*$", word) and word in wordidx:
            src_idx = wordidx[word]
            emb[i] = vecs[src_idx]
            found +=1
        else:
            # If we can't find the word in glove, randomly initialize
            emb[i] = normal(scale=0.6, size=(n_fact,))
            not_found+=1
#             print(word)

    # This is our "rare word" id - we want to randomly initialize
    emb[-1] = normal(scale=0.6, size=(n_fact,))
    emb/=3
    
    print("Found = %d"%found)
    print("Not found = %d"%not_found)
        
    return emb

def get_short_caption_mask(captions, max_length):
    return [len(caption.split()) < max_length for caption in captions]
    
def filter_array_by_mask(arr, mask):
    return np.asarray(list(compress(arr, mask)))

def has_only_common_words(caption,word2valid):
    valid_words = [word2valid[word] for word in caption.split()]
    return all(valid_words)

def compute_common_words_caption_mask(captions,min_no_of_app):
    
    sentences = [caption.split() for caption in captions]
    words = []
    for word in sentences:
        words.extend(word)

    counter=collections.Counter(words)
    
    word2no_app = dict(counter.most_common())
    
    word2valid = {word:app>=min_no_of_app for word,app in word2no_app.iteritems()}
    
    corect_captions = [has_only_common_words(caption,word2valid) for caption in captions]
    
    return corect_captions

def search_images_by(searched_word,images,predicted_captions):
    lmtzr = WordNetLemmatizer()
    lemm_word = lmtzr.lemmatize(searched_word)
    
    found_indexes = []
    for index,caption in enumerate(predicted_captions):
        lemm_caption_words = [lmtzr.lemmatize(word) for word in caption.split()]
        if lemm_word in lemm_caption_words:
            found_indexes.append(index)
    
    return ([images[i] for i in found_indexes],[predicted_captions[i] for i in found_indexes])

def get_test_model(model):
    test_model = Sequential()
    test_model.add(Merge([vgg_model, language_model], mode='concat'))
    for layer in model.layers[1:]:
        test_model.add(layer)
        
    test_model.compile(loss='categorical_crossentropy', optimizer = Adam(0.001))
    return test_model

def plot_predictions(ims, titles = None):  
    for i in range(len(ims)):
        if(titles):
            plt.title(titles[i])
        plt.imshow(ims[i])
        plt.figure()
            
    plt.show()
    
def make_prediction(random_number,images_concat_t,vgg_model):
    startIndex = word2index["START"]
    start_captions = [[startIndex]]
    start_captions = sequence.pad_sequences(start_captions, maxlen=MAX_CAPTION_LEN,padding='post')

    firstImage = np.expand_dims(images_concat_t[random_number], axis=0)

    first_image_vgg_features = vgg_model.predict(firstImage)
#     first_image_input = firstImage
    first_image_input = np.squeeze(first_image_vgg_features)[0].reshape(1,4096)
    
    firstCaption = np.expand_dims(start_captions[0], axis=0) 

    outputs = []

    endGenerated = False
    i = 0
    while ((not endGenerated) & (i < MAX_CAPTION_LEN-1)):

        predictions = model.predict([first_image_input, firstCaption])
        predictions = predictions[0]

        currentPred = predictions[i]

        max_index = np.argmax(currentPred)

        outputs.append(max_index)
        firstCaption[0,i+1] = max_index

        i+=1

        if(index2word[max_index] == "END"):
            endGenerated = True

    caption = ' '.join([index2word[x] for x in firstCaption[0][:i+1]])
    
    drawImage = firstImage[0]
    drawImageT = np.transpose(drawImage,(1,2,0))
    plt.imshow(drawImageT)
    
    return (drawImageT,caption)

def make_prediction_on_dataset(images_concat_t):
    
    vgg_model = get_vgg_model()
    
    images2Captions = [make_prediction(i,images_concat_t,vgg_model) for i in tqdm(range(TEST_WINDOW_START,TEST_WINDOW_START+NO_TEST_IMAGES))]
    images = [image2Caption[0] for image2Caption in images2Captions]
    predicted_captions = [image2Caption[1] for image2Caption in images2Captions]

    
    return (images,predicted_captions)

def make_prediction_on_misc_dataset(images_concat_t):
    
    vgg_model = get_vgg_model()
    
    images2Captions = [make_prediction(i,images_concat_t,vgg_model) for i in tqdm(range(len(images_concat_t)))]
    images = [image2Caption[0] for image2Caption in images2Captions]
    predicted_captions = [image2Caption[1] for image2Caption in images2Captions]

    
    return (images,predicted_captions)

def plot_loss_from_history(history):
    plt.plot(history.history['loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    

In [3]:
save_path = "/home/docker/fastai-courses/deeplearning1/nbs/persistent/coco/"

tb_folder = "tensorboard-logs/"
tb_path = save_path + tb_folder + "logs"

annotation_path = save_path +"raw_annotations/captions_val2014.json"

images_path = save_path+"raw_images/val2014"
image_data_arr_path = save_path+"imageDataArr/"
images_concat_t_path = save_path+"imagesConcatT/"
captions_path = save_path+"captions/"
temp_save_path = save_path+"temp/"
model_path = save_path+"models/"
images_vgg_features_path = save_path + "images_vgg_features/"
misc_images_path = save_path+"misc-images/"
train_path = save_path + "train/"
test_path = save_path + "test/"


images_concat_folder = "images_concat/"
images_vgg_4096_folder = "images_vgg_4096/"
captions_folder = "captions/"
indexed_captions_folder = "indexed-captions/"
indexed_future_words_folder = "indexed-future-words/"
glove_folder = "glove/"
misc_folder = "misc/"

batch_folder = "batches/"

# Read Serialized Data - Images

In [21]:
# train_images_concat_t = preproc.read_serialized_np_arr(train_path + images_concat_folder+ 'images_concat.bc',
#                                                        NR_TRAIN_INSTANCES)

test_images_concat_t = preproc.read_serialized_np_arr(test_path + images_concat_folder+ 'images_concat.bc')

In [22]:
# print(train_images_concat_t.shape)
print(test_images_concat_t.shape)

(1000, 3, 224, 224)


# Read Serialized Data - Image VGG Features

In [None]:
# train_images_precomputed_vgg_features = preproc.read_serialized_np_arr(train_path+images_vgg_4096_folder+'vgg_features.bc',NR_TRAIN_INSTANCES)
# test_images_precomputed_vgg_features = preproc.read_serialized_np_arr(test_path+images_vgg_4096_folder+'vgg_features.bc')

In [None]:
# print(train_images_precomputed_vgg_features.shape)
# print(test_images_precomputed_vgg_features.shape)

# Captions

In [None]:
# raw_train_captions = preproc.get_truncated_captions_from_batch(train_path + captions_folder, batch_nr = 0, nr_instances = NR_TRAIN_INSTANCES )
# test_captions = preproc.get_truncated_captions_from_batch(test_path + captions_folder, batch_nr = 0, nr_instances = NR_TRAIN_INSTANCES )
# print"Train captions: %d \nTest Captions %d"%(len(raw_train_captions),len(test_captions))

# Filter data

In [None]:
# print"Before Train images: %d \nBefore Test images %d"%(len(raw_train_captions),len(test_captions))

## Caption length

In [None]:
# NEW_CAPTION_LENGTH = 15
# train_mask_caption_length = get_short_caption_mask(raw_train_captions,NEW_CAPTION_LENGTH)
# # test_mask_caption_length = get_short_caption_mask(test_captions,NEW_CAPTION_LENGTH)

## Number of words

In [None]:
# MIN_NO_OF_APP = 2
# train_mask_no_words = compute_common_words_caption_mask(raw_train_captions,min_no_of_app = MIN_NO_OF_APP)
# # test_mask_no_words = compute_common_words_caption_mask(test_captions,min_no_of_app = MIN_NO_OF_APP)

## Combine masks for images

In [None]:
# train_combined_masks = [a and b for a, b in zip(train_mask_caption_length, train_mask_no_words)] 
# # test_combined_masks = [a and b for a, b in zip(test_mask_caption_length, test_mask_no_words)]

# train_captions = filter_array_by_mask(raw_train_captions,train_combined_masks)
# # test_captions = filter_array_by_mask(test_captions,test_combined_masks)

In [None]:
train_captions = raw_train_captions

In [None]:
# print"Train images: %d \nTest images %d"%(len(train_captions),len(test_captions))

## Images

In [None]:
train_image_model_input = train_images_concat_t
# test_image_model_input = test_images_concat_t

# train_image_model_input = train_images_precomputed_vgg_features
# test_image_model_input = test_images_precomputed_vgg_features

In [None]:
# train_image_model_input = filter_array_by_mask(raw_train_image_model_input,train_combined_masks)
# test_image_model_input = filter_array_by_mask(test_image_model_input,test_combined_masks)
# print"Train images: %d \nTest images %d"%(len(train_image_model_input),len(test_image_model_input))

# Load precomputed misc data structures

In [4]:
train_img_vgg_path = train_path + batch_folder +images_vgg_4096_folder
train_indexed_captions_path = train_path + batch_folder + indexed_captions_folder
train_raw_captions_path = train_path+batch_folder+captions_folder
train_future_words_path = train_path+batch_folder+indexed_future_words_folder

test_img_vgg_path = test_path + batch_folder +images_vgg_4096_folder
test_indexed_captions_path = test_path + batch_folder + indexed_captions_folder
test_raw_captions_path = test_path+batch_folder+captions_folder
test_future_words_path = test_path+batch_folder+indexed_future_words_folder




In [5]:
def get_captions_indexed(raw_captions_path, indexed_captions_path):
    
    indexed_captions = []
    raw_captions = []
    
    indexed_captions_elements = os.listdir(indexed_captions_path)
    raw_captions_elements = os.listdir(raw_captions_path)
    
    indexed_captions_elements.sort()
    raw_captions_elements.sort()
    
    nr_elem = len(indexed_captions_elements)

    for index in tqdm(range(nr_elem)):
        
        indexed_caption_name = indexed_captions_elements[index]
        indexed_caption = preproc.load_array(indexed_captions_path+"/"+indexed_caption_name)
        indexed_captions.append(indexed_caption)        
        
        raw_caption_name = raw_captions_elements[index]
        raw_caption = preproc.load_array(raw_captions_path+"/"+raw_caption_name)
        raw_captions.append(raw_caption)        
        
    
    stacked_indexed_captions = np.vstack(indexed_captions)
    stacked_raw_captions = np.vstack(raw_captions)
    
    return (stacked_indexed_captions,stacked_raw_captions)

In [6]:
unique_words = preproc.load_obj(train_path+batch_folder+misc_folder+"unique_words")
word2index = preproc.load_obj(train_path+batch_folder+misc_folder+"word2index")
index2word = preproc.load_obj(train_path+batch_folder+misc_folder+"index2word")

(train_captions_indexed,train_captions_raw) = get_captions_indexed(train_raw_captions_path,train_indexed_captions_path)
(_,test_captions_raw) = get_captions_indexed(test_raw_captions_path,test_indexed_captions_path)


100%|██████████| 79/79 [00:00<00:00, 704.23it/s]
100%|██████████| 2/2 [00:00<00:00, 855.02it/s]


In [7]:
VOCAB_SIZE = len(unique_words)
MAX_CAPTION_LEN = 15 # ATENTIE AICI

In [8]:
print("MAX_CAPTION_LEN = %s"%MAX_CAPTION_LEN)
print("VOCAB_SIZE = %s"%VOCAB_SIZE)


MAX_CAPTION_LEN = 15
VOCAB_SIZE = 18312


In [None]:
# (40438, 259, 13601)

# Word Embeddings

In [14]:
EMB_SIZE = 200
vecs, words, wordidx = load_vectors(save_path+glove_folder+"6B."+str(EMB_SIZE)+"d")

In [15]:
emb = create_emb(vecs, words, wordidx,VOCAB_SIZE)

Found = 16585
Not found = 1726


# Model Building

## VGG

In [9]:
def get_vgg_model():
    image_model = Vgg16().model
    image_model.pop()
    image_model.pop()
    image_model.trainable = False
    image_model.add(RepeatVector(MAX_CAPTION_LEN))
    return image_model

def get_precomputed_input_model():
    input_model = Sequential()
    input_model.add(RepeatVector(MAX_CAPTION_LEN,input_shape=(4096,)))
    return input_model

## GRU

In [10]:
def get_language_model(emb):
    language_model = Sequential()
    #language_model.add(Embedding(VOCAB_SIZE, 256, input_length=MAX_CAPTION_LEN))
    #language_model.add(Embedding(VOCAB_SIZE, EMB_SIZE, input_length=MAX_CAPTION_LEN,weights=[emb], trainable=False))
    language_model.add(Embedding(VOCAB_SIZE, EMB_SIZE, input_length=MAX_CAPTION_LEN,weights=[emb]))
    Dropout(0.2)
    language_model.add(BatchNormalization())
    return language_model

In [11]:
def build_model(image_model,language_model):
    model = Sequential()
    model.add(Merge([image_model, language_model], mode='concat'))
    model.add(GRU(1024, return_sequences=True))
    model.add(TimeDistributed(Dense(VOCAB_SIZE, activation = 'softmax')))

    model.compile(loss='categorical_crossentropy', optimizer = Adam(0.001))
    return model


In [16]:
image_model = get_precomputed_input_model()
language_model = get_language_model(emb)
model = build_model(image_model,language_model)

In [17]:
# model.summary()

# Training the model

In [18]:
def generate_arrays_from_file(img_vgg_path,indexed_captions_path,future_words_path):
    while 1:
        img_vgg_elements = os.listdir(img_vgg_path)
        indexed_captions_elements = os.listdir(indexed_captions_path)
        future_words_elements = os.listdir(future_words_path)
        
        img_vgg_elements.sort()
        indexed_captions_elements.sort()
        future_words_elements.sort()

        nr_elem = len(img_vgg_elements)
        
        BATCH_SIZE = 1
        
        for index in range(nr_elem/BATCH_SIZE):
            
            img_vgg_batch_list = []
            indexed_caption_batch_list = []
            future_words_batch_list = []
            
            for elem_in_batch in range(BATCH_SIZE):
                
                img_vgg_el_name = img_vgg_elements[index*BATCH_SIZE + elem_in_batch]
                indexed_caption_name = indexed_captions_elements[index*BATCH_SIZE + elem_in_batch]
                future_words_el_name = future_words_elements[index*BATCH_SIZE + elem_in_batch]

                img_vgg = preproc.load_array(img_vgg_path+"/"+img_vgg_el_name)
                indexed_caption = preproc.load_array(indexed_captions_path+"/"+indexed_caption_name)
                future_words = preproc.load_array(future_words_path+"/"+future_words_el_name)
                
                img_vgg_batch_list.append(img_vgg)
                indexed_caption_batch_list.append(indexed_caption)
                future_words_batch_list.append(future_words)
                
            img_vgg_big = np.vstack(img_vgg_batch_list)
            indexed_caption_big = np.vstack(indexed_caption_batch_list)
            future_words_big = np.vstack(future_words_batch_list)
            
            yield ([img_vgg_big,indexed_caption_big], future_words_big)


In [None]:
history = model.fit_generator(generate_arrays_from_file(train_img_vgg_path,train_indexed_captions_path,train_future_words_path),
                    samples_per_epoch=2048,
                    nb_epoch=10,
                    callbacks=[callbacks.RemoteMonitor()]
#                     validation_data = generate_arrays_from_file(test_img_vgg_path,test_indexed_captions_path,test_future_words_path),
#                     nb_val_samples = 1000
                   )

In [None]:
plot_loss_from_history(history)

In [None]:
# train_score = model.evaluate([train_image_model_input, train_captions_indexed], train_future_words, batch_size=64)
# test_score = model.evaluate([test_image_model_input, test_captions_indexed], test_future_words, batch_size=64)
# print("\nTrain score = %f \nTest score = %f"%(train_score,test_score))

In [None]:
model.save_weights(model_path+'train_82000_generator_17_epoch.h5')

In [19]:
model.load_weights(model_path+'train_82000_generator_17_epoch.h5')

In [None]:
model.evaluate_generator(generate_arrays_from_file(test_img_vgg_path,test_indexed_captions_path,test_future_words_path),
                         val_samples = 2048)

In [None]:
model.fit_generator??

# Testing the model

In [23]:
NO_TEST_IMAGES = 128
TEST_WINDOW_START = 0

# images_concat_t = train_images_concat_t
# real_captions = train_captions_raw

images_concat_t = test_images_concat_t
real_captions = test_captions_raw

(images,predicted_captions) = make_prediction_on_dataset(images_concat_t)


100%|██████████| 128/128 [00:15<00:00,  8.50it/s]


In [None]:
plot_predictions(images,titles = predicted_captions)

In [46]:
predicted_captions

[u'START A wooden bench sitting on a park bench in a park END',
 u'START A train is sitting on the tracks next to a train station END',
 u'START A red double decker bus driving down a street next to a building END',
 u'START A man riding skis on a snow covered slope END',
 u'START A man in a black shirt and tie sitting on a bed END',
 u'START A clock tower with a blue sky END',
 u'START A woman cutting a pizza in a kitchen with a laptop END',
 u'START A man is playing a game of tennis on a court END',
 u'START A city street with a bus stop and a building END',
 u'START A table with a laptop computer and a keyboard and a keyboard END',
 u'START A small white bird is standing in the water END',
 u'START A man is jumping up to catch a skateboard END',
 u'START A man is playing a game of tennis on a court END',
 u'START A woman sitting on a table with a laptop computer END',
 u'START A woman sitting at a table eating a table with a donut END',
 u'START A white cow laying on a lush green fi

In [55]:
real_captions = 

In [56]:
real_captions

[u'START This metal park bench sits outside in the su END',
 u'START Railway train passenger cars with graffiti parked on a gravel lot END',
 u'START A red double decker bus driving under a bridge down a street END',
 u'START A young kid riding skis down a snow covered slope END',
 u'START A couple of men laying in bed under a blanket with a dog END',
 u'START a brick building with a clock and a white steepl END',
 u'START a man sitting on a couch with a cat in his lap playing on a compute END',
 u'START The lady is getting ready to hit a tennis ball END',
 u'START A boat being rowed down the river between two building END',
 u'START A group of desks is cluttered in an office END',
 u'START A bird standing near water and marsh grass END',
 u'START A skateboarder jumping his skateboard in the air END',
 u'START A man standing on a tennis court holding a racquet END',
 u'START A doctor looking at a computer as her patient wait END',
 u'START There is a woman holding a wine glass and a ma

In [24]:
def bleu_score_metric(reality, prediction):
    return nltk.translate.bleu_score.sentence_bleu([reality], prediction)

In [26]:
def compute_bleu_score_metric(predicted_captions, real_captions):
    real_captions = reduce(list.__add__, [list(real_caption) for real_caption in real_captions])
    bleu_scores = [bleu_score_metric(real_captions[i], predicted_captions[i]) for i in range(len(predicted_captions))]
    return np.average(bleu_scores)


In [57]:
compute_bleu_score_metric(predicted_captions, real_captions)

0.41596790734141609

In [58]:
def most_common_words(captions,word_limit):
    
    sentences = [caption.split() for caption in captions]
    words = []
    for word in sentences:
        words.extend(word)

    counter=collections.Counter(words)
    return counter.most_common(word_limit)


In [59]:
common_words2app = most_common_words(predicted_captions,500)
common_words2app = [(word,app) for word,app in common_words2app if word.lower() not in stopwords.words('english')]
common_words2app = [(word,app) for word,app in common_words2app if word not in ['START','END']]


LookupError: 
**********************************************************************
  Resource u'corpora/stopwords' not found.  Please use the NLTK
  Downloader to obtain the resource:  >>> nltk.download()
  Searched in:
    - '/home/docker/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

In [None]:
common_words2app

In [None]:
searched_word = "snow"
(found_images,found_captions) = search_images_by(searched_word,images,predicted_captions)
print("Number of results = %d"%len(found_images))

In [None]:
plot_predictions(found_images,found_captions)

# Make predictions on misc dataset

In [None]:
misc_images = []
for img_path in os.listdir(misc_images_path):
    img = PIL.Image.open(misc_images_path+img_path)
    img = img.resize((224, 224), PIL.Image.NEAREST)
    img = np.asarray(img)
    img = np.transpose(img,(2,0,1))
    img = np.expand_dims(img,axis=0)
    
    misc_images.append(img)
    
stacked_images = np.vstack(misc_images)

In [None]:
(misc_images,misc_predicted_captions) = make_prediction_on_misc_dataset(stacked_images)

plot_predictions(misc_images,misc_predicted_captions)