In [None]:
from keras.models import Sequential,Model
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers import Embedding,GRU,TimeDistributed,RepeatVector,Merge,BatchNormalization,Input
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Embedding,LSTM,GRU,TimeDistributed,RepeatVector,Merge,Input,merge,UpSampling2D
from keras.preprocessing import sequence
from keras import callbacks
from keras.optimizers import SGD, RMSprop, Adam

import numpy as np
from vgg16 import Vgg16
import matplotlib.pyplot as plt
import PIL.Image

from tqdm import tqdm

from utils import *

import cPickle as pickle
import string

import collections
import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords

import re
from numpy.random import random, permutation, randn, normal 

import os

import preprocessing as preproc

import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from matplotlib import animation
from IPython.display import display, HTML

import os 
import pandas as pd
import PIL.Image

In [None]:
def get_vgg_model():
    """Build the image feature extractor from the project's ``Vgg16`` wrapper.

    Takes the ``model`` attribute of a fresh ``Vgg16()`` instance, removes its
    two topmost layers, marks the network as non-trainable and appends a
    ``RepeatVector`` so the remaining output is repeated ``MAX_CAPTION_LEN``
    times.

    Returns:
        The modified model.
    """
    vgg = Vgg16().model
    # Strip the two last layers of the wrapped network.
    for _ in range(2):
        vgg.pop()
    vgg.trainable = False
    vgg.add(RepeatVector(MAX_CAPTION_LEN))
    return vgg

def get_precomputed_input_model():
    """Build the image branch used when features are already computed.

    The branch consists of a single ``RepeatVector`` layer that accepts a
    4096-value vector per sample and repeats it ``MAX_CAPTION_LEN`` times.

    Returns:
        A ``Sequential`` model containing only that ``RepeatVector`` layer.
    """
    repeat_layer = RepeatVector(MAX_CAPTION_LEN, input_shape=(4096,))
    precomputed_branch = Sequential()
    precomputed_branch.add(repeat_layer)
    return precomputed_branch

# Caption input branches (embedding + batch normalisation) that feed the GRU model

def get_language_model(emb):
    """Build the caption input branch: an embedding followed by batch norm.

    Args:
        emb: Initial weight matrix handed to the ``Embedding`` layer as
            ``weights=[emb]``. Presumably shaped (VOCAB_SIZE, EMB_SIZE) --
            confirm against ``preproc.create_emb``.

    Returns:
        A ``Sequential`` model made of an ``Embedding`` layer and a
        ``BatchNormalization`` layer.
    """
    language_model = Sequential()
    language_model.add(Embedding(VOCAB_SIZE, EMB_SIZE, input_length=MAX_CAPTION_LEN, weights=[emb]))
    # NOTE(review): a bare `Dropout(0.5)` expression used to sit here. It built a
    # layer object and threw it away (it was never passed to `.add()`), so no
    # dropout was ever part of this branch. The dead statement is removed rather
    # than wired in so the architecture keeps matching the saved weights; add it
    # explicitly with `language_model.add(Dropout(0.5))` if you retrain.
    language_model.add(BatchNormalization())
    return language_model

def get_reinforcement_model(embedding_weights=None):
    """Build the third input branch: an embedding followed by batch norm.

    ``make_prediction`` feeds this branch the previous-word sequence.

    Args:
        embedding_weights: Optional initial weight matrix for the ``Embedding``
            layer. When omitted, the notebook-level ``emb`` variable is used,
            which is what this function always did before the parameter
            existed.

    Returns:
        A ``Sequential`` model made of an ``Embedding`` layer and a
        ``BatchNormalization`` layer.
    """
    if embedding_weights is None:
        # Backward-compatible default: fall back to the global embedding matrix.
        embedding_weights = emb

    reinforcement_model = Sequential()
    reinforcement_model.add(
        Embedding(VOCAB_SIZE, EMB_SIZE, input_length=MAX_CAPTION_LEN, weights=[embedding_weights])
    )
    # NOTE(review): a bare `Dropout(0.5)` expression used to sit here. It was
    # never passed to `.add()`, so no dropout was ever applied. The dead
    # statement is removed so the architecture keeps matching the saved weights.
    reinforcement_model.add(BatchNormalization())
    return reinforcement_model

# Top level model

def build_model(image_model,language_model,reinforcement_model):
    """Assemble and compile the captioning network from its three branches.

    The branch outputs are concatenated, passed through two 1024-unit GRU
    layers (both returning full sequences, with dropout between them) and then
    through a per-timestep softmax over ``VOCAB_SIZE`` classes.

    Args:
        image_model: Branch providing the repeated image features.
        language_model: Branch embedding the caption generated so far.
        reinforcement_model: Branch embedding the previous-word sequence.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer constructed with 0.001).
    """
    branches = [image_model, language_model, reinforcement_model]
    layer_stack = [
        Merge(branches, mode='concat'),
        GRU(1024, activation='relu', return_sequences=True),
        Dropout(0.5),
        GRU(1024, activation='relu', return_sequences=True),
        TimeDistributed(Dense(VOCAB_SIZE, activation='softmax')),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer=Adam(0.001), loss='categorical_crossentropy')
    return model

def get_vgg_features(img_name):
    """Run one image through ``vgg_model`` and return the prediction.

    Relies on the notebook-level globals ``base_img_folder`` (directory that
    contains the image) and ``vgg_model`` (the feature extractor).

    Args:
        img_name: File name of the image inside ``base_img_folder``.

    Returns:
        Whatever ``vgg_model.predict`` returns for a batch holding this single
        image.
    """
    full_img_name = os.path.join(base_img_folder, img_name)
    img = PIL.Image.open(full_img_name)

    # Normalise every colour mode to 3-channel RGB. Greyscale images end up with
    # the same value in all three channels, exactly as the previous manual
    # channel stacking did. The previous shape check treated *every* array that
    # was not (224, 224, 3) as single-channel, which produced a malformed array
    # for 4-channel modes such as RGBA or CMYK.
    img = img.convert("RGB")
    img = img.resize((224, 224), PIL.Image.NEAREST)

    pixels = np.asarray(img)
    pixels = np.transpose(pixels, (2, 0, 1))  # (height, width, channel) -> (channel, height, width)
    batch = np.expand_dims(pixels, axis=0)    # add the batch axis

    return vgg_model.predict(batch)

## Load Model

In [None]:
# Folder holding the preprocessed data for this experiment. `data_path` is not
# defined in the cells shown here (presumably it comes from `from utils import *`
# -- confirm).
base_path = data_path+"app-100-length-15/"
MAX_CAPTION_LEN = 15 # ATTENTION HERE: presumably must agree with the "length-15" data folder and weights file -- confirm


In [None]:
# Load the stored vocabulary structures through `preproc.load_obj`.
# `general_datastruct_folder` is not defined in the cells shown here
# (presumably provided by `from utils import *` -- confirm).
unique_words = preproc.load_obj(base_path + general_datastruct_folder+"unique_words")
word2index = preproc.load_obj(base_path+general_datastruct_folder+"word2index")
index2word = preproc.load_obj(base_path+general_datastruct_folder+"index2word")
# Vocabulary size used for the embedding layers and the softmax output.
VOCAB_SIZE = len(unique_words)


In [None]:
# Width of the word embeddings; also selects which vector file is loaded below.
EMB_SIZE = 300
# `save_path` and `glove_folder` are not defined in the cells shown here. The
# file name pattern ("6B.<dim>d") suggests pre-trained GloVe 6B vectors -- confirm.
vecs, words, wordidx = preproc.load_vectors(save_path+glove_folder+"6B."+str(EMB_SIZE)+"d")

# Embedding matrix passed as initial weights to the Embedding layers.
emb = preproc.create_emb(vecs, words, wordidx,index2word,VOCAB_SIZE)


In [None]:
# `vgg_model` is the network `get_vgg_features` calls to turn raw images into features.
vgg_model = get_vgg_model()
# The captioning model itself takes precomputed features, so its image branch
# is the lightweight RepeatVector-only model.
image_model = get_precomputed_input_model()
language_model = get_language_model(emb)
reinforcement_model = get_reinforcement_model()
model = build_model(image_model,language_model,reinforcement_model)

In [None]:
# Restore trained weights into the freshly built model.
# NOTE(review): the file name says "2x1048" while build_model creates GRU layers
# with 1024 units -- presumably just a typo in the file name; confirm.
model.load_weights(save_path + models_folder+"big/" +'app_100_length_15_past_word_20_epoch_300d_gru_2x1048_big.h5')

# Create final testing dict

In [None]:
# Directory with the raw validation images; read by get_vgg_features.
# NOTE(review): hard-coded absolute path -- adjust for your environment.
base_img_folder = "/home/docker/fastai-courses/deeplearning1/nbs/persistent/coco/raw_images/val2014"

In [None]:
# Lexicographically sorted names of every entry in the image folder.
img_names = sorted(os.listdir(base_img_folder))

In [None]:
# One row per listed file name, stored in a single "img_name" column.
data_df = pd.DataFrame(img_names, columns = ["img_name"])
# Displayed as the cell output: (number of rows, number of columns).
data_df.shape

In [None]:
# Preview the first rows of the file-name table.
data_df.head()

In [None]:
# Calls get_vgg_features once per row, i.e. one `vgg_model.predict` call per
# image, and stores each result in the new "vgg" column. Expect this to be slow.
data_df['vgg'] = data_df['img_name'].apply(get_vgg_features)

In [None]:
# Shape check after adding the "vgg" column.
data_df.shape

In [None]:
# Preview the table now that it carries the "vgg" column.
data_df.head()

In [None]:
# Output directory for the cached feature table and the prediction JSON.
# NOTE(review): hard-coded absolute path -- adjust for your environment.
base_data_folder = "/home/docker/fastai-courses/deeplearning1/nbs/persistent/coco/coco-validation/base-data/"

In [None]:
# Persist file names + features so the feature extraction does not have to be
# repeated; the "Generate Predictions" section reloads this same file.
data_df.to_pickle(base_data_folder+"all-valid-df.p")

# Generate Predictions

In [None]:
# Reload the cached table written by `to_pickle` earlier in this notebook.
# NOTE(review): unpickling can execute arbitrary code -- only load files you created.
data_df = pd.read_pickle(base_data_folder+"all-valid-df.p")
data_df.shape

In [None]:
def make_prediction(vgg_features):
    """Greedily decode a caption for one image from its VGG features.

    Relies on the notebook-level globals ``model``, ``word2index``,
    ``index2word`` and ``MAX_CAPTION_LEN``.

    At every step the model is run on the caption built so far, the most
    probable token at the current position is appended, and decoding stops
    once "END" is produced or ``MAX_CAPTION_LEN - 1`` tokens were generated.

    Args:
        vgg_features: Stored output of ``get_vgg_features`` for one image.
            After squeezing, only its first row is used and reshaped to
            (1, 4096).

    Returns:
        The generated words joined by single spaces, without the "START" and
        "END" markers.
    """
    start_index = word2index["START"]
    padded_start = sequence.pad_sequences([[start_index]], maxlen=MAX_CAPTION_LEN, padding='post')

    image_input = np.squeeze(vgg_features)[0].reshape(1, 4096)

    # Caption generated so far: START at position 0, padding zeros elsewhere.
    caption_ids = np.expand_dims(padded_start[0], axis=0)
    # Independent copy holding, at each position, the token that preceded it.
    prev_word_ids = np.expand_dims(list(padded_start[0]), axis=0)

    predicted_ids = []
    step = 0
    while step < MAX_CAPTION_LEN - 1:
        step_predictions = model.predict([image_input, caption_ids, prev_word_ids])[0]
        next_id = np.argmax(step_predictions[step])

        predicted_ids.append(next_id)
        caption_ids[0, step + 1] = next_id
        prev_word_ids[0, step + 1] = caption_ids[0, step]
        step += 1

        if index2word[next_id] == "END":
            break

    words = [index2word[token_id] for token_id in predicted_ids]
    # Drop the terminator only when it was actually produced. The previous
    # slice `[1:i]` removed the last token unconditionally, so a caption that
    # hit the length limit without "END" lost its final real word.
    if words and words[-1] == "END":
        words.pop()

    return ' '.join(words)



In [None]:
# Decode one caption per image from its stored features; make_prediction calls
# `model.predict` once per generated token, so this cell is slow.
data_df['caption'] = data_df['vgg'].apply(make_prediction)

In [None]:
# Eyeball the first ten generated captions.
data_df["caption"].tolist()[:10]

In [None]:
# Shape check after adding the "caption" column.
data_df.shape

In [None]:
# Preview the table now that it carries the "caption" column.
data_df.head()

In [None]:
def get_img_id(path):
    """Extract the numeric image id from a COCO-style image file name.

    The id is the third underscore-separated field of the file name, taken
    before the first dot, e.g. ``"COCO_val2014_000000000042.jpg"`` -> ``42``.

    Args:
        path: Image file name, optionally prefixed with directories.

    Returns:
        The image id as an integer.

    Raises:
        IndexError: If the file name has fewer than three "_"-separated fields.
        ValueError: If the third field is not a decimal number.
    """
    # Only look at the file name so dots or underscores in directory names
    # cannot interfere with the parsing.
    file_name = os.path.basename(path)
    id_field = file_name.split(".")[0].split("_")[2]
    # `int` works on both Python 2 and 3 (`long` exists only on Python 2).
    return int(id_field)

In [None]:
# Derive the numeric image id from each file name.
data_df['image_id'] = data_df["img_name"].apply(get_img_id)

In [None]:
# Write the predictions as a JSON list of {"image_id", "caption"} records; the
# "Evaluate" section reads this same file as `resFile`.
data_df[["image_id","caption"]].to_json(base_data_folder+"all-valid-df-with-pred.json",orient="records")

In [None]:
# Final preview, now including the "image_id" column.
data_df.head()

## Evaluate

In [1]:
import sys
# Make the packages located in the coco-validation folder importable
# (presumably where pycocotools / pycocoevalcap were checked out -- confirm).
# NOTE(review): hard-coded absolute path -- adjust for your environment.
sys.path.append("/home/docker/fastai-courses/deeplearning1/nbs/persistent/coco/coco-validation")

from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap

In [2]:
# Reference caption annotations passed to COCO(...).
annFile='/home/docker/fastai-courses/deeplearning1/nbs/persistent/coco/coco-validation/annotations/captions_val2014.json'
# Predictions file passed to coco.loadRes(...); same path the notebook wrote
# with `to_json` in the "Generate Predictions" section.
resFile = "/home/docker/fastai-courses/deeplearning1/nbs/persistent/coco/coco-validation/base-data/all-valid-df-with-pred.json"

In [3]:
# Create the COCO object from the reference annotations, then load the
# predictions file through it to obtain the results object (cocoRes).
coco = COCO(annFile)
cocoRes = coco.loadRes(resFile)

loading annotations into memory...
0:00:00.649435
creating index...
index created!
Loading and preparing results...     
DONE (t=0.16s)
creating index...
index created!


In [5]:
# Create the evaluator from the reference annotations (coco) and the predictions (cocoRes).
cocoEval = COCOEvalCap(coco, cocoRes)

# Run the evaluation; the captured output below reports Bleu_1-4, METEOR,
# ROUGE_L and CIDEr.
cocoEval.evaluate()

tokenization...
setting up scorers...
computing Bleu score...
{'reflen': 381670, 'guess': [381985, 341481, 300977, 260473], 'testlen': 381985, 'correct': [247171, 112227, 46585, 19826]}
ratio: 1.0008253203
Bleu_1: 0.647
Bleu_2: 0.461
Bleu_3: 0.320
Bleu_4: 0.224
computing METEOR score...
METEOR: 0.212
computing Rouge score...
ROUGE_L: 0.473
computing CIDEr score...
CIDEr: 0.679
