In [None]:

import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import datetime
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
from keras.models import load_model
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw 
%load_ext autoreload
%autoreload 2
from sequence_candidate import SequenceCandidate
from beam_predict import generate_predictions_beam
import dask.dataframe as dd
from dask.multiprocessing import get

In [None]:
# Load the pickled validation-set features; later cells index this object by photo id.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from a trusted source.
# The `with` block closes the file on exit, so no explicit close() call is needed.
with open("../data/features/valid_features.pkl", "rb") as handle:
    valid_features = pickle.load(handle)

In [None]:
# Spot-check one entry: shape of the first element stored under this key.
valid_features["_ExrVJTjGcChfzLH51etAw"][0].shape

In [None]:
# Read the validation split table; later cells use its `photo_id` and `caption` columns.
valid_captions = pd.read_csv("../data/split_lists/valid_ids.csv")

In [None]:
# Preview the first rows of the validation table.
valid_captions.head()

In [None]:
# Load the pickled tokenizer; later cells read its `word_index` mapping.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from a trusted source.
# The `with` block closes the file on exit, so no explicit close() call is needed.
with open("../data/tokenizer/tokenizer.pkl", "rb") as handle:
    tokenizer = pickle.load(handle)

In [None]:
# One more than the number of entries in the tokenizer's word index
# (presumably the vocabulary size including a reserved index 0 -- TODO confirm).
1 + len(tokenizer.word_index)

In [None]:
# Invert the tokenizer's word -> index mapping so an index can be looked up back to its word.
reverse_tokenizer = dict(
    (idx, token) for token, idx in tokenizer.word_index.items()
)

In [None]:
# Load three saved models from .h5 files; add_predictions later passes each one to
# generate_predictions_beam as `caption_model`.
# NOTE(review): the paths are relative to the notebook's working directory.
merge_concat = load_model("models/model_merge-date_5-18-14-40-ep016-loss4.704_lr-0.010000_patience-3.h5")
merge_add = load_model("models/model_merge_add-date_6-4-14-11-ep014-loss4.864_lr-0.010000_patience-3.h5")
inject = load_model("models/model_inject-date_5-16-15-45-ep030-loss5.009_lr-0.010000_patience-3.h5")

In [None]:
# Show the architecture summary of the merge_concat model.
merge_concat.summary()

In [None]:
# Show the architecture summary of the merge_add model.
merge_add.summary()

In [None]:
# Show the architecture summary of the inject model.
inject.summary()

In [None]:
# First 100 rows of the validation table.
# NOTE(review): `tmp` is not referenced by any of the cells visible here -- confirm it is still needed.
tmp = valid_captions.head(100)

In [None]:
# Scratch check of the alpha -> string conversion that add_predictions uses in column names.
str(int(.9*10))

In [None]:
def add_predictions(df):
    """Add one predicted-caption column per (model, alpha) combination.

    For each of the notebook-level models (merge_concat, merge_add, inject)
    and each alpha in (.6, .7, .8), generate_predictions_beam is called once
    per value of ``df.photo_id`` and element ``[0][0]`` of its result is
    stored in a column named ``<model_name>_<int(alpha*10)>_pred``.

    Reads the notebook globals ``valid_features`` and ``reverse_tokenizer``.
    A progress line with a timestamp is printed after each column.

    Args:
        df: DataFrame with a ``photo_id`` column. It is modified in place.

    Returns:
        The same ``df`` object, with the new columns added.
    """
    named_models = (
        ("merge_concat", merge_concat),
        ("merge_add", merge_add),
        ("inject", inject),
    )
    for model_name, caption_model in named_models:
        for alpha in (.6, .7, .8):

            def best_caption(photo_id):
                # presumably [0][0] is the text of the top-ranked candidate -- TODO confirm
                ranked = generate_predictions_beam(
                    img_id=photo_id,
                    features=valid_features,
                    caption_model=caption_model,
                    reverse_tokenizer=reverse_tokenizer,
                    width=3,
                    num_neighbors=5,
                    top_n=1,
                    alpha=alpha,
                )
                return ranked[0][0]

            colname = "_".join((model_name, str(int(alpha * 10)), 'pred'))
            df[colname] = df.photo_id.apply(best_caption)
            finished_at = str(datetime.datetime.now())
            print("done: alpha = %f, model = %s, time: %s" % (alpha, model_name, finished_at))
    return df

In [None]:
# Generate every prediction column for the validation table and time the run.
# add_predictions writes into the frame it is given and returns it, so df_full
# is the same object as valid_captions.
%time df_full = add_predictions(valid_captions)

In [None]:
# Save the table with its prediction columns to a CSV file in the working directory.
df_full.to_csv("df_full.csv")

In [None]:
def _ngram_bleu(ref, cand, weights):
    """Score one candidate sentence against one reference sentence.

    Both strings are split on whitespace. The single reference is wrapped in a
    list because sentence_bleu takes a list of tokenized references.

    Args:
        ref: reference caption as a string.
        cand: candidate caption as a string.
        weights: per-n-gram-order weights passed through to sentence_bleu.

    Returns:
        Whatever sentence_bleu returns for these inputs.
    """
    return sentence_bleu([ref.split()], cand.split(), weights=weights)


def bleu1(ref, cand):
    """BLEU with all weight on unigram precision."""
    return _ngram_bleu(ref, cand, (1.0, 0, 0, 0))


def bleu2(ref, cand):
    """BLEU with weight split evenly over unigram and bigram precision."""
    return _ngram_bleu(ref, cand, (.5, .5, 0, 0))


def bleu3(ref, cand):
    """BLEU with weight split evenly over 1- to 3-gram precision."""
    # Use exact thirds: the previous (.33, .33, .33) summed to 0.99, which
    # slightly inflated every BLEU-3 score. `1.0 / 3` also stays a float
    # division under Python 2.
    third = 1.0 / 3
    return _ngram_bleu(ref, cand, (third, third, third, 0))


def bleu4(ref, cand):
    """BLEU with weight split evenly over 1- to 4-gram precision."""
    return _ngram_bleu(ref, cand, (.25, .25, .25, .25))

In [None]:
# Treat every column from the third onward as a prediction column.
# NOTE(review): assumes the first two columns are the original non-prediction
# ones -- confirm against valid_ids.csv.
pred_cols = df_full.columns[2:]
print(pred_cols)

In [None]:
# Trial run: bleu1 of each `merge_concat_6_pred` value against the row's `caption`, row by row.
df_full.apply(lambda row: bleu1(str(row["caption"]), str(row["merge_concat_6_pred"])), axis = 1)

In [None]:
def add_bleu(df, cols=None):
    """Add BLEU-1 to BLEU-4 score columns for each prediction column of ``df``.

    Each prediction column ``<name>_pred`` produces four new columns
    ``<name>_bleu1`` ... ``<name>_bleu4``, scored by bleu1..bleu4 against the
    ``caption`` column. Both values are passed through ``str()`` first.

    Args:
        df: DataFrame with a ``caption`` column and the prediction columns.
            It is modified in place.
        cols: optional iterable of prediction column names. When omitted,
            every column of ``df`` whose name ends in ``_pred`` is used.

    Returns:
        The same ``df`` object, with the score columns added.
    """
    suffix = "_pred"
    if cols is None:
        # Derive the columns from the frame itself rather than from the
        # notebook-global `pred_cols`, so re-running cells in a different
        # order cannot feed already-computed score columns back in.
        cols = [c for c in df.columns if str(c).endswith(suffix)]
    else:
        # Snapshot the names: new columns are added to df while we iterate.
        cols = list(cols)

    scorers = (bleu1, bleu2, bleu3, bleu4)
    references = [str(value) for value in df["caption"]]
    for col in cols:
        # Strip the suffix only when it is really there, not a blind [:-5].
        base = col[:-len(suffix)] if col.endswith(suffix) else col
        candidates = [str(value) for value in df[col]]
        for ngram, scorer in enumerate(scorers, start=1):
            # Pairing the two columns directly avoids a row-wise df.apply
            # pass per score column and also works on an empty frame.
            df["%s_bleu%d" % (base, ngram)] = [
                scorer(ref, cand) for ref, cand in zip(references, candidates)
            ]
    return df

In [None]:
# Add the BLEU score columns; add_bleu writes into df_full and returns it.
df_full = add_bleu(df_full)

In [None]:
# Toy check of sentence_bleu with its default weights: the candidate is the
# first seven tokens of the single reference.
reference = [['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog']]
candidate = ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the']
sentence_bleu(reference, candidate)

In [None]:
# Toy check with all weight on unigrams: the candidate shares only "chicken" with the reference.
sentence_bleu(["grilled chicken salad".split()] , "shanghai rainbow chicken".split(), weights=(1.0, 0, 0, 0))

In [None]:
# Inspect the first 100 rows of the table.
df_full.head(100)

In [None]:
# Look at the `caption` value of the row at position 85.
df_full.iloc[85].caption