## Results

In [10]:
import json

import pandas as pd

# Column order of the results table: the model label first, then the metrics.
RESULT_COLUMNS = ['model', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'CIDEr', 'ROUGE_L']

# (display label, evaluation file under predictions/) for every model compared.
EVALUATED_MODELS = [
    ('Nearest neighbour (euclidean) fc6', 'knn-fc6-euc-evaluation.json'),
    ('Nearest neighbour (cosine) fc6', 'knn-fc6-cosine-evaluation.json'),
    ('Nearet neighbour (cosine) xception', 'knn-xception-cosine-evaluation.json'),
    ('LSTM after 1000 steps', 'lstm-1000-evaluation.json'),
    ('LSTM after 3050 steps', 'lstm-3050-evaluation.json'),
    ('LSTM after 5910 steps (no dropout)', 'lstm-no-dropout-final-evaluation.json'),
    ('LSTM after 5910 steps (dropout 0.1)', 'lstm-dropout-final-evaluation.json'),
]

# Collect one record per model and build the frame once at the end.
# Appending to a DataFrame inside the loop copies the frame on every iteration,
# and DataFrame.append no longer exists in pandas >= 2.0.
records = []
for model, file_name in EVALUATED_MODELS:
    with open('predictions/' + file_name, 'r') as f:
        record = json.load(f)  # JSON object holding the scores for this model
    record['model'] = model
    records.append(record)

df = pd.DataFrame(records)
# Put the known columns first in a fixed order; any additional keys found in
# the evaluation files are kept after them, as the row-by-row append did.
df = df.reindex(
    columns=RESULT_COLUMNS + [c for c in df.columns if c not in RESULT_COLUMNS]
)

df

Unnamed: 0,model,Bleu_1,Bleu_2,Bleu_3,Bleu_4,CIDEr,ROUGE_L
0,Nearest neighbour (euclidean) fc6,0.467,0.274,0.162,0.099,0.347,0.351
1,Nearest neighbour (cosine) fc6,0.492,0.296,0.179,0.111,0.4,0.366
2,Nearet neighbour (cosine) xception,0.509,0.312,0.191,0.119,0.464,0.379
3,LSTM after 1000 steps,0.406,0.213,0.109,0.058,0.216,0.307
4,LSTM after 3050 steps,0.439,0.242,0.133,0.074,0.289,0.328
5,LSTM after 5910 steps (no dropout),0.453,0.257,0.144,0.081,0.315,0.34
6,LSTM after 5910 steps (dropout 0.1),0.332,0.132,0.047,0.019,0.032,0.252


## Evaluation

Note: this section needs a Python 2.7 kernel.

In [1]:
# Show matplotlib figures inline in the notebook output.
%matplotlib inline
# COCO annotation/result loader and the caption evaluator used by the cells below.
from pycocotools.coco import COCO
from coco_caption.pycocoevalcap.eval import COCOEvalCap
import matplotlib.pyplot as plt
import skimage.io as io
import pylab
# Default size for figures drawn in this notebook.
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

import json
from json import encoder
# Intended to make the json module write floats with three decimals.
# NOTE(review): this hook appears to be honoured only by Python 2's json
# module (this section requires a 2.7 kernel); on Python 3 it seems to have
# no effect -- confirm before porting.
encoder.FLOAT_REPR = lambda o: format(o, '.3f')

# Directory that contains the `annotations/` folder read by the evaluation cell.
from config import ms_coco_dir

In [6]:
# Predictions file to score; the save cell derives its output name from this path.
resFile = 'predictions/lstm-no-dropout-final.json'

# val2017 caption annotations inside the MS COCO directory from config.
annFile = '%s/annotations/captions_val2017.json' % ms_coco_dir

# Load the annotations first, then load the predictions through the same COCO
# object (loadRes is called on `coco`, so the order matters).
coco = COCO(annFile)
cocoRes = coco.loadRes(resFile)

# Build the evaluator, limit it to the image ids present in the predictions,
# and run it. The scores are printed and also kept on `cocoEval.eval`, which
# the save cell writes to disk.
cocoEval = COCOEvalCap(coco, cocoRes)
cocoEval.params['image_id'] = cocoRes.getImgIds()  # evaluate on a subset of images
cocoEval.evaluate()

loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.03s)
creating index...
index created!
tokenization...
setting up scorers...
computing Bleu score...
{'reflen': 51161, 'guess': [53782, 48782, 43782, 38782], 'testlen': 53782, 'correct': [24368, 7086, 1985, 559]}
ratio: 1.05123042943
Bleu_1: 0.453
Bleu_2: 0.257
Bleu_3: 0.144
Bleu_4: 0.081
computing Rouge score...
ROUGE_L: 0.340
computing CIDEr score...
CIDEr: 0.315


In [7]:
import os

# Store the evaluation results next to the predictions file, e.g.
# 'predictions/foo.json' -> 'predictions/foo-evaluation.json'.
# splitext removes whatever extension resFile has, instead of assuming the
# name ends in exactly the five characters '.json'.
evaluation_file = os.path.splitext(resFile)[0] + '-evaluation.json'
with open(evaluation_file, 'w') as f:
    json.dump(cocoEval.eval, f)