## Results

In [10]:
import json

import pandas as pd

# Build the comparison table of caption metrics.
# Each entry pairs a display name with the evaluation JSON file (under
# predictions/) that holds that model's metric dictionary.
# `json` is imported here so this cell does not depend on a later cell
# having been executed first.
records = []
for model, file_name in [
    ('Nearest neighbour (euclidean) fc6', 'knn-fc6-euc-evaluation.json'),
    ('Nearest neighbour (cosine) fc6', 'knn-fc6-cosine-evaluation.json'),
    ('Nearet neighbour (cosine) xception', 'knn-xception-cosine-evaluation.json'),
    ('LSTM after 1000 steps', 'lstm-1000-evaluation.json')
]:
    with open('predictions/' + file_name, 'r') as f:
        d = json.load(f)
    d['model'] = model
    records.append(d)

# Construct the frame once from all records instead of appending row by row:
# DataFrame.append copies the whole frame on every call and no longer exists
# in recent pandas releases. `columns` fixes the column order of the table;
# keys not listed here are not included.
df = pd.DataFrame(
    records,
    columns=['model', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'CIDEr', 'ROUGE_L'],
)

df

Unnamed: 0,model,Bleu_1,Bleu_2,Bleu_3,Bleu_4,CIDEr,ROUGE_L
0,Nearest neighbour (euclidean) fc6,0.467,0.274,0.162,0.099,0.347,0.351
1,Nearest neighbour (cosine) fc6,0.492,0.296,0.179,0.111,0.4,0.366
2,Nearet neighbour (cosine) xception,0.509,0.312,0.191,0.119,0.464,0.379
3,LSTM after 1000 steps,0.406,0.213,0.109,0.058,0.216,0.307


## Evaluation

Note: this section needs a Python 2.7 kernel.

In [1]:
%matplotlib inline
from pycocotools.coco import COCO
from coco_caption.pycocoevalcap.eval import COCOEvalCap
import matplotlib.pyplot as plt
import skimage.io as io
import pylab
# Set the 'figure.figsize' entry of pylab's rcParams to (10.0, 8.0) for this notebook.
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

import json
from json import encoder
def _three_decimal_repr(value):
    """Return `value` formatted with exactly three digits after the decimal point."""
    return format(value, '.3f')


# Install the formatter as json.encoder's FLOAT_REPR attribute.
# NOTE(review): this is presumably meant to make json.dump write floats with
# three decimals; it relies on the json implementation of the kernel in use
# consulting this attribute - confirm if the kernel version changes.
encoder.FLOAT_REPR = _three_decimal_repr

from config import ms_coco_dir

In [4]:
# Predictions file to score; the cell that saves the evaluation derives its
# output file name from this path.
resFile = 'predictions/lstm-1000.json'

# Annotation file path, built under ms_coco_dir (imported from config).
annFile = '%s/annotations/captions_val2017.json' % ms_coco_dir

# Load the annotations, then load the predictions in resFile through the
# annotation object.
coco = COCO(annFile)
cocoRes = coco.loadRes(resFile)

# Build the evaluator from annotations and predictions, limit it to the image
# ids reported by the loaded results, and run the evaluation.
cocoEval = COCOEvalCap(coco, cocoRes)
cocoEval.params['image_id'] = cocoRes.getImgIds()  # only the image ids returned by cocoRes are evaluated
cocoEval.evaluate()

loading annotations into memory...
Done (t=0.18s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.10s)
creating index...
index created!
tokenization...
setting up scorers...
computing Bleu score...
{'reflen': 51864, 'guess': [54851, 49851, 44851, 39851], 'testlen': 54851, 'correct': [22244, 5585, 1263, 341]}
ratio: 1.05759293537
Bleu_1: 0.406
Bleu_2: 0.213
Bleu_3: 0.109
Bleu_4: 0.058
computing Rouge score...
ROUGE_L: 0.307
computing CIDEr score...
CIDEr: 0.216


In [5]:
# Derive the output path from resFile: drop its last five characters
# ('.json' for the resFile used in this notebook) and append '-evaluation.json'.
evaluation_path = resFile[:-5] + '-evaluation.json'

# Write the metric dictionary held in cocoEval.eval to that file as JSON.
with open(evaluation_path, 'w') as out_file:
    json.dump(cocoEval.eval, out_file)