In [79]:
from __future__ import division
import pandas as pd
import numpy as np
import gzip
import pickle

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from apply_model import summarise_eval

pd.set_option('display.precision', 3)

In [80]:
# Names of the two evaluated models. Index 0 is loaded into `results_fr` here,
# index 1 into `results_l2` in the next cell.
model_names = ['best_FR', 'best_L2']

# Read the gzip-compressed pickle holding the first model's evaluation results.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with gzip.open('EvalOut/' + model_names[0] + '_results.pklz', 'r') as results_file:
    results_fr = pickle.load(results_file)

# Show (rows, columns) as a quick sanity check.
results_fr.shape

(4950, 11)

In [81]:
# Read the gzip-compressed pickle holding the second model's evaluation results.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with gzip.open('EvalOut/' + model_names[1] + '_results.pklz', 'r') as results_file:
    results_l2 = pickle.load(results_file)

# Show (rows, columns) as a quick sanity check.
results_l2.shape

(4950, 11)

In [82]:
# Rows of the FR results whose 'suc' value equals True.
# Despite its name, `fr_succ_count` is a DataFrame; its length is the count.
fr_succ_count = results_fr[results_fr['suc'].eq(True)]
len(fr_succ_count)

2335

In [83]:
# Rows of the L2 results whose 'suc' value equals True.
# Despite its name, `l2_succ_count` is a DataFrame; its length is the count.
l2_succ_count = results_l2[results_l2['suc'].eq(True)]
len(l2_succ_count)

2349

In [84]:
# Outer-join the two result frames on their shared key columns. All remaining
# columns appear twice, suffixed '_l2' (from results_l2) and '_fr' (from results_fr).
results = results_l2.merge(results_fr, how='outer', suffixes=('_l2', '_fr'),
                           on=['i_corpus', 'image_id', 'region_id', 'r_corpus', 'rex_id', 'refexp_EN', 'refexp'])

# Keep the rows where exactly one of the two models succeeded.
# Comparing the raw columns with `!=` would also keep rows in which BOTH values
# are missing, because NaN != NaN evaluates to True. `== True` first maps a
# missing value to False, i.e. a missing outcome is treated like a failure.
succ_diff = results.loc[(results['suc_l2'] == True) != (results['suc_fr'] == True)]
succ_diff

Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp_EN,refexp,cov_l2,suc_l2,rnk_l2,nob_l2,cov_fr,suc_fr,rnk_fr,nob_fr
1,0,20909,3,referit,35225,dirt ground,terre battue,0.50,True,1.00,3.00,0.50,False,2.00,3.00
3,0,2050,10,referit,56,man in red sweater,homme en pull rouge,0.75,True,1.00,12.00,0.75,False,2.00,12.00
4,0,2050,10,referit,2465,man in red,homme en rouge,1.00,True,1.00,12.00,1.00,False,2.00,12.00
16,0,2050,10,referit,109509,man in red,homme en rouge,1.00,True,1.00,12.00,1.00,False,2.00,12.00
21,0,19314,6,referit,29175,bottom left,en bas à gauche,1.00,True,1.00,8.00,1.00,False,5.00,8.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4930,0,31223,1,referit,98321,anywhere on animal,n'importe où sur l'animal,0.75,True,1.00,2.00,0.75,False,2.00,2.00
4931,0,31223,2,referit,112258,bottom,bas,1.00,True,1.00,2.00,1.00,False,2.00,2.00
4934,0,32216,2,referit,100562,grey wall,mur gris,0.50,True,1.00,2.00,0.50,False,2.00,2.00
4936,0,31967,1,referit,101875,middle grass,herbe au milieu,1.00,True,1.00,2.00,1.00,False,2.00,2.00


In [85]:
# Of the rows where the two models disagree, those on which the L2 model
# succeeded ('suc_l2' equals True).
succ_l2 = succ_diff[succ_diff['suc_l2'].eq(True)]
succ_l2

Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp_EN,refexp,cov_l2,suc_l2,rnk_l2,nob_l2,cov_fr,suc_fr,rnk_fr,nob_fr
1,0,20909,3,referit,35225,dirt ground,terre battue,0.50,True,1.00,3.00,0.50,False,2.00,3.00
3,0,2050,10,referit,56,man in red sweater,homme en pull rouge,0.75,True,1.00,12.00,0.75,False,2.00,12.00
4,0,2050,10,referit,2465,man in red,homme en rouge,1.00,True,1.00,12.00,1.00,False,2.00,12.00
16,0,2050,10,referit,109509,man in red,homme en rouge,1.00,True,1.00,12.00,1.00,False,2.00,12.00
21,0,19314,6,referit,29175,bottom left,en bas à gauche,1.00,True,1.00,8.00,1.00,False,5.00,8.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4930,0,31223,1,referit,98321,anywhere on animal,n'importe où sur l'animal,0.75,True,1.00,2.00,0.75,False,2.00,2.00
4931,0,31223,2,referit,112258,bottom,bas,1.00,True,1.00,2.00,1.00,False,2.00,2.00
4934,0,32216,2,referit,100562,grey wall,mur gris,0.50,True,1.00,2.00,0.50,False,2.00,2.00
4936,0,31967,1,referit,101875,middle grass,herbe au milieu,1.00,True,1.00,2.00,1.00,False,2.00,2.00


In [86]:
# Of the rows where the two models disagree, those on which the FR model
# succeeded ('suc_fr' equals True).
succ_fr = succ_diff[succ_diff['suc_fr'].eq(True)]
succ_fr

Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp_EN,refexp,cov_l2,suc_l2,rnk_l2,nob_l2,cov_fr,suc_fr,rnk_fr,nob_fr
33,0,12953,1,referit,5670,whole table,table entière,0.50,False,2.00,21.00,0.50,True,1.00,21.00
34,0,12953,1,referit,8303,table,table,1.00,False,2.00,21.00,1.00,True,1.00,21.00
38,0,12953,1,referit,21655,table,table,1.00,False,2.00,21.00,1.00,True,1.00,21.00
42,0,12953,1,referit,45406,table,table,1.00,False,2.00,21.00,1.00,True,1.00,21.00
43,0,12953,1,referit,45458,table,table,1.00,False,2.00,21.00,1.00,True,1.00,21.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4857,0,31001,1,referit,76366,bottom left corner dirt,coin inférieur gauche saleté,1.00,False,2.00,3.00,1.00,True,1.00,3.00
4860,0,31001,1,referit,114316,dirt bottom left,saleté en bas à gauche,1.00,False,2.00,3.00,1.00,True,1.00,3.00
4889,0,27030,1,referit,100692,sky,ciel,1.00,False,2.00,3.00,1.00,True,1.00,3.00
4906,0,39837,2,referit,86021,red carpet,tapis rouge,0.50,False,2.00,2.00,0.50,True,1.00,2.00


In [87]:
# Inspect the FR-only successes whose referring expression is exactly 'table'.
succ_fr[succ_fr['refexp'] == 'table']

Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp_EN,refexp,cov_l2,suc_l2,rnk_l2,nob_l2,cov_fr,suc_fr,rnk_fr,nob_fr
34,0,12953,1,referit,8303,table,table,1.0,False,2.0,21.0,1.0,True,1.0,21.0
38,0,12953,1,referit,21655,table,table,1.0,False,2.0,21.0,1.0,True,1.0,21.0
42,0,12953,1,referit,45406,table,table,1.0,False,2.0,21.0,1.0,True,1.0,21.0
43,0,12953,1,referit,45458,table,table,1.0,False,2.0,21.0,1.0,True,1.0,21.0
48,0,12953,1,referit,64935,table,table,1.0,False,2.0,21.0,1.0,True,1.0,21.0


In [88]:
# All rows of the merged frame on which the L2 model succeeded.
# Requires `results` to still be the merged frame with the '_l2' / '_fr' columns.
total_l2 = results[results['suc_l2'].eq(True)]
len(total_l2)

2349

In [89]:
# All rows of the merged frame on which the FR model succeeded.
# Requires `results` to still be the merged frame with the '_l2' / '_fr' columns.
total_fr = results[results['suc_fr'].eq(True)]
len(total_fr)

2335

In [90]:
def mrr_f(series):
    """Return the mean of the element-wise reciprocals ``1 / x`` of ``series``.

    In this notebook it is applied to the 'rnk' and 'nob' columns of a
    results frame.
    """
    def _reciprocal(value):
        return 1 / value

    reciprocals = series.apply(_reciprocal)
    return np.mean(reciprocals)
def acc_f(series):
    """Return the fraction of entries in ``series`` that are non-zero.

    Missing values (NaN) are replaced by 0 via ``np.nan_to_num`` before
    counting, so they count as failures but still contribute to the
    denominator.

    Returns ``nan`` for an empty ``series`` (for example an empty length bin)
    instead of raising ``ZeroDivisionError``.
    """
    n_total = len(series)
    if n_total == 0:
        # Accuracy over nothing is undefined; NaN keeps the summary table intact.
        return float('nan')
    n_hits = np.count_nonzero(np.nan_to_num(series.tolist()))
    return n_hits / n_total

In [91]:
def summarise_rdf(rdf):
    """Summarise one results frame as a flat dict of metrics.

    ``rdf`` must provide the columns 'suc', 'rnk', 'cov', 'nob' and 'refexp'.

    Keys of the returned dict:

    * ``*-full`` -- computed over all rows of ``rdf``:
      'acc' = ``acc_f`` of 'suc', 'mrr' = ``mrr_f`` of 'rnk',
      'arc' = mean of 'cov', 'rnd' = ``mrr_f`` of 'nob'.
    * ``'>0 wrcov'`` -- share of rows with ``cov > 0``.
    * ``*->0wc`` -- the same four metrics restricted to rows with ``cov > 0``.
    * ``'acc-b1-2'`` / ``'acc-b3-4'`` / ``'acc-b5-6'`` -- ``acc_f`` of 'suc' for
      ``cov > 0`` rows whose 'refexp' has 1-2, 3-4 or 5-6 whitespace-separated
      tokens; ``'12%'`` / ``'34%'`` / ``'56%'`` are the shares of ``cov > 0``
      rows falling into each of those bins. Longer expressions are in no bin.
    """
    # Metrics over the complete frame.
    summary = {
        'acc-full': acc_f(rdf['suc']),
        'mrr-full': mrr_f(rdf['rnk']),
        'arc-full': rdf['cov'].mean(),
        'rnd-full': mrr_f(rdf['nob']),
    }

    # Same metrics over the rows with a strictly positive 'cov' value.
    covered = rdf.query('cov > 0')
    summary['>0 wrcov'] = len(covered) / len(rdf)
    summary['acc->0wc'] = acc_f(covered['suc'])
    summary['mrr->0wc'] = mrr_f(covered['rnk'])
    summary['arc->0wc'] = covered['cov'].mean()
    summary['rnd->0wc'] = mrr_f(covered['nob'])

    # Accuracy per expression-length bin; each bin is (lower, upper] in tokens.
    token_counts = covered['refexp'].apply(lambda text: len(text.split()))
    length_bins = ((0, 2, 'acc-b1-2', '12%'),
                   (2, 4, 'acc-b3-4', '34%'),
                   (4, 6, 'acc-b5-6', '56%'))
    for lower, upper, acc_key, share_key in length_bins:
        in_bin = covered[(token_counts > lower) & (token_counts <= upper)]
        summary[acc_key] = acc_f(in_bin['suc'])
        summary[share_key] = len(in_bin) / len(covered)
    return summary

In [92]:
# Build three variants of each model's result frame and collect them as
# (label, frame) pairs in `rlist`:
#   'drop rfx NaN' -- rows lacking 'image_id' or 'refexp' removed
#   'drop all NaN' -- additionally, rows with a NaN in any column removed
#   'fill NaNs'    -- instead, missing 'cov' set to 0.0 and missing 'suc' to False
short_names = ['FR', 'L2']
rlist = []
# The loop variable must not be called `results`: that name holds the merged
# L2/FR frame built earlier, and rebinding it here would break a re-run of the
# cells that read its 'suc_l2' / 'suc_fr' columns.
for name, model_results in zip(short_names, [results_fr, results_l2]):
    rdf = model_results.dropna(axis=0, subset=['image_id', 'refexp'])
    no_na = rdf.dropna(axis=0)
    filledna = rdf.fillna(value={'cov': 0.0, 'suc': False})

    rlist.extend([(name + ': drop rfx NaN', rdf),
                  (name + ': drop all NaN', no_na),
                  (name + ': fill NaNs', filledna)])
len(rlist)

6

In [93]:
# One summary dict per (label, frame) pair in `rlist`; the labels become the
# row index of the resulting table.
index = []
rows = []

for model, rdf in rlist:
    index.append(model)
    this_resdict = summarise_rdf(rdf)
    # NOTE(review): '%tst' is a constant 1.0 for every row here -- presumably
    # the share of the test set that was used; confirm.
    this_resdict['%tst'] = 1.0
    rows.append(this_resdict)

# The DataFrame constructor accepts a list of per-row dicts directly, so no
# manual transposition into per-column lists is needed. The explicit column
# selection fixes the presentation order.
full_df = pd.DataFrame(rows, index=index)[
    ['%tst', 'acc-full', 'mrr-full', 'arc-full', 'rnd-full', '>0 wrcov',
     'acc->0wc', 'mrr->0wc', 'arc->0wc', 'rnd->0wc',
     'acc-b1-2', '12%', 'acc-b3-4', '34%', 'acc-b5-6', '56%']]

In [94]:
# Render floats with two decimals from here on, then show the summary table.
pd.options.display.float_format = '{:.2f}'.format
full_df

Unnamed: 0,%tst,acc-full,mrr-full,arc-full,rnd-full,>0 wrcov,acc->0wc,mrr->0wc,arc->0wc,rnd->0wc,acc-b1-2,12%,acc-b3-4,34%,acc-b5-6,56%
FR: drop rfx NaN,1.0,0.47,0.67,0.82,0.21,0.93,0.51,0.67,0.89,0.21,0.69,0.37,0.43,0.35,0.39,0.18
FR: drop all NaN,1.0,0.51,0.67,0.89,0.21,1.0,0.51,0.67,0.89,0.21,0.69,0.37,0.43,0.35,0.39,0.18
FR: fill NaNs,1.0,0.47,0.67,0.82,0.21,0.93,0.51,0.67,0.89,0.21,0.69,0.37,0.43,0.35,0.39,0.18
L2: drop rfx NaN,1.0,0.47,0.68,0.82,0.21,0.93,0.51,0.68,0.89,0.21,0.69,0.37,0.44,0.35,0.39,0.18
L2: drop all NaN,1.0,0.51,0.68,0.89,0.21,1.0,0.51,0.68,0.89,0.21,0.69,0.37,0.44,0.35,0.39,0.18
L2: fill NaNs,1.0,0.47,0.68,0.82,0.21,0.93,0.51,0.68,0.89,0.21,0.69,0.37,0.44,0.35,0.39,0.18


In [96]:
# L2 rows whose 'rnk' value is at most 2.
top2_l2 = results_l2[results_l2['rnk'].le(2.0)]
len(top2_l2)

3215

In [97]:
# FR rows whose 'rnk' value is at most 2.
top2_fr = results_fr[results_fr['rnk'].le(2.0)]
len(top2_fr)

3180

In [100]:
# Per-model summaries from the project helper `summarise_eval`.
# NOTE(review): the code below reads row label 0 and the columns 'acc', 'mrr'
# and 'acv' from each summary -- presumably a one-row DataFrame; confirm.
sfr, sl2 = summarise_eval(results_fr), summarise_eval(results_l2)

# Every list holds the FR value first and the L2 value second.
total = [len(frame) for frame in (total_fr, total_l2)]   # successes overall
diff = [len(frame) for frame in (succ_fr, succ_l2)]      # successes only this model had
top2 = [len(hits) / len(everything)                      # share of rows with rnk <= 2
        for hits, everything in ((top2_fr, results_fr), (top2_l2, results_l2))]
acc, mrr, acv = [[summary.loc[0, column] for summary in (sfr, sl2)]
                 for column in ('acc', 'mrr', 'acv')]

table = pd.DataFrame(
    {
        'acc': acc,
        'total suc': total,
        'diff': diff,
        'mrr': mrr,
        'acc for rank <3': top2,
        'avg cov': acv,
    },
    index=['FR', 'FR as L2'],
)
table

Unnamed: 0,acc,total suc,diff,mrr,acc for rank <3,avg cov
FR,0.47,2335,467,0.67,0.64,0.82
FR as L2,0.47,2349,481,0.68,0.65,0.82


In [192]:
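# Disabled: export of the summary table(s) to a gzip-compressed pickle.
# NOTE(review): `full_df_rp` is not defined in any cell shown in this notebook;
# define it (or drop it from the tuple) before re-enabling this cell.
#with gzip.open('EvalOut/full_df_precomp.pklz', 'w') as f:
#    pickle.dump((full_df, full_df_rp), f)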