In [4]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
from utils.LETORIterator import LETORIterator
from scipy.stats import ttest_rel
from utils.evaluate import Evaluate 
%matplotlib inline

In [5]:
def get_all_LTR_metrics(file, ids, metrics):
    results = []
    for idx in ids:
        df = pd.read_pickle(file.format(idx))
        for column in df:
            if type(column) is int:
                # Get a ranking with the respective scores.
                ranking = df[str(column) + "_s"].as_matrix()
                
                # Remove all nans
                ranking = [int(x) for x in ranking if not math.isnan(float(x))]
                
                # Calculate the evaluation scores
                scores = Evaluate.compute_scores(ranking)
                
                # Append all evaluation scores together with its query id in the correct order
                results.append([column] + [scores[i] for i in metrics])
                
    df = pd.DataFrame(results, columns=["query_id"] + metrics)
    return df.groupby("query_id", as_index=False).mean().sort_values(by=["query_id"])

def get_baseline_df(test_file, score_file, metrics):
    queries = []
    scores = []
    prev_query_id = -1
    for i in range(1, 6):
        iterator = LETORIterator(test_file.format(i))
        with open(score_file.format(i), "r") as f:
            for line, (d_query_id, doc_id, rel_score, _) in zip(f, iterator.feature_iterator()):
                s_query_id, _, score = line.rstrip().split("\t")
                s_query_id, score = int(s_query_id), float(score)

                assert int(s_query_id) == int(d_query_id), str(s_query_id) + " != " + str(d_query_id)
                
                if prev_query_id == -1:
                    prev_query_id = s_query_id
                    
                
                if s_query_id != prev_query_id:
                    prev_query_id = s_query_id
                    if len(scores) > 0:
                        scores = sorted(scores, key=lambda x: -x[1])
                        queries.append((s_query_id, scores))
                        scores = []
                


                scores.append((int(rel_score), score))
    if len(scores) > 0:
        scores = sorted(scores, key=lambda x: -x[1])
        queries.append((s_query_id, scores))
        scores = []
    
    results = []
    for query, rank in queries:
        ranking = list(zip(*rank))[0]

        # Calculate the evaluation scores
        scores = Evaluate.compute_scores(ranking)
                
        # Append all evaluation scores together with its query id in the correct order
        results.append([query] + [scores[i] for i in metrics])
   
    df = pd.DataFrame(results, columns=["query_id"] + metrics)
    return df.sort_values(by=["query_id"])

def t_test(df_1, df_2, metrics):
    results = []
    for metric in metrics:
        results.append((metric, ttest_rel(df_1[metric], df_2[metric]).pvalue))
    return results
    

In [6]:
pickle_path = "storage/logs"
files = ["_baseline_masks_{}.pkl",
        "_ViP_snapshots_{}.pkl",
        "_ViP_highlights_{}.pkl",
        "_vgg16_snapshots_{}.pkl",
        "_vgg16_highlights_{}.pkl",
        "_vgg16_saliency_{}.pkl"]
metrics = ["p@1","p@5","p@10","ndcg@1","ndcg@5","ndcg@10","map"]

ids = range(1, 26)
vis_dfs = []
for file in files: 
    vis_dfs.append(get_all_LTR_metrics(os.path.join(pickle_path, file), ids, metrics).set_index("query_id"))
    
rankboost_df = get_baseline_df("storage/clueweb12_3.0/Fold{}/vali.txt", "storage/baseline/scores/rankboost_{}", metrics).set_index("query_id")
lambdamart_df = get_baseline_df("storage/clueweb12_3.0/Fold{}/vali.txt", "storage/baseline/scores/lambdamart_{}", metrics).set_index("query_id")
adarank_df = get_baseline_df("storage/clueweb12_3.0/Fold{}/vali.txt", "storage/baseline/scores/adarank_{}", metrics).set_index("query_id")

rankboost_img_df = get_baseline_df("storage/clueweb12_3.0_images/Fold{}/vali.txt", "storage/baseline/scores/rankboost_img_{}", metrics).set_index("query_id")
lambdamart_img_df = get_baseline_df("storage/clueweb12_3.0_images/Fold{}/vali.txt", "storage/baseline/scores/lambdamart_img_{}", metrics).set_index("query_id")
adarank_img_df = get_baseline_df("storage/clueweb12_3.0_images/Fold{}/vali.txt", "storage/baseline/scores/adarank_img_{}", metrics).set_index("query_id")

In [62]:

t_test( vis_dfs[4], rankboost_img_df, metrics)
# t_test( adarank_df, lambdamart_df, metrics)


[('p@1', 0.1056039630262534),
 ('p@5', 0.056914944641033784),
 ('p@10', 0.07958964270753542),
 ('ndcg@1', 0.14709801434997924),
 ('ndcg@5', 0.07765891559376295),
 ('ndcg@10', 0.05924346664879312),
 ('map', 0.3959174843152169)]

In [60]:
rankboost_img_df.mean()

p@1        0.450000
p@5        0.456000
p@10       0.444000
ndcg@1     0.257500
ndcg@5     0.278044
ndcg@10    0.287979
map        0.427051
dtype: float64

In [None]:
(lambdamart_img_df.reset_index(drop=True) <vis_dfs[4].reset_index(drop=True)).sum() - (lambdamart_img_df.reset_index(drop=True) >vis_dfs[4].reset_index(drop=True)).sum()

In [None]:
vis_dfs[5].mean()

In [None]:
ttest_rel(rankboost_img_df["p@1"]*10, vis_dfs[4]["p@1"]*10)

In [None]:
lambdamart_img_df

In [25]:
lambdamart_img_df.join(vis_dfs[4], lsuffix="_lambda", rsuffix="_highlights")[["map_lambda", "map_highlights"]].to_csv("map", sep=" ")

In [10]:
lambdamart_img_df

Unnamed: 0_level_0,p@1,p@5,p@10,ndcg@1,ndcg@5,ndcg@10,map
query_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
201,0.0,0.4,0.2,0.000000,0.229140,0.182433,0.201106
202,0.0,0.4,0.4,0.000000,0.233977,0.289823,0.375105
203,1.0,0.8,0.6,0.333333,0.404741,0.351336,0.410726
204,1.0,0.8,0.9,0.500000,0.359614,0.461962,0.508960
205,0.0,0.0,0.2,0.000000,0.000000,0.049284,0.509549
206,0.0,0.0,0.2,0.000000,0.000000,0.098568,0.116298
207,1.0,0.6,0.3,0.666667,0.532236,0.385750,0.240168
208,1.0,0.8,0.4,0.500000,0.738692,0.500701,0.333299
209,0.0,0.4,0.3,0.000000,0.311053,0.287022,0.195653
210,1.0,1.0,0.9,0.666667,0.750188,0.763322,0.580342
