#### Imports

In [1]:
import re
import json
from pathlib import Path
from functools import partial
from collections import Counter

import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, ttest_rel
import pytrec_eval

from IPython.display import display


# Relevance judgements file, parsed with pytrec_eval.parse_qrel in read_qrels().
QRELS_PATH = "../../data/test/trec_eval/qrels.txt"
# Every run file below the trec_eval directory. Sorted because glob() yields
# paths in filesystem order, which would otherwise make the order of runs
# (and therefore tie-breaking in later cells) differ between machines.
RUN_FILE_PATHS = sorted(Path("../../data/test/trec_eval/").glob("**/run_files/*.txt"))

#### Functions

In [2]:
def get_vector_size(index):
    """Extract the vector dimensionality from a run identifier.

    Finds the first ``d<digits>`` token in ``index`` (e.g. ``d100`` in
    ``run.Doc2Vec(dm-c,d100,n20,...)``) and returns the digits as an int.

    Raises:
        AttributeError: if ``index`` contains no ``d<digits>`` token.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    match = re.search(r"d(\d+)", index)
    return int(match.group(1))


def get_negative_samples(index):
    """Extract the negative-sampling value from a run identifier.

    Finds the first ``n<digits>`` token in ``index`` (e.g. ``n20``) and
    returns the digits as an int.

    Raises:
        AttributeError: if ``index`` contains no ``n<digits>`` token.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    match = re.search(r"n(\d+)", index)
    return int(match.group(1))


def get_window_size(index):
    """Extract the window size from a run identifier.

    Finds the first ``w<digits>`` token in ``index`` (e.g. ``w2``) and
    returns the digits as an int.

    Raises:
        AttributeError: if ``index`` contains no ``w<digits>`` token.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    match = re.search(r"w(\d+)", index)
    return int(match.group(1))


def get_min_count(index):
    """Extract the minimum-count value from a run identifier.

    Finds the first ``mc<digits>`` token in ``index`` (e.g. ``mc5``) and
    returns the digits as an int.

    Raises:
        AttributeError: if ``index`` contains no ``mc<digits>`` token.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    match = re.search(r"mc(\d+)", index)
    return int(match.group(1))


def get_sample(index):
    """Extract the sub-sampling threshold from a run identifier.

    Finds the first ``s`` that is immediately followed by a digit and reads
    the number after it, which may use a decimal point or exponent notation
    (e.g. ``s1e-05`` or ``s0.0001``).

    Raises:
        AttributeError: if ``index`` contains no such token.
        ValueError: if the captured text is not a valid float literal.
    """
    # Raw string: "\d" and "\-" inside a plain literal are invalid escapes.
    match = re.search(r"s(\d[e.\-\d]*)", index)
    return float(match.group(1))


def get_epochs(index):
    """Extract the number of training epochs from a run identifier.

    Finds the first ``ep<digits>`` token in ``index`` (e.g. ``ep40``) and
    returns the digits as an int.

    Raises:
        AttributeError: if ``index`` contains no ``ep<digits>`` token.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    match = re.search(r"ep(\d+)", index)
    return int(match.group(1))


def get_ratio(index):
    """Extract the ratio from the ``r(<value>)`` part of a run identifier.

    The capture is greedy: it spans from the first ``r(`` to the last ``)``
    in ``index``, so ``...).r(0.8).full`` yields ``0.8``.

    Raises:
        AttributeError: if ``index`` contains no ``r(...)`` part.
        ValueError: if the captured text is not a valid float literal.
    """
    # Raw string: "\(" and "\)" inside a plain literal are invalid escapes.
    match = re.search(r"r\((.*)\)", index)
    return float(match.group(1))

    
def get_stategy(index):
    """Return the text after the last ``.`` of ``index`` (all of it if there is no dot)."""
    _head, _dot, strategy = index.rpartition(".")
    return strategy


def filter_irrelevant_columns(df, suffixes=["_100", "_200", "_500", "_1000"]):
    """Return ``df`` without the columns whose name ends with one of ``suffixes``.

    The input frame is left untouched; ``DataFrame.drop`` returns a new frame.
    """
    # str.endswith accepts a tuple of candidates, so one call checks them all.
    unwanted_endings = tuple(suffixes)
    cols_to_drop = []
    for column in df.columns:
        if column.endswith(unwanted_endings):
            cols_to_drop.append(column)
    return df.drop(columns=cols_to_drop)


def infer_columns(df):
    """Add the hyper-parameters encoded in each run identifier as columns.

    Every index label of ``df`` is parsed with the ``get_*`` helpers and the
    parsed values are stored in the columns ``vector_size``, ``ns``, ``w``,
    ``epochs``, ``min_count``, ``sample``, ``ratio`` and ``strategy`` (in that
    order). Columns matched by ``filter_irrelevant_columns`` are then removed.

    Args:
        df: frame indexed by run identifier.

    Returns:
        A new frame; ``df`` itself is not modified.
    """
    column_parsers = {
        "vector_size": get_vector_size,
        "ns": get_negative_samples,
        "w": get_window_size,
        "epochs": get_epochs,
        "min_count": get_min_count,
        "sample": get_sample,
        "ratio": get_ratio,
        "strategy": get_stategy,
    }
    # Work on a copy so the caller's frame does not silently gain columns.
    enriched = df.copy()
    for column, parser in column_parsers.items():
        enriched[column] = enriched.index.map(parser)
    return filter_irrelevant_columns(enriched)


def read_qrels(qrels_path=QRELS_PATH):
    """Parse a qrels file with ``pytrec_eval.parse_qrel``.

    Args:
        qrels_path: path of the qrels file; defaults to ``QRELS_PATH``.

    Returns:
        Whatever ``pytrec_eval.parse_qrel`` returns for the file.
    """
    # Open the path that was passed in; the module constant is only the default.
    with open(qrels_path, "r") as fp:
        qrels = pytrec_eval.parse_qrel(fp)
    return qrels


def read_runfiles(runfile_paths=RUN_FILE_PATHS, pattern=None):
    """Parse run files with ``pytrec_eval.parse_run``, keyed by file stem.

    Args:
        runfile_paths: iterable of path objects exposing ``.stem``.
        pattern: optional plain substring; when truthy, only paths whose
            string form contains it are read.

    Returns:
        dict mapping each file's stem to its parsed run. Files sharing a stem
        overwrite each other; the last one read wins.
    """
    if not pattern:
        selected = runfile_paths
    else:
        selected = [path for path in runfile_paths if pattern in str(path)]

    runs = {}
    for path in selected:
        run_id = path.stem
        with open(path, "r") as handle:
            runs[run_id] = pytrec_eval.parse_run(handle)
    return runs
    
    
def evaluate(qrels, runs, measures):
    """Evaluate every run against ``qrels`` and average each measure over queries.

    Args:
        qrels: relevance judgements as accepted by
            ``pytrec_eval.RelevanceEvaluator``.
        runs: dict mapping run id to a parsed run.
        measures: measure names passed to ``pytrec_eval.RelevanceEvaluator``.

    Returns:
        Tuple ``(run_results_by_query, run_results)``: the first maps run id to
        the per-query results returned by the evaluator, the second maps run id
        to ``{measure: mean over queries}``. A run with no evaluated queries
        gets an empty dict of means.
    """
    run_results_by_query = dict()
    run_results = dict()
    for run_id, run in runs.items():
        evaluator = pytrec_eval.RelevanceEvaluator(qrels, measures)
        results = evaluator.evaluate(run)
        per_query_scores = list(results.values())
        # Take the measure names from whichever query comes first instead of
        # assuming that a query with id "0" exists.
        measure_names = list(per_query_scores[0].keys()) if per_query_scores else []
        mean_results = {measure: np.mean([scores[measure] for scores in per_query_scores])
                        for measure in measure_names}
        run_results_by_query[run_id] = results
        run_results[run_id] = mean_results

    return run_results_by_query, run_results


### Reading qrels

In [3]:
# Parse the relevance judgements once; later cells read this `qrels` variable.
qrels = read_qrels()

### Inspecting results (by run)

In [4]:
# Only evaluate run files whose path contains "r(0.8).full";
# call read_runfiles() without a pattern to evaluate every run file instead.
runs = read_runfiles(pattern="r(0.8).full")

run_results_by_query, run_results = evaluate(qrels, runs, ["map", "ndcg"])

# One row per run, best ndcg first.
results_df = infer_columns(pd.DataFrame(run_results).T)
results_df = results_df.sort_values("ndcg", ascending=False)

# Use the fully qualified option name: the bare "max_rows" pattern also matches
# "styler.render.max_rows" on recent pandas and raises an OptionError.
with pd.option_context("display.max_rows", None):
    display(results_df.style.format({"sample": "{:.0e}".format, "ratio": "{:.2f}".format}))

Unnamed: 0,map,ndcg,vector_size,ns,w,epochs,min_count,sample,ratio,strategy
"run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full",0.690265,0.815255,100,20,1,40,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full",0.679039,0.810748,100,30,2,40,2,0.0001,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full",0.68492,0.806648,100,20,2,40,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full",0.691488,0.805743,100,20,1,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full",0.672142,0.797485,100,20,2,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full",0.672224,0.794338,75,20,2,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full",0.669828,0.794188,50,20,1,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full",0.646164,0.792668,100,20,3,40,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full",0.659573,0.78865,75,20,1,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full",0.665353,0.780151,50,20,2,20,5,1e-05,0.8,full


### Inspecting results (by query)

In [5]:
# Average every measure per query over all evaluated runs.
res_query = run_results_by_query
res = list(res_query.values())
per_run_frames = [pd.DataFrame.from_dict(per_query, orient="index") for per_query in res]

# Sum the per-run frames in run order (aligned on query id), then divide by the run count.
res_query_df = per_run_frames[0]
for frame in per_run_frames[1:]:
    res_query_df = res_query_df + frame

res_query_df = filter_irrelevant_columns(res_query_df / len(res)).sort_values("ndcg")

print("Queries with lowest average ndcg")
display(res_query_df.head(10))
print("Queries with highest average ndcg")
display(res_query_df.tail(10))

# Query ids of the five lowest-scoring queries (head() defaults to 5 rows).
lowest_ndcg_doc_ids = res_query_df.head().index

Queries with lowest average ndcg


Unnamed: 0,map,ndcg
51,0.050104,0.220681
38,0.169081,0.335127
21,0.198938,0.432824
2,0.294013,0.456867
0,0.263908,0.46347
46,0.222295,0.474492
42,0.22508,0.481275
33,0.296547,0.505767
32,0.358601,0.548802
48,0.313566,0.577396


Queries with highest average ndcg


Unnamed: 0,map,ndcg
5,0.834142,0.871203
47,0.874768,0.872882
11,0.709134,0.883403
27,0.817002,0.906309
7,0.843704,0.907928
49,0.884168,0.908101
10,0.900161,0.919535
8,0.89964,0.920515
43,0.896186,0.927414
9,0.919023,0.933758


In [47]:
# Per-query ndcg of a single run.
run = "run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full"

{query_id: scores["ndcg"] for query_id, scores in res_query[run].items()}

{'0': 0.5068311754437849,
 '1': 0.6856657242067933,
 '2': 0.6256370815118074,
 '3': 0.9340483318026336,
 '4': 0.9252273613906197,
 '5': 1.0,
 '6': 0.8597186998521972,
 '7': 1.0,
 '8': 1.0,
 '9': 1.0,
 '10': 1.0,
 '11': 0.9875671027445805,
 '12': 0.8612335585898477,
 '13': 0.5177415653677128,
 '14': 0.86552094046731,
 '15': 0.9446984783037966,
 '16': 0.8439386943148511,
 '17': 0.8275240460841675,
 '18': 0.8339384423491363,
 '19': 0.6602134401050815,
 '20': 0.9635825499105617,
 '21': 0.49851731908415586,
 '22': 0.8447848264584782,
 '23': 0.8965971569260327,
 '24': 0.7990188763760618,
 '25': 0.7603793323315821,
 '26': 0.8631039009858753,
 '27': 1.0,
 '28': 1.0,
 '29': 1.0,
 '30': 0.4780090533311815,
 '31': 0.8414770302980799,
 '32': 0.5547618335894342,
 '33': 0.831871012159447,
 '34': 0.7224242270408039,
 '35': 0.8451211126956059,
 '36': 0.8662120083432444,
 '37': 1.0,
 '38': 0.3408238075324603,
 '39': 0.8631039009858753,
 '40': 0.6713852295568556,
 '41': 1.0,
 '42': 0.8401250222879123,
 

In [53]:
# Unwanted articles from Proof of Concept notebook
articles_of_interest = [0, 1, 13, 17, 18, 19, 21, 24, 25, 26, 30, 31, 38, 39, 40, 41, 45, 46, 47, 48]

best_runs = Counter()
for article_id in articles_of_interest:
    best_result = None
    best_run = list()
    for run_id, results in res_query.items():
        result = results[str(article_id)]["ndcg"]
        if best_result is None:
            best_result = result
            best_run.append(run_id)
        else:
            if result > best_result:
                best_result = result
                best_run = list()
                best_run.append(run_id)
            elif result >= best_result:
                best_run.append(run_id)
                
    print(article_id, "ndcg: ", best_result)
    if isinstance(best_run, list):
        for run in best_run:
            best_runs.update([run])
            print(run)
    else:
        best_runs.update([run])
        print(run)
    print("\n")

0 ndcg:  0.906049259903061
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full


1 ndcg:  0.9922385874580825
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


13 ndcg:  0.9628945312701557
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full


17 ndcg:  0.9587435808183749
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


18 ndcg:  0.8962613504376156
run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full


19 ndcg:  0.8257319483794759
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


21 ndcg:  0.676412933984772
run.Doc2Vec(dm-c,d300,n10,w4,mc5,s1e-05,t8,ep20).r(0.8).full


24 ndcg:  0.9184429297429952
run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full


25 ndcg:  0.9522357846460809
run.Doc2Vec(dm-c,d200,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full


26 ndcg:  0.9293957157305397
run.Doc2Vec(dm-c,d150,n20,w5,mc5,s1e-05,t4,ep20).r(0.8).full


30 ndcg:  0.8188008749685227
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).fu

In [54]:
# Number of articles inspected, then the runs ordered by how often they were best.
print(len(articles_of_interest))
best_runs.most_common()

20


[('run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full', 2),
 ('run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 2),
 ('run.Doc2Vec(dm-c,d300,n10,w4,mc5,s1e-05,t8,ep20).r(0.8).full', 2),
 ('run.Doc2Vec(dm-c,d200,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full', 2),
 ('run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full', 2),
 ('run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full', 1),
 ('run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 1),
 ('run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full', 1),
 ('run.Doc2Vec(dm-c,d150,n20,w5,mc5,s1e-05,t4,ep20).r(0.8).full', 1),
 ('run.Doc2Vec(dm-c,d200,n20,w2,mc5,s1e-05,t8,ep20).r(0.8).full', 1),
 ('run.Doc2Vec(dm-c,d300,n20,w4,mc5,s1e-05,t8,ep20).r(0.8).full', 1),
 ('run.Doc2Vec(dm-c,d200

### Inspecting top 5 runs of each query that does poorly with the main model

In [59]:
# Unwanted articles from Proof of Concept notebook
articles_of_interest = [0, 1, 13, 17, 18, 19, 21, 24, 25, 26, 30, 31, 38, 39, 40, 41, 42, 45, 46, 47, 48]

best_runs = Counter()
for article_id in articles_of_interest:
    article_results = pd.Series({run_id:results[str(article_id)]["ndcg"] 
                                 for run_id, results in res_query.items()})
    print(article_id)
    top_5 = article_results.sort_values(ascending=False).head()
    display(top_5)
    best_runs.update(top_5.index.values)
    print("\n")   
    


0


run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full      0.906049
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full      0.836247
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full      0.811691
run.Doc2Vec(dm-c,d100,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full      0.792085
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full    0.791202
dtype: float64



1


run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full       0.992239
run.Doc2Vec(dm-c,d100,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full      0.924630
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full    0.887868
run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full       0.884792
run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full       0.859814
dtype: float64



13


run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full      0.962895
run.Doc2Vec(dm-c,d200,n20,w4,mc5,s1e-05,t8,ep20).r(0.8).full      0.942850
run.Doc2Vec(dm-c,d150,n10,w3,mc5,s1e-05,t4,ep20).r(0.8).full      0.861071
run.Doc2Vec(dm-c,d100,n30,w1,mc2,s0.0001,t16,ep40).r(0.8).full    0.860255
run.Doc2Vec(dm-c,d300,n10,w1,mc5,s1e-05,t8,ep20).r(0.8).full      0.835561
dtype: float64



17


run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full      0.958744
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full      0.933731
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full      0.932112
run.Doc2Vec(dm-c,d300,n10,w3,mc5,s1e-05,t4,ep20).r(0.8).full      0.922988
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full    0.921410
dtype: float64



18


run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     0.896261
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full     0.876133
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full    0.871739
run.Doc2Vec(dm-c,d150,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full    0.865649
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full    0.864387
dtype: float64



19


run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full      0.825732
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full      0.800824
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full       0.798973
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full      0.791409
run.Doc2Vec(dm-c,d100,n30,w1,mc2,s0.0001,t16,ep40).r(0.8).full    0.789851
dtype: float64



21


run.Doc2Vec(dm-c,d300,n10,w4,mc5,s1e-05,t8,ep20).r(0.8).full    0.676413
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full     0.651556
run.Doc2Vec(dm-c,d150,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.586879
run.Doc2Vec(dm-c,d100,n10,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.564778
run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     0.561396
dtype: float64



24


run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full    0.918443
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.915491
run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full    0.909964
run.Doc2Vec(dm-c,d100,n10,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.909964
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full    0.908253
dtype: float64



25


run.Doc2Vec(dm-c,d200,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full      0.952236
run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full      0.950762
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full    0.945197
run.Doc2Vec(dm-c,d300,n10,w1,mc5,s1e-05,t8,ep20).r(0.8).full      0.940370
run.Doc2Vec(dm-c,d200,n20,w3,mc5,s1e-05,t8,ep20).r(0.8).full      0.916711
dtype: float64



26


run.Doc2Vec(dm-c,d150,n20,w5,mc5,s1e-05,t4,ep20).r(0.8).full    0.929396
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full    0.909964
run.Doc2Vec(dm-c,d200,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full    0.876625
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full    0.875080
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full    0.863104
dtype: float64



30


run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full     0.818801
run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     0.799375
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full    0.757061
run.Doc2Vec(dm-c,d150,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full    0.749714
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.740735
dtype: float64



31


run.Doc2Vec(dm-c,d200,n20,w2,mc5,s1e-05,t8,ep20).r(0.8).full    0.889719
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full    0.873259
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full    0.850264
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full    0.841477
run.Doc2Vec(dm-c,d300,n10,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.830749
dtype: float64



38


run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full       0.619906
run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full       0.559049
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t8,ep30).r(0.8).full     0.553145
run.Doc2Vec(dm-c,d100,n30,w1,mc2,s0.0001,t16,ep40).r(0.8).full    0.484128
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full      0.439475
dtype: float64



39


run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     1.000000
run.Doc2Vec(dm-c,d300,n20,w4,mc5,s1e-05,t8,ep20).r(0.8).full    1.000000
run.Doc2Vec(dm-c,d300,n20,w5,mc5,s1e-05,t8,ep20).r(0.8).full    0.977859
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full    0.963861
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full    0.963861
dtype: float64



40


run.Doc2Vec(dm-c,d100,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full     0.936040
run.Doc2Vec(dm-c,d200,n20,w3,mc5,s1e-05,t8,ep20).r(0.8).full     0.936040
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t8,ep30).r(0.8).full    0.862364
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     0.859980
run.Doc2Vec(dm-c,d200,n10,w3,mc5,s1e-05,t4,ep20).r(0.8).full     0.859980
dtype: float64



41


run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full    1.000000
run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     1.000000
run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full     1.000000
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full    1.000000
run.Doc2Vec(dm-c,d150,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full    0.977859
dtype: float64



42


run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full      0.846724
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full      0.843088
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full      0.840125
run.Doc2Vec(dm-c,d300,n20,w4,mc5,s1e-05,t8,ep20).r(0.8).full      0.833708
run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full    0.830676
dtype: float64



45


run.Doc2Vec(dm-c,d300,n10,w4,mc5,s1e-05,t8,ep20).r(0.8).full    1.000000
run.Doc2Vec(dm-c,d300,n20,w5,mc5,s1e-05,t8,ep20).r(0.8).full    0.877215
run.Doc2Vec(dm-c,d150,n20,w5,mc5,s1e-05,t4,ep20).r(0.8).full    0.850345
run.Doc2Vec(dm-c,d300,n10,w5,mc5,s1e-05,t8,ep20).r(0.8).full    0.831555
run.Doc2Vec(dm-c,d150,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full    0.817530
dtype: float64



46


run.Doc2Vec(dm-c,d200,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full      0.848133
run.Doc2Vec(dm-c,d300,n20,w4,mc5,s1e-05,t8,ep20).r(0.8).full      0.836909
run.Doc2Vec(dm-c,d200,n10,w5,mc5,s1e-05,t4,ep20).r(0.8).full      0.830676
run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full      0.563650
run.Doc2Vec(dm-c,d100,n30,w1,mc2,s0.0001,t16,ep40).r(0.8).full    0.562525
dtype: float64



47


run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full    1.0
run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full     1.0
run.Doc2Vec(dm-c,d100,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full    1.0
run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full     1.0
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full    1.0
dtype: float64



48


run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full     0.873673
run.Doc2Vec(dm-c,d100,n20,w5,mc5,s1e-05,t4,ep20).r(0.8).full    0.862656
run.Doc2Vec(dm-c,d150,n10,w5,mc5,s1e-05,t4,ep20).r(0.8).full    0.853316
run.Doc2Vec(dm-c,d200,n20,w1,mc5,s1e-05,t8,ep20).r(0.8).full    0.846235
run.Doc2Vec(dm-c,d150,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full    0.829153
dtype: float64





[('run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full', 6),
 ('run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 6),
 ('run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 4),
 ('run.Doc2Vec(dm-c,d100,n30,w1,mc2,s0.0001,t16,ep40).r(0.8).full', 4),
 ('run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,

In [63]:
# Runs ordered by how many times they were counted in `best_runs`.
best_runs.most_common()

[('run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full', 6),
 ('run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 6),
 ('run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full', 5),
 ('run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full', 4),
 ('run.Doc2Vec(dm-c,d100,n30,w1,mc2,s0.0001,t16,ep40).r(0.8).full', 4),
 ('run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full', 3),
 ('run.Doc2Vec(dm-c,

In [62]:
# Hand-picked model identifiers (run ids without the "run." prefix and the
# ".r(0.8).full" suffix) to inspect more closely.
models_to_look_into = [
    "Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20)",
    "Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40)",
    "Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20)",
    "Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40)",
    "Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20)"
]

In [43]:
# All articles
articles_of_interest = np.arange(52)
best_result = None
best_run = list()
best_runs = Counter()

for article_id in articles_of_interest:
    for run_id, results in res_query.items():
        result = results[str(article_id)]["ndcg"]
        if best_result is None:
            best_result = result
            best_run.append(run_id)
        else:
            if result > best_result:
                best_result = result
                best_run = list()
                best_run.append(run_id)
            elif result == best_result and run_id not in best_run:
                best_run.append(run_id)
                
    print(article_id, "ndcg: ", best_result)
    if isinstance(best_run, list):
        for run in best_run:
            best_runs.update([run])
            print(run)
    else:
        best_runs.update([run])
        print(run)
    print("\n")

0 ndcg:  0.906049259903061
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full


1 ndcg:  0.9922385874580825
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


2 ndcg:  0.9922385874580825
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


3 ndcg:  0.9922385874580825
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


4 ndcg:  0.9922385874580825
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full


5 ndcg:  1.0
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full
run.Doc2Vec(dm-c,d300,n10,w1,mc5,s1e-05,t8,ep20).r(0.8).full
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full
run.Doc2Vec(dm-c,d200,n20,w3,mc5,s1e-05,t8,ep20).r(0.8).full
run.Doc2Vec(dm-c,d150,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full
run.Doc2Vec(dm-c,d300,n10,w5,mc5,s1e-05,t8,ep20).r(0.8).full
run.Doc2Vec(dm-c,d200,n20,w5,mc5,s1e-05,t8,ep20).r(0.8).full
run.Doc2Vec(dm-c,d200,n10,w4,mc5,s1e-05,t4,ep20).r(0.8).full
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).

In [44]:
# Number of articles inspected, then every run with the number of articles it
# was best for, most frequent first. Uses `best_runs`, the counter filled in
# the previous cell.
print(len(articles_of_interest))
for run_id, count in best_runs.most_common():
    print(run_id, count)

52
run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full 63
run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full 54
run.Doc2Vec(dm-c,d300,n20,w4,mc5,s1e-05,t8,ep20).r(0.8).full 54
run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full 52
run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full 52
run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full 51
run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full 50
run.Doc2Vec(dm-c,d300,n10,w4,mc5,s1e-05,t8,ep20).r(0.8).full 49
run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full 49
run.Doc2Vec(dm-c,d100,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full 49
run.Doc2Vec(dm-c,d300,n10,w1,mc5,s1e-05,t8,ep20).r(0.8).full 47
run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full 47
run.Doc2Vec(dm-c,d200,n20,w3,mc5,s1e-05,t8,ep20).r(0.8).full 47
run.Doc2Vec(dm-c,d150,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full 47
run.Doc2Vec(dm-c,d300,n10,w5,mc5,s1e-05,t8,ep20).r(0.8).full 47
run.Doc2Vec(dm-c,d200,n20,w5,mc5,s1e-05,

### Significance testing

In [64]:
def test_significance(run_1=None, run_2=None, measure="ndcg"):
    """Paired t-test between two runs on their per-query scores.

    Reads the notebook-level ``qrels`` (for the query ids) and
    ``run_results_by_query`` (for the scores).

    Args:
        run_1: id of the first run in ``run_results_by_query``.
        run_2: id of the second run in ``run_results_by_query``.
        measure: name of the measure to compare.

    Returns:
        The result of ``scipy.stats.ttest_rel`` on the two score lists.
    """
    query_ids = list(qrels)

    def scores_of(run_id):
        per_query = run_results_by_query[run_id]
        return [per_query[query_id][measure] for query_id in query_ids]

    first_scores = scores_of(run_1)
    second_scores = scores_of(run_2)
    return ttest_rel(first_scores, second_scores)

**Vector dimensionality (everything else static):**
* `d100` -> `d150` not significant
* `d100` -> `d200`, `d300` significant 

Conclusions so far: best models should have vector dimensionality `d100` or `d150`

**Negative sampling:**
* (for `d100`) `n20` -> `n10` not significant for `w1`, `w2`, `w3`, significant for `w4`, `w5`
* (for `d150`) `n20` -> `n10` not significant for all window sizes except for `w4`

Conclusions so far: best models should have vector dimensionality `d100` and negative sampling value of `n20` with window sizes `w1`, `w2`, `w3`

**Window size:**
* (for `d100`, `n20`) `w1` -> `w2` not significant, `w1` -> `w3`, `w4`, `w5` significant

Conclusions so far: best models should have vector dimensionality `d100` and negative sampling value of `n20` with window sizes `w1`, `w2`

**Epochs:**

Only four models were trained with `ep40`:
* `Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40)`
* `Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40)`
* `Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40)`
* `Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40)`

Significance tests showed no statistically significant differences in performance between these models and their counterparts trained with 20 epochs.

In [65]:
# Compare the 40-epoch and 20-epoch variants of the same configuration.
# To compare the best and the worst run instead, use
# results_df.index[0] and results_df.index[-1].
first_run = "run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full"
second_run = "run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full"

for run_id in (first_run, second_run):
    display(results_df.loc[run_id])

test_significance(first_run, second_run)

map            0.646164
ndcg           0.792668
vector_size         100
ns                   20
w                     3
epochs               40
min_count             5
sample          0.00001
ratio               0.8
strategy           full
Name: run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full, dtype: object

map            0.629034
ndcg           0.765107
vector_size         100
ns                   20
w                     3
epochs               20
min_count             5
sample          0.00001
ratio               0.8
strategy           full
Name: run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full, dtype: object

Ttest_relResult(statistic=1.5276509332384145, pvalue=0.1327780322212468)

In [66]:
# Identifier of the first row of results_df (sorted by ndcg, best first).
results_df.index[0]

'run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full'

In [67]:
# First 13 rows of results_df (sorted by ndcg, best first).
results_df.iloc[0:13]

Unnamed: 0,map,ndcg,vector_size,ns,w,epochs,min_count,sample,ratio,strategy
"run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep40).r(0.8).full",0.690265,0.815255,100,20,1,40,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n30,w2,mc2,s0.0001,t16,ep40).r(0.8).full",0.679039,0.810748,100,30,2,40,2,0.0001,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep40).r(0.8).full",0.68492,0.806648,100,20,2,40,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full",0.691488,0.805743,100,20,1,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full",0.672142,0.797485,100,20,2,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d75,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full",0.672224,0.794338,75,20,2,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d50,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full",0.669828,0.794188,50,20,1,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full",0.646164,0.792668,100,20,3,40,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d75,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full",0.659573,0.78865,75,20,1,20,5,1e-05,0.8,full
"run.Doc2Vec(dm-c,d50,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full",0.665353,0.780151,50,20,2,20,5,1e-05,0.8,full


In [68]:
# List every run whose ndcg differs significantly (p < 0.05) from the top run.
top_run = results_df.index[0]
for i, challenger in enumerate(results_df.index[1:], start=1):
    _, pval = test_significance(top_run, challenger, "ndcg")
    if pval < 0.05:
        challenger_ndcg = results_df.loc[challenger, "ndcg"]
        status_msg = "{:2}\t{}\t{:.6f}\t{:.3f}".format(i, challenger, challenger_ndcg, pval)
        print(status_msg)

10	run.Doc2Vec(dm-c,d150,n20,w1,mc5,s1e-05,t4,ep20).r(0.8).full	0.779419	0.043
12	run.Doc2Vec(dm-c,d200,n20,w2,mc5,s1e-05,t8,ep20).r(0.8).full	0.776343	0.019
13	run.Doc2Vec(dm-c,d200,n10,w1,mc5,s1e-05,t8,ep20).r(0.8).full	0.771025	0.006
14	run.Doc2Vec(dm-c,d200,n20,w1,mc5,s1e-05,t8,ep20).r(0.8).full	0.770504	0.005
15	run.Doc2Vec(dm-c,d150,n10,w1,mc5,s1e-05,t4,ep20).r(0.8).full	0.769964	0.008
16	run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full	0.768682	0.008
17	run.Doc2Vec(dm-c,d150,n20,w3,mc5,s1e-05,t4,ep40).r(0.8).full	0.766806	0.025
18	run.Doc2Vec(dm-c,d100,n20,w4,mc5,s1e-05,t4,ep20).r(0.8).full	0.766104	0.033
19	run.Doc2Vec(dm-c,d100,n10,w2,mc5,s1e-05,t4,ep20).r(0.8).full	0.765967	0.008
20	run.Doc2Vec(dm-c,d100,n20,w3,mc5,s1e-05,t4,ep20).r(0.8).full	0.765107	0.010
21	run.Doc2Vec(dm-c,d150,n20,w2,mc5,s1e-05,t4,ep20).r(0.8).full	0.764743	0.010
22	run.Doc2Vec(dm-c,d200,n10,w2,mc5,s1e-05,t4,ep20).r(0.8).full	0.761220	0.001
23	run.Doc2Vec(dm-c,d150,n10,w2,mc5,s1e-05,t4,ep20).

In [None]:
# Per-query results of the run in the first row of results_df.
run_results_by_query[results_df.index[0]]

In [None]:
# Per-query results of the run at position 12 of results_df.
run_results_by_query[results_df.index[12]]

In [None]:
# Per-query score tables (rows: query ids) for the run at position 12 and the top run.
challenger_run_id = results_df.index[12]
challenger_model = pd.DataFrame.from_dict(run_results_by_query[challenger_run_id], orient="index")
top_run_id = results_df.index[0]
top_model = pd.DataFrame.from_dict(run_results_by_query[top_run_id], orient="index")

In [None]:
# Per-query difference of every measure: top run minus challenger.
measure_diffs = top_model.sub(challenger_model)

display(measure_diffs)

In [None]:
# Differences restricted to the queries in lowest_ndcg_doc_ids.
display(measure_diffs.loc[lowest_ndcg_doc_ids])

In [None]:
# Mean difference per measure over the queries in lowest_ndcg_doc_ids.
measure_diffs.loc[lowest_ndcg_doc_ids].mean()

In [None]:
# One-sample t-test: is the mean "map" difference on the lowest-ndcg queries
# different from zero? The column is selected by label because attribute
# access (`.map`) resolves to the DataFrame.map method on recent pandas.
ttest_1samp(measure_diffs.loc[lowest_ndcg_doc_ids, "map"], 0)