In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import pickle
import json
import logging
# Send INFO-and-above records both to the file "ensemble.log" and to the
# notebook's stdout stream.
log_handlers = [
    logging.FileHandler("ensemble.log"),
    logging.StreamHandler(sys.stdout),
]
logging.basicConfig(level=logging.INFO, handlers=log_handlers)

import pandas as pd
# Notebook-wide pandas display settings: show every column, keep wide frames
# on one line instead of wrapping, and allow up to 1000 characters per cell.
# Fully qualified option names are used throughout; abbreviated keys are
# resolved by pattern matching and may stop being unique in later pandas
# releases.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', 1000)
import matplotlib.pyplot as plt
import seaborn as sns
# Select seaborn's "whitegrid" style for plots.
# NOTE(review): no plotting call is visible in this part of the notebook.
sns.set(style="whitegrid")

# Make the parent directory importable so the project modules imported just
# below (evaluation, preprocessing, retrieval_algorithms) can be resolved.
# The path is appended only once, so re-running the cell does not add duplicates.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
from evaluation import *
from preprocessing import Corpus, BasicPreprocessing, BigramPreprocessor, SpacyPreprocessor, StopWordPreprocessor
from retrieval_algorithms.ensemble_retrieval_algorithm import EnsembleRetrievalAlgorithm

In [2]:
# Load the keyword records that the query sets are built from. Later cells
# read the "level" and "paper_ids" entries of each record.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the result
# does not depend on the platform's default locale.
with open("../../data/kit_expert_2019_all_keywords.json", "r", encoding="utf-8") as file:
    keywords = json.load(file)

In [3]:
# Share of each keyword group that goes into the validation split; the
# remainder forms the test split.
VALIDATION_FRACTION = 0.8

# General keywords: level <= 1.
general_keywords = [k for k in keywords if k["level"] <= 1]
# Specific keywords: level >= 2 with at least 10 associated paper ids.
specific_keywords = [k for k in keywords if k["level"] >= 2 and len(k["paper_ids"]) >= 10]

# Compute each cut index once so the validation and test slices can never
# drift apart. The split follows the order of the loaded list (no shuffling).
general_split = int(len(general_keywords) * VALIDATION_FRACTION)
specific_split = int(len(specific_keywords) * VALIDATION_FRACTION)

# Each query set is a (label, keyword list) pair.
general_keywords_val = ("general keywords validation", general_keywords[:general_split])
specific_keywords_val = ("specific keywords validation", specific_keywords[:specific_split])
general_keywords_test = ("general keywords test", general_keywords[general_split:])
specific_keywords_test = ("specific keywords test", specific_keywords[specific_split:])

In [4]:
# Locations of the pickled models: BM25, Sent2Vec and the ensemble, in that order.
bm25_file_path, sent2vec_file_path, ensemble_file_path = (
    "../../data/models/tfidf/bm25_oqe.model",
    "../../data/models/sent2vec/sent2vec_oqe.model",
    "../../data/models/ensemble_model.model",
)

In [20]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    Unpickling can execute arbitrary code, so this must only be pointed at
    model files that were produced by this project.
    """
    with open(path, "rb") as file:
        return pickle.load(file)


# Value written to the ensemble's ``weight`` attribute, replacing whatever the
# pickled ensemble stored.
# NOTE(review): how the weight combines model1 and model2 is defined in
# EnsembleRetrievalAlgorithm — confirm there.
ENSEMBLE_WEIGHT = 0.04

best_bm25_model = _load_pickle(bm25_file_path)
best_sent2vec_model = _load_pickle(sent2vec_file_path)
best_ensemble_model = _load_pickle(ensemble_file_path)

# Point the ensemble at the two component models loaded above and apply the
# chosen weight.
best_ensemble_model.weight = ENSEMBLE_WEIGHT
best_ensemble_model.model1 = best_bm25_model
best_ensemble_model.model2 = best_sent2vec_model

In [None]:
# Models to evaluate, one (display name, model, None) tuple each.
# NOTE(review): the meaning of the third tuple element is defined by
# evaluate_models — confirm there.
# The two single-model baselines are currently disabled:
#     ("Best BM25 model", best_bm25_model, None),
#     ("Best Sent2Vec model", best_sent2vec_model, None),
best_models = [("Best ensemble model", best_ensemble_model, None)]

validation_query_sets = [general_keywords_val, specific_keywords_val]
test_query_sets = [general_keywords_test, specific_keywords_test]

# Validation split first, then the test split, both with a single job.
best_models_val_results = evaluate_models(best_models, validation_query_sets, n_jobs=1)
best_models_test_results = evaluate_models(best_models, test_query_sets, n_jobs=1)

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=6617.0), HTML(value='')), layout=Layout(d…

In [24]:
# Display the validation-split results table.
best_models_val_results

Unnamed: 0_level_0,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,general keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation,specific keywords validation
Unnamed: 0_level_1,p@5,p@5,p@10,p@10,p@20,p@20,R-prec,R-prec,mAP,mAP,bpref,bpref,p@5,p@5,p@10,p@10,p@20,p@20,R-prec,R-prec,mAP,mAP,bpref,bpref
Unnamed: 0_level_2,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err
Best ensemble model,0.383,0.04,0.364,0.036,0.333,0.033,0.226,0.02,0.183,0.019,0.177,0.018,0.737,0.007,0.682,0.007,0.578,0.006,0.569,0.006,0.583,0.006,0.564,0.007


In [37]:
# Display the test-split results table.
best_models_test_results


Unnamed: 0_level_0,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test
Unnamed: 0_level_1,p@5,p@5,p@10,p@10,p@20,p@20,R-prec,R-prec,mAP,mAP,bpref,bpref,p@5,p@5,p@10,p@10,p@20,p@20,R-prec,R-prec,mAP,mAP,bpref,bpref
Unnamed: 0_level_2,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err
Best ensemble model,0.407,0.084,0.39,0.073,0.366,0.07,0.26,0.045,0.23,0.048,0.218,0.046,0.737,0.014,0.687,0.013,0.587,0.013,0.575,0.011,0.587,0.012,0.568,0.013


In [46]:
# Print the LaTeX rendering of the validation table, then of the test table.
for results_table in (best_models_val_results, best_models_test_results):
    print(to_latex_table(results_table))

\textbf{general queries}\\
Best ensemble model & 0.383 & 0.364 & 0.333 & 0.226 & 0.183 & 0.177\\
 & \small{±0.040} & \small{±0.036} & \small{±0.033} & \small{±0.020} & \small{±0.019} & \small{±0.018}\\[0.15cm]
\addlinespace
\textbf{specific queries}\\
Best ensemble model & 0.737 & 0.682 & 0.578 & 0.569 & 0.583 & 0.564\\
 & \small{±0.007} & \small{±0.007} & \small{±0.006} & \small{±0.006} & \small{±0.006} & \small{±0.007}\\[0.15cm]

\textbf{general queries}\\
Best ensemble model & 0.407 & 0.390 & 0.366 & 0.260 & 0.230 & 0.218\\
 & \small{±0.084} & \small{±0.073} & \small{±0.070} & \small{±0.045} & \small{±0.048} & \small{±0.046}\\[0.15cm]
\addlinespace
\textbf{specific queries}\\
Best ensemble model & 0.737 & 0.687 & 0.587 & 0.575 & 0.587 & 0.568\\
 & \small{±0.014} & \small{±0.013} & \small{±0.013} & \small{±0.011} & \small{±0.012} & \small{±0.013}\\[0.15cm]



In [26]:
# Write the validation and test result tables to CSV, one file per split.
for results_frame, csv_path in (
    (best_models_val_results, "../../data/results/best_models_val_results.csv"),
    (best_models_test_results, "../../data/results/best_models_test_results.csv"),
):
    results_frame.to_csv(csv_path)


In [13]:
# Display the test-split results table.
# NOTE(review): the stored output of this cell lists three models (BM25,
# Sent2Vec, ensemble), while the evaluation cell currently evaluates only the
# ensemble — presumably this output comes from an earlier run; re-run the
# notebook top to bottom to confirm.
best_models_test_results


Unnamed: 0_level_0,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,general keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test,specific keywords test
Unnamed: 0_level_1,p@5,p@5,p@10,p@10,p@20,p@20,R-prec,R-prec,mAP,mAP,bpref,bpref,p@5,p@5,p@10,p@10,p@20,p@20,R-prec,R-prec,mAP,mAP,bpref,bpref
Unnamed: 0_level_2,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err,avg,err
Best BM25 model,0.37,0.081,0.338,0.073,0.322,0.069,0.229,0.042,0.188,0.044,0.185,0.042,0.717,0.014,0.673,0.013,0.578,0.013,0.567,0.011,0.576,0.012,0.561,0.013
Best Sent2Vec model,0.547,0.088,0.498,0.081,0.439,0.078,0.317,0.047,0.289,0.052,0.273,0.049,0.377,0.016,0.324,0.014,0.263,0.012,0.219,0.008,0.191,0.009,0.179,0.008
Best ensemble model,0.443,0.085,0.415,0.072,0.383,0.071,0.28,0.047,0.252,0.05,0.235,0.048,0.728,0.014,0.675,0.013,0.572,0.012,0.551,0.011,0.562,0.012,0.541,0.012


In [15]:
# Write the test-split results table to CSV.
# NOTE(review): this is the same target path as the earlier export cell, so
# whichever of the two cells runs last determines the file's contents.
best_models_test_results.to_csv("../../data/results/best_models_test_results.csv")


In [None]:
# Print the LaTeX rendering of the test-split results table.
print(to_latex_table(best_models_test_results))