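# Evaluation Notebook

Compares BM25 retrieval models built on differently preprocessed versions of the paper corpus (unigrams, bigrams, lemmatization, lemmatization + bigrams). Each model is scored by mean average precision (mAP) on two keyword test sets: general keywords (level ≤ 1) and specific keywords (level ≥ 2).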

In [48]:
%load_ext autoreload
%autoreload 2
# --- Standard-library and third-party dependencies ---------------------------
import json
# NOTE(review): `gensim.summarization` was removed in gensim 4.0, so this import
# presumably requires gensim < 4 — confirm the pinned version. BM25, Phrases and
# Phraser are not referenced by any cell visible in this notebook.
from gensim.summarization.bm25 import BM25
from gensim.models.phrases import Phrases, Phraser
import pandas as pd
# DataFrame display: show every column, do not wrap wide frames across lines,
# and allow long cell contents (column width up to 1000).
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 1000)
import numpy as np
from multiprocessing import Pool
from functools import partial
import spacy

# Notebook-widget progress bars; tqdm.pandas() registers tqdm's pandas
# integration (the `progress_*` methods).
from tqdm.notebook import tqdm
tqdm.pandas()

import sys
import logging
# Route INFO-level (and above) log records to stdout so they show up in the
# cell output (see the "INFO:root:..." lines printed when the corpora load).
logging.basicConfig(level=logging.INFO, stream=sys.stdout)


# --- Project-local modules ---------------------------------------------------
# BM25Retrieval: retrieval model evaluated below.
# evaluation: mAP helpers (only mean_average_precision is used in the visible cells).
# preprocessing: Corpus plus the preprocessing stages combined into pipelines.
from bm25_retrieval import BM25Retrieval
from evaluation import average_precision, mean_average_precision, mean_average_precision_parallel
from preprocessing import apply_pipeline, Corpus, BasicPreprocessing, BigramPreprocessor, SpacyPreprocessor

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Load corpus using different preprocessing pipelines

In [43]:
# Source CSV shared by every corpus variant below.
base_file = "../data/kit_expert_2017_papers.csv"

# Every corpus is built from the same file; only the list of preprocessing
# stages handed to Corpus differs.

# Basic preprocessing only.
papers_basic = Corpus(base_file, [BasicPreprocessing()])

# Basic preprocessing followed by the bigram stage.
papers_basic_bigram = Corpus(
    base_file,
    [BasicPreprocessing(), BigramPreprocessor()],
)

# Basic preprocessing followed by spaCy lemmatization.
# NOTE(review): this is the only corpus that passes load_from_cache explicitly;
# confirm whether that differs from Corpus' default.
papers_basic_lemmatization = Corpus(
    base_file,
    [BasicPreprocessing(), SpacyPreprocessor(lemmatization=True)],
    load_from_cache=True,
)

# Basic preprocessing, spaCy lemmatization, then the bigram stage.
papers_basic_lemmatization_bigram = Corpus(
    base_file,
    [
        BasicPreprocessing(),
        SpacyPreprocessor(lemmatization=True),
        BigramPreprocessor(),
    ],
)

# Noun-chunk variants, currently disabled:
# papers_basic_nounchunk = Corpus(
#     base_file,
#     [BasicPreprocessing(), SpacyPreprocessor(combine_noun_chunks=True)],
# )
# papers_basic_lemmatization_nounchunk = Corpus(
#     base_file,
#     [BasicPreprocessing(), SpacyPreprocessor(lemmatization=True, combine_noun_chunks=True)],
# )

INFO:root:Start preprocessing pipeline "basic" for file ../data/kit_expert_2017_papers.csv.
INFO:root:Loaded cached preprocessed corpus from ../data/kit_expert_2017_papers_basic
INFO:root:Start preprocessing pipeline "basic_bigrams" for file ../data/kit_expert_2017_papers.csv.
INFO:root:Loaded cached preprocessed corpus from ../data/kit_expert_2017_papers_basic_bigrams
INFO:root:Start preprocessing pipeline "basic_spacy_lemmatization" for file ../data/kit_expert_2017_papers.csv.
INFO:root:Loaded cached preprocessed corpus from ../data/kit_expert_2017_papers_basic_spacy_lemmatization
INFO:root:Start preprocessing pipeline "basic_spacy_lemmatization_bigrams" for file ../data/kit_expert_2017_papers.csv.
INFO:root:Loaded cached preprocessed corpus from ../data/kit_expert_2017_papers_basic_spacy_lemmatization_bigrams


#### Load keywords to use as test data

In [44]:
# Load the keyword entries used as test queries.
# The encoding is passed explicitly: without it open() decodes with the
# platform's locale encoding, which can mis-decode (or fail on) non-ASCII
# keywords on systems whose default is not UTF-8.
with open("../data/kit_expert_2017_keywords.json", "r", encoding="utf-8") as file:
    keywords = json.load(file)

#### Define models to evaluate

In [55]:
# (display name, factory) pairs. Each factory is a partial that binds one
# corpus to BM25Retrieval, so the model itself is only constructed when the
# factory is called.
models = [
    (display_name, partial(BM25Retrieval, corpus=corpus))
    for display_name, corpus in (
        ("BM25 unigrams", papers_basic),
        ("BM25 bigrams", papers_basic_bigram),
        ("BM25 lemmatization unigrams", papers_basic_lemmatization),
        ("BM25 lemmatization bigrams", papers_basic_lemmatization_bigram),
        # Disabled noun-chunk variants:
        # ("BM25 nounchunk", papers_basic_nounchunk),
        # ("BM25 lemmatization nounchunk", papers_basic_lemmatization_nounchunk),
    )
]

#### Define test sets 

In [52]:
# Split the keyword entries on their "level" field: level <= 1 counts as
# general, level >= 2 as specific.
general_keywords = [entry for entry in keywords if entry["level"] <= 1]
specific_keywords = [entry for entry in keywords if entry["level"] >= 2]

# (label, keyword entries) pairs; the labels become the column names of the
# results table.
test_sets = [
    ("general keywords", general_keywords),
    ("specific_keywords", specific_keywords),
]

#### Evaluate Models

In [56]:
# Upper bound on how many keyword queries of each test set are evaluated
# (presumably to keep a full run over all models fast — confirm). Set to None
# to evaluate on every keyword of a test set.
MAX_QUERIES_PER_TEST_SET = 1000

# results[model name][test set name] -> mAP score
results = {}
for model_name, model_factory in tqdm(models):
    # Build the model once and reuse it for every test set.
    model_instance = model_factory()
    results[model_name] = {}
    for test_set_name, test_set in test_sets:
        # One entry per keyword: the keyword is the query, its paper ids are
        # the documents it is scored against (presumably the relevant ones).
        queries = [
            {
                "query": keyword_info["keyword"],
                "documents": keyword_info["paper_ids"],
            }
            for keyword_info in test_set[:MAX_QUERIES_PER_TEST_SET]
        ]
        results[model_name][test_set_name] = mean_average_precision(model_instance, queries)

print("mAP scores for models:")
# One row per model, one column per test set; the bare expression is the cell output.
pd.DataFrame.from_dict(results, orient="index")

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))


mAP scores for models:


Unnamed: 0,general keywords,specific_keywords
BM25 unigrams,0.056652,0.514452
BM25 bigrams,0.038118,0.456968
BM25 lemmatization unigrams,0.058848,0.512645
BM25 lemmatization bigrams,0.040045,0.440767
