In [1]:
import sys
# Put '..' (the parent of the current working directory) on the module search
# path so later cells can import modules that live there.
# NOTE(review): presumably Aspect_Sparse_Baselines and helper are located in
# that directory — confirm.
sys.path.append('..')

In [None]:
from Aspect_Sparse_Baselines import OWC, TFIDF, BM25
import numpy as np
import json
from helper import *
import pandas as pd

In [3]:
# Load the run configuration, then the train/validation/test splits that
# load_data builds from that configuration.
# NOTE(review): load_config and load_data are not defined in this notebook;
# presumably they are provided by `from helper import *` — confirm.
config = load_config()
train_splits, val_splits, test_splits = load_data(config)

In [4]:
def owc_eval(test_data, agg_fcn):
    """Evaluate the OWC baseline on one test split.

    Builds an ``OWC`` instance over ``test_data``, runs its ``clean_data``
    step, and scores the split with ``owc_score``.

    Args:
        test_data: The test split handed to the ``OWC`` constructor.
        agg_fcn: Aggregation callable forwarded to ``get_results``.

    Returns:
        Whatever ``OWC.get_results`` returns; ``evaluate`` unpacks it as
        ``(correct, total, type_correct, type_count)``.
    """
    baseline = OWC(test_data)
    scorer = baseline.owc_score
    baseline.clean_data()
    return baseline.get_results(scorer, agg_fcn)

def tfidf_eval(test_data, agg_fcn):
    """Evaluate the TF-IDF baseline on one test split.

    Builds a ``TFIDF`` instance over ``test_data``, feeds the output of its
    ``clean_data`` step to ``calc_df``, and scores the split with
    ``tfidf_score``.

    Args:
        test_data: The test split handed to the ``TFIDF`` constructor.
        agg_fcn: Aggregation callable forwarded to ``get_results``.

    Returns:
        Whatever ``TFIDF.get_results`` returns; ``evaluate`` unpacks it as
        ``(correct, total, type_correct, type_count)``.
    """
    baseline = TFIDF(test_data)
    scorer = baseline.tfidf_score
    # calc_df consumes the cleaned descriptions produced by clean_data.
    baseline.calc_df(baseline.clean_data())
    return baseline.get_results(scorer, agg_fcn)

def bm25_eval(test_data, agg_fcn):
    """Evaluate the BM25 baseline on one test split.

    Builds a ``BM25`` instance over ``test_data``, fits it on the output of
    its ``clean_data`` step, and scores the split with ``bm25_score``.

    Args:
        test_data: The test split handed to the ``BM25`` constructor.
        agg_fcn: Aggregation callable forwarded to ``get_results``.

    Returns:
        Whatever ``BM25.get_results`` returns; ``evaluate`` unpacks it as
        ``(correct, total, type_correct, type_count)``.
    """
    baseline = BM25(test_data)
    scorer = baseline.bm25_score
    # fit consumes the cleaned descriptions produced by clean_data.
    baseline.fit(baseline.clean_data())
    return baseline.get_results(scorer, agg_fcn)

In [5]:
def custom_gmean(lst):
    """Return the geometric mean of the numbers in ``lst``.

    Non-negative input is averaged in log space, ``exp(sum(log(x)) / n)``,
    so that a long list of very small or very large values does not
    underflow to 0 or overflow to inf the way a direct product does, and so
    that integer input cannot wrap around in a fixed-width integer product.

    Args:
        lst: Sized sequence of numbers (anything accepted by ``len`` and
            ``np.asarray``).

    Returns:
        The geometric mean as a NumPy float. If any value is 0 (and none is
        negative) the result is 0.0. If any value is negative, the direct
        product-root ``prod(lst) ** (1 / n)`` is returned instead, which may
        be NaN.

    Raises:
        ZeroDivisionError: If ``lst`` is empty.
    """
    count = len(lst)
    if count == 0:
        # An empty input has no geometric mean; ZeroDivisionError is kept for
        # compatibility with callers that may already expect it.
        raise ZeroDivisionError("geometric mean of an empty sequence")

    values = np.asarray(lst, dtype=float)
    if np.any(values < 0):
        # Logarithms are undefined for negative numbers, so use the direct
        # product-root formula for such input.
        return np.prod(values) ** (1 / count)

    # log(0) is -inf, which correctly propagates to a result of 0.0; silence
    # only the divide-by-zero warning NumPy emits for it.
    with np.errstate(divide="ignore"):
        log_sum = np.sum(np.log(values))
    return np.exp(log_sum / count)

# Aggregation functions tried by evaluate(), keyed by the short name that is
# embedded in each output CSV file name.
agg_fcns = {
    "min": min,
    "max": max,
    "amean": np.mean,
    "gmean": custom_gmean,
}

In [8]:
def _percentage(hits, count):
    """Return ``hits`` as a percentage of ``count``, or NaN when ``count`` is 0."""
    return hits*100/count if count else np.nan


def evaluate(name, method):
    """Run one baseline over every test split and write per-aggregator CSVs.

    For each entry of the module-level ``agg_fcns``, ``method`` is called on
    every split in the module-level ``test_splits``. Each split contributes
    one row holding the per-type accuracies and the overall accuracy (column
    ``"All"``), all in percent. The rows are written to
    ``<name>_<agg_name>.csv`` in the current working directory.

    Args:
        name: Prefix used for the output CSV file names.
        method: Callable ``method(test_data, agg_fcn)`` returning
            ``(correct, total, type_correct, type_count)``, where
            ``type_correct`` and ``type_count`` are mappings keyed by type.

    Returns:
        None. The results are written to disk as a side effect.
    """
    for agg_name, agg_fcn in agg_fcns.items():
        results = []
        # Iterate the test splits themselves rather than indexing them by the
        # number of training splits, so every test split is evaluated exactly
        # once even if the two lists differ in length.
        for test_data in test_splits:
            correct, total, type_correct, type_count = method(test_data, agg_fcn)

            # Build a fresh row instead of overwriting the mapping returned by
            # the baseline, which may still be referenced by the caller.
            row = {
                key: _percentage(val, type_count[key])
                for key, val in type_correct.items()
            }
            row["All"] = _percentage(correct, total)
            results.append(row)

        df = pd.DataFrame(results)
        df.to_csv(name+"_"+agg_name+".csv")

In [9]:
# Evaluate each sparse baseline in turn. Every call writes one
# "<name>_<agg_name>.csv" file per aggregator in agg_fcns (e.g. "owc_min.csv").
evaluate("owc", owc_eval)
evaluate("tfidf", tfidf_eval)
evaluate("bm25", bm25_eval)