In [1]:
import pandas as pd
import json 
import numpy as np
from tqdm import tqdm
import ast
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
from clayrs import content_analyzer as ca
from clayrs import recsys as rs
from clayrs import evaluation as eva

In [3]:
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
import math

In [5]:
from scipy.special import kl_div

# Necessary functions

In [8]:
def embeddings_to_df(e):
    """Expand a collection of embedding vectors into a DataFrame.

    `e` must expose ``tolist()`` (for example a pandas Series whose values
    are lists). Each element becomes one row and each vector component one
    column, with default integer row and column labels.
    """
    return pd.DataFrame(e.tolist())

In [10]:
def get_performances(train, test, k=10):
    """Score a ranking against a test set with clayrs (macro Precision@k).

    Parameters
    ----------
    train : object wrapped in a one-element list as the first argument of
        ``eva.EvalModel`` (callers in this notebook pass the recommendations).
    test : object wrapped in a one-element list as the second argument
        (callers pass the ground-truth ratings).
    k : int, cutoff handed to ``eva.PrecisionAtK``.

    Returns
    -------
    tuple
        ``(sys_result, users_result)`` exactly as returned by ``EvalModel.fit()``.
    """
    # Only macro-averaged Precision@k is evaluated; other clayrs metrics
    # would be appended to this list.
    metrics = [eva.PrecisionAtK(k, sys_average='macro')]
    evaluator = eva.EvalModel([train], [test], metric_list=metrics)
    system_scores, per_user_scores = evaluator.fit()
    return system_scores, per_user_scores

In [11]:
def triangular_matrix(m):
    """Keep only the strictly upper triangle of the DataFrame `m`.

    Cells on or below the main diagonal are replaced by NaN (through
    ``DataFrame.where``); cells above the diagonal keep their value.
    """
    above_diagonal = np.triu(np.ones(m.shape), k=1).astype(bool)
    return m.where(above_diagonal)

In [12]:
def ILS(m):
    """Intra-list similarity: mean of the pairwise similarities in `m`.

    Parameters
    ----------
    m : pandas.DataFrame
        Numeric similarity matrix between the items of one list. Only the
        cells strictly above the main diagonal are used, so each unordered
        pair is counted once and self-similarities are ignored.

    Returns
    -------
    float
        Average of the non-NaN upper-triangle values, or NaN when there is
        no pair to average (fewer than two items, or only NaN similarities).
    """
    values = m.to_numpy(dtype=float)
    above_diagonal = np.triu(np.ones(values.shape, dtype=bool), k=1)
    pairs = values[above_diagonal]
    # Drop missing similarities explicitly instead of relying on
    # DataFrame.stack() doing it implicitly (that default is version dependent).
    pairs = pairs[~np.isnan(pairs)]
    if pairs.size == 0:
        # No pair available: report "undefined" rather than dividing by zero.
        return float('nan')
    return pairs.sum() / pairs.size

In [13]:
def compute_ils(recos, similarity_matrix, list_users):
    """Compute the intra-list similarity of every user's recommendation list.

    Parameters
    ----------
    recos : pandas.DataFrame
        Recommendations with at least the columns ``user_id`` and ``item_id``.
    similarity_matrix : pandas.DataFrame
        Item-by-item similarities whose index and columns are item ids.
    list_users : iterable
        Users to score; one dictionary entry is produced per element, in order.

    Returns
    -------
    dict
        ``{user: ILS(sub-matrix of the user's recommended items)}``.

    Raises
    ------
    KeyError
        If a recommended item id is absent from `similarity_matrix`.
    """
    # Group once instead of scanning the whole recommendation table per user.
    items_by_user = recos.groupby('user_id')['item_id'].apply(list).to_dict()
    dict_ils = {}
    for u in tqdm(list_users):
        news_ids = sorted(items_by_user.get(u, []))
        # Select rows AND columns with the same ordered labels so the diagonal
        # really holds the self-similarities; filtering rows with isin() kept
        # the matrix's own row order, which need not match the sorted columns.
        sim_matrix_user = similarity_matrix.loc[news_ids, news_ids]
        dict_ils[u] = ILS(sim_matrix_user)
    return dict_ils

In [14]:
def get_results_categories(initial_results):
    """Attach each recommended item's category and membership probability.

    The ``item_id`` column is renamed to ``NewsID`` and joined (default inner
    merge) with the notebook-level ``news`` frame; ``cluster_hdbscan`` is
    exposed as ``category``. Probabilities equal to 0 are replaced by 1.
    The input frame is not modified.
    """
    news_categories = news[['NewsID', 'cluster_hdbscan', 'proba']]
    enriched = (
        initial_results
        .rename(columns={'item_id': 'NewsID'})
        .merge(news_categories, on='NewsID')
        .rename(columns={'cluster_hdbscan': 'category'})
    )
    enriched['proba'] = enriched['proba'].replace(0, 1)
    return enriched

In [18]:
def s_recall(recos, list_users, categories_list, k):
    """Share of the known categories covered by each user's recommendations.

    Parameters
    ----------
    recos : DataFrame with ``user_id`` and ``category`` columns.
    list_users : users to score (one dictionary entry each).
    categories_list : all categories; its length is the denominator.
    k : accepted for interface compatibility; not used by the computation.

    Returns
    -------
    dict
        ``{user: number of distinct recommended categories / len(categories_list)}``.
    """
    total_categories = len(categories_list)
    coverage_by_user = {}
    for user in tqdm(list_users):
        user_categories = recos.loc[recos['user_id'] == user, 'category']
        distinct = set(user_categories.unique().tolist())
        coverage_by_user[user] = len(distinct) / total_categories
    return coverage_by_user

In [20]:
from gensim.matutils import hellinger

In [21]:
def calibration_hellinger(recos, list_users, users_interest_df, categories_list):
    """Hellinger distance between each user's interests and recommendations.

    Parameters
    ----------
    recos : DataFrame with ``user_id`` and ``category`` columns.
    list_users : users to score; each must be a row label of `users_interest_df`.
    users_interest_df : DataFrame whose row for a user is that user's
        interest distribution.
    categories_list : categories, in the order used to build the
        recommendation distribution.

    Returns
    -------
    dict
        ``{user: hellinger(interest row, category shares of the recommendations)}``.
    """
    distances = {}
    for user in tqdm(list_users):
        target = users_interest_df.loc[user].values.tolist()
        user_recos = recos[recos['user_id'] == user].reset_index(drop=True)
        n_recos = len(user_recos)
        # Fraction of this user's recommendations falling in each category.
        observed = [
            len(user_recos[user_recos['category'] == cat]) / n_recos
            for cat in categories_list
        ]
        distances[user] = hellinger(target, observed).sum()
    return distances

In [22]:
def homogeneization(distrib, param=0.5):
    """Blend a distribution with the uniform distribution of the same length.

    Every value ``p`` becomes ``(1 - param) * p + param / len(distrib)``:
    ``param=0`` returns the values unchanged and ``param=1`` returns the
    uniform distribution. An empty input yields an empty list.
    """
    size = len(distrib)
    keep = 1 - param
    smoothed = []
    for prob in distrib:
        smoothed.append((keep * prob) + (param / size))
    return smoothed

In [83]:
def calibration_final(recos, users_list, users_interest_df, categories_list):
    """Best-case Hellinger calibration over a grid of interest smoothings.

    For each user, the interest distribution is blended with the uniform
    distribution (see ``homogeneization``) for every lambda in
    0.0, 0.1, ..., 1.0, and the smallest Hellinger distance to the category
    distribution of the user's recommendations is kept.

    Parameters
    ----------
    recos : raw recommendations (``user_id``, ``item_id``, ...); categories
        are attached here through ``get_results_categories``.
    users_list : users to score; each must be a row label of `users_interest_df`.
    users_interest_df : DataFrame whose row for a user is that user's
        interest distribution.
    categories_list : categories, in the order used to build the
        recommendation distribution.

    Returns
    -------
    dict
        ``{user: minimal Hellinger distance over the lambda grid}``.
    """
    recos = get_results_categories(recos)
    # The smoothing grid does not depend on the user: build it once.
    lambdas = [round(l, 1) for l in np.arange(0, 1.1, 0.1)]
    dict_ch_smooth = {}
    for u in tqdm(users_list):
        recos_user = recos[recos['user_id'] == u].reset_index(drop=True)
        interest_user = users_interest_df.loc[u].values.tolist()
        n_recos = len(recos_user)
        # Fraction of this user's recommendations falling in each category.
        distrib_cat_recos = [
            len(recos_user[recos_user['category'] == c]) / n_recos
            for c in categories_list
        ]
        # Sentinel start value; a Hellinger distance between probability
        # vectors never exceeds 1, so any real distance replaces it.
        best_ch = 10
        for lam in lambdas:
            new_distrib = homogeneization(interest_user, param=lam)
            c_h = hellinger(distrib_cat_recos, new_distrib)
            if c_h < best_ch:
                best_ch = c_h
        dict_ch_smooth[u] = best_ch
    return dict_ch_smooth

In [25]:
def get_all_results(results, test_set, users_list, interest, similarity_matrix, categories_list, name_parameters, k=10):
    """Evaluate one set of recommendations on accuracy, diversity and calibration.

    Parameters
    ----------
    results : DataFrame of recommendations (``user_id``, ``item_id``, ``score``).
    test_set : DataFrame of held-out ratings, converted with ``ca.Ratings.from_dataframe``.
    users_list : users for whom ILS, S-recall and calibration are computed.
    interest : per-user interest distributions (passed to ``calibration_hellinger``).
    similarity_matrix : item-by-item similarity DataFrame (passed to ``compute_ils``).
    categories_list : list of all categories.
    name_parameters : label used as the single index value of the global frame.
    k : cutoff for Precision@k (also forwarded to ``s_recall``).

    Returns
    -------
    tuple
        ``(eval_results_global, eval_results_indiv)``: a one-row frame of
        system-level scores and a per-user frame, both with the columns
        ``Precision``, ``ILS``, ``s_recall``, ``C_h`` and rounded to 3 decimals.
    """
    # The accuracy column is named after the cutoff that was actually used,
    # so build the name from k instead of assuming k == 10.
    precision_column = 'Precision@{} - macro'.format(k)

    print('Pre-processing...')
    results_categories = get_results_categories(results)
    results_ratings = ca.Ratings.from_dataframe(results)
    test_ratings = ca.Ratings.from_dataframe(test_set)
    print('OK!')

    # Accuracy
    print('Accuracy metrics...')
    sys_results, users_results = get_performances(results_ratings, test_ratings, k=k)
    print('OK!')

    # Global results: keep the 'sys - fold1' row and relabel it with the run name.
    eval_results_global = sys_results.reset_index()
    # Copy after filtering so the assignment below writes to an independent frame.
    eval_results_global = eval_results_global[eval_results_global['user_id']=='sys - fold1'].copy()
    eval_results_global['user_id'] = [name_parameters]
    eval_results_global = eval_results_global.rename(columns={'user_id':'a_value'})
    eval_results_global = eval_results_global.set_index('a_value')
    eval_results_global = eval_results_global.rename(columns={precision_column:'Precision'})

    # Individual results, indexed by integer user id so the per-user
    # dictionaries below can be mapped onto the index.
    eval_results_indiv = users_results.copy()
    eval_results_indiv.index = eval_results_indiv.index.astype(int)
    eval_results_indiv = eval_results_indiv.rename(columns={precision_column:'Precision'})

    # ILS
    print('ILS...')
    dict_ils = compute_ils(results, similarity_matrix, users_list)
    eval_results_global['ILS'] = np.mean(list(dict_ils.values()))
    eval_results_indiv['ILS'] = eval_results_indiv.index.map(dict_ils)
    print('OK!')

    # S-Recall
    print('S-Recall')
    dict_srecall = s_recall(results_categories, users_list, categories_list, k=k)
    eval_results_global['s_recall'] = np.mean(list(dict_srecall.values()))
    eval_results_indiv['s_recall'] = eval_results_indiv.index.map(dict_srecall)
    print('OK!')

    # Calibration (Hellinger)
    print('Calibration Hellinger')
    dict_ch = calibration_hellinger(results_categories, users_list, interest, categories_list)
    eval_results_global['c_hell'] = np.mean(list(dict_ch.values()))
    eval_results_indiv['c_hell'] = eval_results_indiv.index.map(dict_ch)
    print('OK!')

    # Final column labels are assigned positionally: both frames must hold
    # exactly these four metrics, in this order.
    eval_results_global.columns = ['Precision','ILS', 's_recall','C_h']
    eval_results_global = eval_results_global.round(3)

    eval_results_indiv.columns = ['Precision','ILS', 's_recall','C_h']
    eval_results_indiv = eval_results_indiv.round(3)

    return eval_results_global, eval_results_indiv

# Necessary data

In [30]:
# Baseline ranking (CentroidVector_1, split 0) and the hold-out test split
# stored in the same k=20 report directory.
results_baseline_k20 = pd.read_csv('../reco/report_baseline_10k_cv_k20/CentroidVector_1/rs_rank_split0.csv')
test_baseline_k20 = pd.read_csv('../reco/report_baseline_10k_cv_k20/HoldOutPartitioning_test_split0.csv')

In [31]:
# Greedy re-ranking outputs: one CSV per lambda suffix, loaded in file-name order.
(
    results_greedy_0,
    results_greedy_01,
    results_greedy_02,
    results_greedy_03,
    results_greedy_04,
    results_greedy_05,
    results_greedy_06,
    results_greedy_07,
    results_greedy_08,
    results_greedy_09,
    results_greedy_1,
) = (
    pd.read_csv('../re_ranking/greedy/greedy_lambda_' + lambda_suffix + '.csv')
    for lambda_suffix in ('0', '01', '02', '03', '04', '05', '06', '07', '08', '09', '1')
)

In [32]:
# ADF re-ranking outputs: one CSV per `a` suffix, reduced to the three
# columns used by the evaluation (user_id, item_id, score).
(
    results_a_0_k20,
    results_a_01_k20,
    results_a_02_k20,
    results_a_03_k20,
    results_a_04_k20,
    results_a_05_k20,
    results_a_06_k20,
    results_a_07_k20,
    results_a_08_k20,
    results_a_09_k20,
    results_a_1_k20,
) = (
    pd.read_csv('../re_ranking/ADF/div_a_' + a_suffix + '_k20.csv')[['user_id','item_id','score']]
    for a_suffix in ('0', '01', '02', '03', '04', '05', '06', '07', '08', '09', '1')
)

In [81]:
# Entropy files written next to the ADF re-rankings, one per `a` suffix;
# the first CSV column is used as the index.
(
    entropy_a0,
    entropy_a01,
    entropy_a02,
    entropy_a03,
    entropy_a04,
    entropy_a05,
    entropy_a06,
    entropy_a07,
    entropy_a08,
    entropy_a09,
    entropy_a1,
) = (
    pd.read_csv('../re_ranking/ADF/entropy/entropy_a_' + a_suffix + '_k20.csv', index_col=0)
    for a_suffix in ('0', '01', '02', '03', '04', '05', '06', '07', '08', '09', '1')
)

In [82]:
# Non-personalised ("global") re-ranking outputs for a = 0.6 ... 1, reduced
# to the three columns used by the evaluation (user_id, item_id, score).
(
    results_glob_a06,
    results_glob_a07,
    results_glob_a08,
    results_glob_a09,
    results_glob_a1,
) = (
    pd.read_csv('../re_ranking/no_pers/div_global_' + a_value + '_k20.csv')[['user_id','item_id','score']]
    for a_value in ('0.6', '0.7', '0.8', '0.9', '1')
)

In [34]:
# Hold-out test split used for every re-ranking evaluation below.
# NOTE(review): this comes from the "..._cv_complete" report, whereas the baseline
# cell uses the test split of the "..._cv_k20" report — confirm both hold the same users/ratings.
test_set = pd.read_csv('../reco/report_baseline_10k_cv_complete/HoldOutPartitioning_test_split0.csv')

In [35]:
# Distinct users of the test split, converted to plain Python ints.
users_list = list(map(int, test_set['user_id'].unique().tolist()))

In [38]:
# News metadata; get_results_categories reads the NewsID, cluster_hdbscan
# and proba columns of this frame.
news = pd.read_csv('../data/news_thematic_clustering_large_final.csv', index_col=0)

In [39]:
# Shift every cluster label up by one.
# NOTE(review): this cell is not idempotent — re-running it without reloading
# `news` shifts the labels again. Presumably it maps HDBSCAN's noise label -1
# to 0 so labels match the integer columns of users_interest — TODO confirm.
news['cluster_hdbscan'] = news['cluster_hdbscan']+1

In [40]:
# Embeddings df
# The 'lda_128#0' column is parsed with ast.literal_eval (so its values are
# Python-literal strings on disk), expanded to one column per component, and
# indexed by content_id.
embeddings_lda_128 = pd.read_json('../reco/news_codified_lda_128/contents.json')
embeddings_lda_128['lda_128#0'] = embeddings_lda_128['lda_128#0'].apply(ast.literal_eval)
news_embeddings_lda = embeddings_to_df(embeddings_lda_128['lda_128#0'])
news_embeddings_lda.index = embeddings_lda_128['content_id']

In [41]:
# Per-user interest distribution over categories; the first CSV column is the
# row index (looked up by user id in the calibration functions).
users_interest = pd.read_csv('../user_profile/categories_distribution_subprofiles_10k.csv', index_col=0)

In [42]:
# Column labels are read from the CSV header; cast them to int so they can be
# compared with the integer category labels of the recommendations.
users_interest.columns = users_interest.columns.astype(int)

In [43]:
# Pairwise cosine similarity between all news embeddings (rows of news_embeddings_lda).
similarity_matrix = pd.DataFrame(cosine_similarity(news_embeddings_lda))

In [44]:
# Label both axes with the news ids so sub-matrices can be selected by item id.
similarity_matrix.index = news_embeddings_lda.index.tolist()
similarity_matrix.columns = news_embeddings_lda.index.tolist()

In [45]:
# Categories in the column order of users_interest, so per-category vectors
# built from this list line up with each user's interest row.
categories_list = users_interest.columns.tolist()

# Baseline

In [46]:
# Evaluate the un-re-ranked baseline at k=20.
# NOTE(review): accuracy uses test_baseline_k20, while users_list was derived
# from test_set (a different report directory) — confirm the user sets match.
eval_baseline_k20_global, eval_baseline_k20_ind = get_all_results(results_baseline_k20, test_baseline_k20, users_list, users_interest, similarity_matrix, categories_list, 'baseline_k20', k=20)

Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:27<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:20<00:00, 124.06it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:06<00:00, 1519.77it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:40<00:00, 248.55it/s]

OK!





In [85]:
# Per-user minimal Hellinger distance over the smoothing grid, for the baseline.
ch_baseline = calibration_final(results_baseline_k20, users_list, users_interest, categories_list)

100%|██████████| 10000/10000 [01:12<00:00, 137.16it/s]


In [86]:
# Average over users, rounded to 3 decimals like the other global metrics.
chl_baseline = np.mean(list(ch_baseline.values())).round(3)

In [87]:
# Overwrite the plain Hellinger calibration computed by get_all_results with
# the smoothed (best-lambda) value.
eval_baseline_k20_global['C_h'] = chl_baseline

In [88]:
eval_baseline_k20_global

Unnamed: 0_level_0,Precision,ILS,s_recall,C_h
a_value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baseline_k20,0.224,0.55,0.383,0.36


In [49]:
# eval_baseline_k20_global.to_csv('baseline/complete_baseline.csv')

In [50]:
# eval_baseline_k20_ind.to_csv('baseline/ind_baseline.csv')

# ADF

In [51]:
# Evaluate every ADF re-ranking (suffixes 0, 01, ..., 1) at k=20 against the
# shared test set. Each call takes several minutes (see the progress bars
# below); the calls are kept as separate statements so results already
# computed stay assigned if a later call fails.
eval_a_0_k20_global, eval_a_0_k20_ind = get_all_results(results_a_0_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_0_k20', k=20)
eval_a_01_k20_global, eval_a_01_k20_ind = get_all_results(results_a_01_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_01_k20', k=20)
eval_a_02_k20_global, eval_a_02_k20_ind = get_all_results(results_a_02_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_02_k20', k=20)
eval_a_03_k20_global, eval_a_03_k20_ind = get_all_results(results_a_03_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_03_k20', k=20)
eval_a_04_k20_global, eval_a_04_k20_ind = get_all_results(results_a_04_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_04_k20', k=20)
eval_a_05_k20_global, eval_a_05_k20_ind = get_all_results(results_a_05_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_05_k20', k=20)
eval_a_06_k20_global, eval_a_06_k20_ind = get_all_results(results_a_06_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_06_k20', k=20)
eval_a_07_k20_global, eval_a_07_k20_ind = get_all_results(results_a_07_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_07_k20', k=20)
eval_a_08_k20_global, eval_a_08_k20_ind = get_all_results(results_a_08_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_08_k20', k=20)
eval_a_09_k20_global, eval_a_09_k20_ind = get_all_results(results_a_09_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_09_k20', k=20)
eval_a_1_k20_global, eval_a_1_k20_ind = get_all_results(results_a_1_k20, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_1_k20', k=20)

Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:20<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:48<00:00, 92.17it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:06<00:00, 1501.86it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:39<00:00, 252.77it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:33<00:00, 107.00it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:08<00:00, 1206.51it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:46<00:00, 217.29it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:33<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:43<00:00, 96.82it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:08<00:00, 1206.07it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:35<00:00, 279.27it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:27<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [02:09<00:00, 77.09it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:10<00:00, 967.36it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:54<00:00, 184.13it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:26<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [02:08<00:00, 78.08it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:16<00:00, 606.68it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:51<00:00, 194.75it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:25<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:41<00:00, 98.45it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:13<00:00, 732.68it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:39<00:00, 100.02it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:30<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:27<00:00, 114.22it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:12<00:00, 829.75it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:22<00:00, 120.66it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:40<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:23<00:00, 120.14it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:09<00:00, 1016.17it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:54<00:00, 182.92it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:37<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:49<00:00, 90.94it/s] 


OK!
S-Recall


100%|██████████| 10000/10000 [00:07<00:00, 1348.36it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:41<00:00, 240.37it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:41<00:00, 98.29it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:19<00:00, 500.58it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:02<00:00, 160.49it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:20<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:16<00:00, 131.14it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:09<00:00, 1036.96it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:22<00:00, 121.61it/s]

OK!





In [57]:
# Per-user calibration against the entropy-derived target, one dict per `a` value.
# NOTE(review): calibration_hellinger_target is not defined in any cell shown in
# this notebook, so this cell fails on a fresh kernel (Restart & Run All) —
# restore its definition in the functions section.
c_hellinger_a0 = calibration_hellinger_target(entropy_a0, users_list, users_interest)
c_hellinger_a01 = calibration_hellinger_target(entropy_a01, users_list, users_interest)
c_hellinger_a02 = calibration_hellinger_target(entropy_a02, users_list, users_interest)
c_hellinger_a03 = calibration_hellinger_target(entropy_a03, users_list, users_interest)
c_hellinger_a04 = calibration_hellinger_target(entropy_a04, users_list, users_interest)
c_hellinger_a05 = calibration_hellinger_target(entropy_a05, users_list, users_interest)
c_hellinger_a06 = calibration_hellinger_target(entropy_a06, users_list, users_interest)
c_hellinger_a07 = calibration_hellinger_target(entropy_a07, users_list, users_interest)
c_hellinger_a08 = calibration_hellinger_target(entropy_a08, users_list, users_interest)
c_hellinger_a09 = calibration_hellinger_target(entropy_a09, users_list, users_interest)
c_hellinger_a1 = calibration_hellinger_target(entropy_a1, users_list, users_interest)

100%|██████████| 10000/10000 [00:01<00:00, 8731.72it/s]
100%|██████████| 10000/10000 [00:01<00:00, 8157.38it/s]
100%|██████████| 10000/10000 [00:01<00:00, 6418.06it/s]
100%|██████████| 10000/10000 [00:01<00:00, 5803.80it/s]
100%|██████████| 10000/10000 [00:01<00:00, 6239.88it/s]
100%|██████████| 10000/10000 [00:01<00:00, 7779.49it/s]
100%|██████████| 10000/10000 [00:01<00:00, 7858.08it/s]
100%|██████████| 10000/10000 [00:01<00:00, 8164.77it/s]
100%|██████████| 10000/10000 [00:01<00:00, 7177.62it/s]
100%|██████████| 10000/10000 [00:01<00:00, 6247.58it/s]
100%|██████████| 10000/10000 [00:01<00:00, 5717.04it/s]


In [70]:
# Absolute gap between the global Hellinger calibration of each ADF run and
# the mean per-user target calibration, rounded to 3 decimals.
# The global frames are indexed by run name, so the single value is read by
# position with .iloc[0]; the bare [0] lookup on a string-labelled Series is a
# deprecated positional fallback.
chl_a0 = abs(eval_a_0_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a0.values()))).round(3)
chl_a01 = abs(eval_a_01_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a01.values()))).round(3)
chl_a02 = abs(eval_a_02_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a02.values()))).round(3)
chl_a03 = abs(eval_a_03_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a03.values()))).round(3)
chl_a04 = abs(eval_a_04_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a04.values()))).round(3)
chl_a05 = abs(eval_a_05_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a05.values()))).round(3)
chl_a06 = abs(eval_a_06_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a06.values()))).round(3)
chl_a07 = abs(eval_a_07_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a07.values()))).round(3)
chl_a08 = abs(eval_a_08_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a08.values()))).round(3)
chl_a09 = abs(eval_a_09_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a09.values()))).round(3)
chl_a1 = abs(eval_a_1_k20_global['C_h'].iloc[0] - np.mean(list(c_hellinger_a1.values()))).round(3)

In [72]:
# Calibration gaps in the same order as the frames concatenated into complete_results (a = 0 ... 1).
chl = [chl_a0, chl_a01, chl_a02, chl_a03, chl_a04, chl_a05, chl_a06, chl_a07, chl_a08, chl_a09, chl_a1]

In [74]:
# Stack the one-row global frames of all ADF runs, from a_0 to a_1.
complete_results = pd.concat([eval_a_0_k20_global, eval_a_01_k20_global, eval_a_02_k20_global, eval_a_03_k20_global, eval_a_04_k20_global, eval_a_05_k20_global, eval_a_06_k20_global, eval_a_07_k20_global, eval_a_08_k20_global, eval_a_09_k20_global, eval_a_1_k20_global])

In [76]:
# Replace the plain Hellinger calibration with the gap to the entropy-based
# target; this relies on chl and the rows being in the same order.
complete_results['C_h'] = chl

In [77]:
# Reverse the row order: the a_1 run becomes the first row, a_0 the last.
complete_results = complete_results.iloc[::-1] 

In [78]:
# Label the 11 rows 0.0, 0.1, ..., 1.0. linspace + round gives exact
# one-decimal labels; np.arange(0, 1.1, 0.1) produces values such as
# 0.30000000000000004 that break label lookups and leak into the exported CSV.
# NOTE(review): the rows were reversed in the previous cell, so label 0.0 is
# attached to the a_1 run — confirm this mapping is intended.
complete_results.index = np.linspace(0, 1, 11).round(1)

In [79]:
complete_results

Unnamed: 0,Precision,ILS,s_recall,C_h
0.0,0.227,0.438,0.525,0.141
0.1,0.225,0.43,0.552,0.038
0.2,0.225,0.428,0.559,0.03
0.3,0.222,0.419,0.588,0.016
0.4,0.22,0.407,0.622,0.055
0.5,0.217,0.398,0.646,0.089
0.6,0.215,0.393,0.661,0.11
0.7,0.176,0.298,1.0,0.072
0.8,0.171,0.291,1.0,0.041
0.9,0.162,0.276,1.0,0.003


In [80]:
# Persist the ADF summary table (path is relative to the notebook's working directory).
complete_results.to_csv('ADF/ADF_complete.csv')

# No personalization

In [304]:
# Evaluate the non-personalised re-rankings (a = 0.6 ... 1) at k=20.
# NOTE(review): the output stored below lists metrics (NDCG, alpha NDCG, NDCG IA,
# Calibration KL) that the current get_all_results no longer computes, so it
# was produced by an earlier version of the function — re-run before relying on it.
eval_a_06_k20_nopers, eval_a_06_k20_ind_nopers = get_all_results(results_glob_a06, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_06_k20_no_pers', k=20)
eval_a_07_k20_nopers, eval_a_07_k20_ind_nopers = get_all_results(results_glob_a07, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_07_k20_no_pers', k=20)
eval_a_08_k20_nopers, eval_a_08_k20_ind_nopers = get_all_results(results_glob_a08, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_08_k20_no_pers', k=20)
eval_a_09_k20_nopers, eval_a_09_k20_ind_nopers = get_all_results(results_glob_a09, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_09_k20_no_pers', k=20)
eval_a_1_k20_nopers, eval_a_1_k20_ind_nopers = get_all_results(results_glob_a1, test_set, users_list, users_interest, similarity_matrix, categories_list, 'a_1_k20_no_pers', k=20)

Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


  return actual / ideal
Performing NDCG@20:  100%|██████████| 5/5 [01:55<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [02:56<00:00, 56.57it/s]


OK!
alpha NDCG


100%|██████████| 10000/10000 [00:23<00:00, 428.31it/s]


OK!
NDCG IA


100%|██████████| 10000/10000 [00:29<00:00, 340.87it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:12<00:00, 787.20it/s]


OK!
Calibration KL


100%|██████████| 10000/10000 [01:03<00:00, 156.97it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:02<00:00, 158.78it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


  return actual / ideal
Performing NDCG@20:  100%|██████████| 5/5 [02:04<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:57<00:00, 85.01it/s]


OK!
alpha NDCG


100%|██████████| 10000/10000 [00:21<00:00, 471.06it/s]


OK!
NDCG IA


100%|██████████| 10000/10000 [00:28<00:00, 350.64it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:12<00:00, 803.44it/s]


OK!
Calibration KL


100%|██████████| 10000/10000 [01:08<00:00, 146.25it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:57<00:00, 173.42it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


  return actual / ideal
Performing NDCG@20:  100%|██████████| 5/5 [02:21<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:51<00:00, 89.94it/s]


OK!
alpha NDCG


100%|██████████| 10000/10000 [00:21<00:00, 462.12it/s]


OK!
NDCG IA


100%|██████████| 10000/10000 [00:25<00:00, 395.85it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:09<00:00, 1011.34it/s]


OK!
Calibration KL


100%|██████████| 10000/10000 [01:02<00:00, 159.76it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:01<00:00, 161.52it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


  return actual / ideal
Performing NDCG@20:  100%|██████████| 5/5 [02:08<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:54<00:00, 87.30it/s]


OK!
alpha NDCG


100%|██████████| 10000/10000 [00:22<00:00, 449.04it/s]


OK!
NDCG IA


100%|██████████| 10000/10000 [00:32<00:00, 304.54it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:15<00:00, 638.65it/s]


OK!
Calibration KL


100%|██████████| 10000/10000 [01:16<00:00, 130.80it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:09<00:00, 144.69it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


  return actual / ideal
Performing NDCG@20:  100%|██████████| 5/5 [02:19<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:54<00:00, 87.64it/s]


OK!
alpha NDCG


100%|██████████| 10000/10000 [00:26<00:00, 383.94it/s]


OK!
NDCG IA


100%|██████████| 10000/10000 [00:29<00:00, 340.62it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:11<00:00, 838.24it/s]


OK!
Calibration KL


100%|██████████| 10000/10000 [00:59<00:00, 169.09it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:01<00:00, 163.31it/s]


OK!


In [305]:
# Stack the one-row global frames of the non-personalised runs (a = 0.6 ... 1).
complete_results_no_pers = pd.concat([eval_a_06_k20_nopers, eval_a_07_k20_nopers, eval_a_08_k20_nopers, eval_a_09_k20_nopers, eval_a_1_k20_nopers])

In [306]:
complete_results_no_pers

Unnamed: 0_level_0,Precision,Recall,F1-score,MRR,NDCG,ILS,alpha_ndcg,ndcg_ia,s_recall,c_kl,c_hell
a_value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
a_06_k20_no_pers,0.226,0.63,0.28,0.424,0.564,0.433,0.873,0.56,0.528,0.131,0.139
a_07_k20_no_pers,0.223,0.623,0.276,0.422,0.564,0.423,0.874,0.559,0.558,0.114,0.134
a_08_k20_no_pers,0.211,0.598,0.262,0.418,0.565,0.385,0.858,0.55,0.66,0.115,0.157
a_09_k20_no_pers,0.175,0.534,0.22,0.405,0.567,0.298,0.809,0.519,0.93,0.316,0.349
a_1_k20_no_pers,0.123,0.373,0.153,0.374,0.576,0.206,0.676,0.438,0.945,0.82,0.522


In [307]:
# Persist the non-personalised summary table (path is relative to the notebook's working directory).
complete_results_no_pers.to_csv('no_pers/complete_no_pers.csv')

# Greedy

## Compute performances

In [92]:
# Evaluate every greedy re-ranking (lambda suffixes 0, 01, ..., 1) at k=20
# against the shared test set. Each call takes several minutes; the calls are
# kept as separate statements so results already computed stay assigned if a
# later call fails.
eval_greedy_0, eval_greedy_0_ind = get_all_results(results_greedy_0, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_0', k=20)
eval_greedy_01, eval_greedy_01_ind = get_all_results(results_greedy_01, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_01', k=20)
eval_greedy_02, eval_greedy_02_ind = get_all_results(results_greedy_02, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_02', k=20)
eval_greedy_03, eval_greedy_03_ind = get_all_results(results_greedy_03, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_03', k=20)
eval_greedy_04, eval_greedy_04_ind = get_all_results(results_greedy_04, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_04', k=20)
eval_greedy_05, eval_greedy_05_ind = get_all_results(results_greedy_05, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_05', k=20)
eval_greedy_06, eval_greedy_06_ind = get_all_results(results_greedy_06, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_06', k=20)
eval_greedy_07, eval_greedy_07_ind = get_all_results(results_greedy_07, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_07', k=20)
eval_greedy_08, eval_greedy_08_ind = get_all_results(results_greedy_08, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_08', k=20)
eval_greedy_09, eval_greedy_09_ind = get_all_results(results_greedy_09, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_09', k=20)
eval_greedy_1, eval_greedy_1_ind = get_all_results(results_greedy_1, test_set, users_list, users_interest, similarity_matrix, categories_list, 'greedy_1', k=20)

Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:43<00:00, 96.37it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:07<00:00, 1309.44it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:43<00:00, 229.17it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:25<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:22<00:00, 121.20it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:13<00:00, 715.51it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:59<00:00, 167.86it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:09<00:00, 143.99it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:08<00:00, 1235.62it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:51<00:00, 194.16it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:33<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:14<00:00, 134.59it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:07<00:00, 1381.75it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:42<00:00, 236.55it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:28<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:34<00:00, 105.33it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:09<00:00, 1010.69it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:42<00:00, 235.45it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:16<00:00, 130.98it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:08<00:00, 1160.59it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [01:05<00:00, 152.81it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:09<00:00, 143.30it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:07<00:00, 1348.87it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:49<00:00, 203.65it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:29<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:36<00:00, 103.81it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:07<00:00, 1289.03it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:43<00:00, 229.20it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:25<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:31<00:00, 109.57it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:12<00:00, 780.78it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:58<00:00, 171.80it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:24<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:13<00:00, 136.62it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:09<00:00, 1084.70it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:56<00:00, 176.33it/s]


OK!
Pre-processing...


[39mINFO[0m - Performing evaluation on metrics chosen


OK!
Accuracy metrics...


Performing Precision@20 - macro:  100%|██████████| 1/1 [00:32<00:00]


OK!
ILS...


100%|██████████| 10000/10000 [01:28<00:00, 113.43it/s]


OK!
S-Recall


100%|██████████| 10000/10000 [00:06<00:00, 1541.52it/s]


OK!
Calibration Hellinger


100%|██████████| 10000/10000 [00:37<00:00, 267.24it/s]

OK!





In [93]:
# Stack the first result of every greedy evaluation into a single table.
# The run order (greedy_0 ... greedy_1) matters: the 'C_h' column is attached
# to this table by position further down.
greedy_eval_frames = [
    eval_greedy_0,
    eval_greedy_01,
    eval_greedy_02,
    eval_greedy_03,
    eval_greedy_04,
    eval_greedy_05,
    eval_greedy_06,
    eval_greedy_07,
    eval_greedy_08,
    eval_greedy_09,
    eval_greedy_1,
]
complete_results_greedy = pd.concat(greedy_eval_frames)

In [99]:
def _greedy_calibration(recommendations):
    """Call `calibration_final` for one greedy recommendation set.

    The user list, user interests and category list are the same for every
    run, so only the recommendations are passed in.
    """
    return calibration_final(recommendations, users_list, users_interest, categories_list)


# Calibration results for each greedy run, in run order.
# NOTE(review): the logged output of the evaluation cell already shows a
# 'Calibration Hellinger' step for every run — confirm whether this separate
# pass is still required.
ch_greedy0 = _greedy_calibration(results_greedy_0)
ch_greedy01 = _greedy_calibration(results_greedy_01)
ch_greedy02 = _greedy_calibration(results_greedy_02)
ch_greedy03 = _greedy_calibration(results_greedy_03)
ch_greedy04 = _greedy_calibration(results_greedy_04)
ch_greedy05 = _greedy_calibration(results_greedy_05)
ch_greedy06 = _greedy_calibration(results_greedy_06)
ch_greedy07 = _greedy_calibration(results_greedy_07)
ch_greedy08 = _greedy_calibration(results_greedy_08)
ch_greedy09 = _greedy_calibration(results_greedy_09)
ch_greedy1 = _greedy_calibration(results_greedy_1)

  0%|          | 0/10000 [00:00<?, ?it/s]

100%|██████████| 10000/10000 [00:53<00:00, 187.65it/s]
100%|██████████| 10000/10000 [00:54<00:00, 184.30it/s]
100%|██████████| 10000/10000 [00:42<00:00, 234.22it/s]
100%|██████████| 10000/10000 [00:57<00:00, 173.54it/s]
100%|██████████| 10000/10000 [00:44<00:00, 223.21it/s]
100%|██████████| 10000/10000 [01:08<00:00, 146.95it/s]
100%|██████████| 10000/10000 [01:52<00:00, 89.13it/s]
100%|██████████| 10000/10000 [01:13<00:00, 136.64it/s]
100%|██████████| 10000/10000 [01:23<00:00, 119.25it/s]
100%|██████████| 10000/10000 [01:09<00:00, 144.21it/s]
100%|██████████| 10000/10000 [01:26<00:00, 115.64it/s]


In [100]:
def _rounded_mean_of_values(scores):
    """Return the mean of `scores.values()`, rounded to three decimals."""
    collected = list(scores.values())
    return np.mean(collected).round(3)


# Collapse each run's calibration mapping into a single rounded mean.
chl_greedy0 = _rounded_mean_of_values(ch_greedy0)
chl_greedy01 = _rounded_mean_of_values(ch_greedy01)
chl_greedy02 = _rounded_mean_of_values(ch_greedy02)
chl_greedy03 = _rounded_mean_of_values(ch_greedy03)
chl_greedy04 = _rounded_mean_of_values(ch_greedy04)
chl_greedy05 = _rounded_mean_of_values(ch_greedy05)
chl_greedy06 = _rounded_mean_of_values(ch_greedy06)
chl_greedy07 = _rounded_mean_of_values(ch_greedy07)
chl_greedy08 = _rounded_mean_of_values(ch_greedy08)
chl_greedy09 = _rounded_mean_of_values(ch_greedy09)
chl_greedy1 = _rounded_mean_of_values(ch_greedy1)

In [101]:
# Rounded mean calibration per greedy run, kept in run order (greedy_0 ... greedy_1).
chl_greedy_complete = [
    chl_greedy0,
    chl_greedy01,
    chl_greedy02,
    chl_greedy03,
    chl_greedy04,
    chl_greedy05,
    chl_greedy06,
    chl_greedy07,
    chl_greedy08,
    chl_greedy09,
    chl_greedy1,
]

In [102]:
# Display the list of rounded mean calibration values (one entry per greedy run).
chl_greedy_complete

[0.36, 0.36, 0.359, 0.359, 0.356, 0.349, 0.348, 0.344, 0.335, 0.316, 0.37]

In [103]:
# Attach the calibration means as the 'C_h' column. A plain list is matched to
# rows by position, so it must have one entry per row and follow the same run
# order as the frames concatenated into complete_results_greedy.
complete_results_greedy['C_h'] = chl_greedy_complete

In [104]:
# Display the combined greedy results table, including the 'C_h' column.
complete_results_greedy

Unnamed: 0_level_0,Precision,ILS,s_recall,C_h
a_value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
greedy_0,0.224,0.55,0.383,0.36
greedy_01,0.224,0.55,0.383,0.36
greedy_02,0.224,0.549,0.383,0.359
greedy_03,0.225,0.548,0.384,0.359
greedy_04,0.225,0.545,0.386,0.356
greedy_05,0.224,0.528,0.395,0.349
greedy_06,0.223,0.519,0.399,0.348
greedy_07,0.223,0.508,0.406,0.344
greedy_08,0.222,0.475,0.427,0.335
greedy_09,0.217,0.386,0.492,0.316


In [105]:
# Write the combined greedy results table to CSV.
# NOTE(review): to_csv does not create the 'greedy/' directory — confirm it exists before running.
complete_results_greedy.to_csv('greedy/complete_greedy.csv')