## Load dataset

In [1]:
import glob
import pandas as pd

files = glob.glob('../evaluate_results/outer/*.csv', recursive=True)

print(len(files), 'files')
files[0:3]

160 files


["../evaluate_results/outer/KNeighborsClassifier-split_x_y_with_projection-{'n_neighbors': [1, 5, 10, 15, 20, 25, 40, 60, 80, 100], 'algorithm': ['brute'], 'metric': ['hamming']}-(3 of 5).csv",
 "../evaluate_results/outer/SVC-split_x_y_normalized_function-{'C': [1e-05, 0.001, 0.1, 10.0, 1000.0, 100000.0], 'gamma': [1e-05, 0.001, 0.1, 10.0, 1000.0, 100000.0, 'scale'], 'kernel': ['rbf'], 'probability': [True]}-(3 of 5).csv",
 "../evaluate_results/outer/SVC-split_x_y_word2vec-{'C': [1e-05, 0.001, 0.1, 10.0, 1000.0, 100000.0], 'gamma': [1e-05, 0.001, 0.1, 10.0, 1000.0, 100000.0, 'scale'], 'kernel': ['rbf'], 'probability': [True]}-(1 of 5).csv"]

In [2]:
# Read all data
data = pd.concat([pd.read_csv(file, index_col=[0]) for file in files])
data.head(2)

Unnamed: 0,best_params,column,i_outer,metric,model,params,split_method,value
0,"{'algorithm': 'brute', 'metric': 'hamming', 'n...",0,2,accuracy,KNeighborsClassifier,"{'n_neighbors': [1, 5, 10, 15, 20, 25, 40, 60,...",split_x_y_with_projection,0.296296
1,"{'algorithm': 'brute', 'metric': 'hamming', 'n...",1,2,accuracy,KNeighborsClassifier,"{'n_neighbors': [1, 5, 10, 15, 20, 25, 40, 60,...",split_x_y_with_projection,0.099537


## Results

In [3]:
result = data.groupby(['model', 'split_method', 'metric'])['value'].agg(["mean", "std"])
result.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std
model,split_method,metric,Unnamed: 3_level_1,Unnamed: 4_level_1
KNeighborsClassifier,split_x_y,accuracy,0.356083,0.131148
KNeighborsClassifier,split_x_y,hit@5,0.538557,0.122381
KNeighborsClassifier,split_x_y,map@5,0.318705,0.153412


## Result as pivot table

In [4]:
result.pivot_table(index=['model', 'split_method'], columns='metric', values=['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,mean,mean,std,std,std,std,std
Unnamed: 0_level_1,metric,accuracy,hit@5,map@5,mdcg,mrr,accuracy,hit@5,map@5,mdcg,mrr
model,split_method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
KNeighborsClassifier,split_x_y,0.356083,0.538557,0.318705,0.549631,0.423612,0.131148,0.122381,0.153412,0.103384,0.128872
KNeighborsClassifier,split_x_y_normalized_function,0.356083,0.538557,0.318705,0.549631,0.423612,0.131148,0.122381,0.153412,0.103384,0.128872
KNeighborsClassifier,split_x_y_split_with_one_hot_encoding,0.356083,0.538557,0.318705,0.549631,0.423612,0.131148,0.122381,0.153412,0.103384,0.128872
KNeighborsClassifier,split_x_y_split_with_one_hot_encoding_and_projection,0.249185,0.430046,0.297732,0.469657,0.335011,0.144095,0.158261,0.166704,0.122523,0.149018
KNeighborsClassifier,split_x_y_with_bag_of_words,0.342662,0.528224,0.310255,0.543993,0.416827,0.135818,0.130152,0.157562,0.106989,0.134011
KNeighborsClassifier,split_x_y_with_projection,0.249185,0.430046,0.297732,0.469657,0.335011,0.144095,0.158261,0.166704,0.122523,0.149018
KNeighborsClassifier,split_x_y_with_random_matrix,0.249185,0.4302,0.29806,0.469513,0.334886,0.144095,0.158608,0.16681,0.122637,0.149056
KNeighborsClassifier,split_x_y_word2vec,0.236234,0.417163,0.301938,0.463125,0.326973,0.150523,0.164459,0.168239,0.123632,0.150774
LogisticRegression,split_x_y,0.305872,0.499147,0.316863,0.522538,0.405859,0.131865,0.133656,0.153757,0.107007,0.128288
LogisticRegression,split_x_y_normalized_function,0.302246,0.497915,0.317374,0.521536,0.404155,0.131442,0.13779,0.156189,0.107166,0.128433


### Params selected

In [5]:
pd.set_option('max_colwidth', 100)
data.sort_values(['model', 'metric', 'split_method'])[['model', 'metric', 'best_params']].drop_duplicates()

Unnamed: 0,model,metric,best_params
0,KNeighborsClassifier,accuracy,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 15}"
0,KNeighborsClassifier,accuracy,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 25}"
0,KNeighborsClassifier,accuracy,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 100}"
0,KNeighborsClassifier,accuracy,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 60}"
0,KNeighborsClassifier,accuracy,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 40}"
6,KNeighborsClassifier,hit@5,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 60}"
6,KNeighborsClassifier,hit@5,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 25}"
6,KNeighborsClassifier,hit@5,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 40}"
6,KNeighborsClassifier,hit@5,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 100}"
6,KNeighborsClassifier,hit@5,"{'algorithm': 'brute', 'metric': 'hamming', 'n_neighbors': 80}"
