In [1]:
import pandas as pd
import numpy as np
from codebase import io
from codebase.evaluation import ndcg

In [2]:
tracking = io.TRACKING_DF
start_model = 2
model_ids = tracking[start_model::3].index
print(model_ids)

Int64Index([  2,   5,   8,  11,  14,  17,  20,  23,  26,  29,  32,  35,  38,
             41,  44,  47,  50,  53,  56,  59,  62,  65,  68,  71,  74,  77,
             80,  83,  86,  89,  92,  95,  98, 101, 104, 107, 110, 113, 116,
            119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, 155,
            158, 161, 164, 167, 170, 173, 176, 179, 182, 185, 188, 191, 194,
            197, 200, 203, 206, 209, 212, 215, 218, 221, 224, 227, 230, 233,
            236, 239, 242, 245, 248, 251, 254, 257, 260, 263, 266, 269, 272,
            275, 278, 281],
           dtype='int64')


In [3]:
# Retrieve average results over run
results = io.load_jsons(model_ids)
for c in results:
    results[c] = results[c].apply(lambda x: np.mean(x))

In [4]:

# Merge tracking and results on model_id
full_results = pd.merge(tracking, results, left_index=True, right_index=True)

# Recalculate true ndcg values for relevance changes
for i, model in enumerate(full_results.index):
    if full_results["uniform_relevance"].iloc[i] > 0 or full_results["artificial_relevance"].iloc[i] > 0:
        print(f"i: {i}, model: {model}")
        full_results["val_ndcg@5"].iloc[i] = ndcg(io.load_val_predictions(model))

# Group folds by averaging over them (every 3 results are 1 k-folds run)
results_reset = full_results.reset_index()
grouped_results = results_reset.groupby(results_reset.index//3).mean()

# Get default parameters and their values
params = [c for c in tracking.columns if not c == "device"]
default_params = tracking.head(1)

# Print out default parameters
# for p in params:
#     print(p, default_params[p].iloc[0])

# Define function to get more insightfull data format
def find_mutation(parameters, default, row):
    """Finds what parameter was mutated in the row and what value it is"""

    # Return name of parameter that was mutated
    for p in parameters:
        if not default[p].iloc[0] == row[p]:
            return p, row[p]
    return None, None


# Create "mutation" and "mutation value" column for instant mutation recognition
mutations = []
mutation_values = []
for _, row in grouped_results.iterrows():
    mut = find_mutation(params, default_params, row)
    mutations.append(mut[0])
    mutation_values.append(mut[1])
grouped_results["mutation"] = mutations
grouped_results["mutation value"] = mutation_values


i: 48, model: 146
Calculating NDCG...


100% (22412 of 22412) |##################| Elapsed Time: 0:00:03 Time:  0:00:03
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


i: 49, model: 149
Calculating NDCG...


100% (21900 of 21900) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


i: 50, model: 152
Calculating NDCG...


100% (22122 of 22122) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


i: 51, model: 155
Calculating NDCG...


100% (22412 of 22412) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


i: 52, model: 158
Calculating NDCG...


100% (21900 of 21900) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


i: 53, model: 161
Calculating NDCG...


100% (22122 of 22122) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


In [5]:
std_default_val_ndcg = grouped_results[grouped_results.isna()["mutation value"]]["val_ndcg@5"].std()
mu_default_val_ndcg = grouped_results[grouped_results.isna()["mutation value"]]["val_ndcg@5"].mean()
print(f"default settings performance mean: {mu_default_val_ndcg}, std: {std_default_val_ndcg}")

grouped_results["val@5_diff_std"] = ((grouped_results["val_ndcg@5"] - grouped_results["val_ndcg@5"].iloc[0])/std_default_val_ndcg).round(1)
grouped_results["epoch_time"] = grouped_results["epoch_time"].round().astype(int)
grouped_results["model_id"] = grouped_results["index"].astype(int)

relevant_cols = ["model_id", 
                 "mutation",
                 "mutation value",
                 "epoch_time",
#                  "trn_ndcg", 
#                  "val_ndcg", 
                 "trn_ndcg@5", 
                 "val_ndcg@5",
                 "val@5_diff_std"]
grouped_results[relevant_cols]

default settings performance mean: 0.30478896638797875, std: 0.09307461135705937


Unnamed: 0,model_id,mutation,mutation value,epoch_time,trn_ndcg@5,val_ndcg@5,val@5_diff_std
0,5,,,108,0.428676,0.377042,0.0
1,14,layer_size,20.0,63,0.406651,0.368609,-0.1
2,23,layer_size,320.0,170,0.448108,0.372058,-0.1
3,32,learning_rate,0.001,111,0.459808,0.365023,-0.1
4,41,learning_rate,1e-05,113,0.379263,0.34765,-0.3
5,50,layers,2.0,76,0.425777,0.381839,0.1
6,59,layers,8.0,142,0.432364,0.378887,0.0
7,68,resnet,0.0,109,0.417086,0.365222,-0.1
8,77,attention_layer_idx,-1.0,93,0.430246,0.376584,-0.0
9,86,attention_layer_idx,0.0,112,0.431791,0.378251,0.0


In [6]:
# Order the results by their validation ndcg
ordered = full_results.sort_values(by='val_ndcg@5', ascending=False)

In [7]:
# for val in ordered.head(0):
#     print(f"{val}: {ordered.head(1)[val].item()}")

In [8]:
grouped_results

Unnamed: 0,index,artificial_relevance,attention_layer_idx,datetime_shenanigans,epochs,exp_ver,lambda_batch_size,layer_size,layers,learning_rate,...,use_priors,epoch_time,trn_ndcg,trn_ndcg@5,val_ndcg,val_ndcg@5,mutation,mutation value,val@5_diff_std,model_id
0,5,0.0,1.0,1.0,3.0,0.0,150.0,80.0,4.0,0.0001,...,1.0,108,0.990812,0.428676,0.826461,0.377042,,,0.0,5
1,14,0.0,1.0,1.0,3.0,0.0,150.0,20.0,4.0,0.0001,...,1.0,63,1.097771,0.406651,0.780376,0.368609,layer_size,20.0,-0.1,14
2,23,0.0,1.0,1.0,3.0,0.0,150.0,320.0,4.0,0.0001,...,1.0,170,0.799757,0.448108,0.771158,0.372058,layer_size,320.0,-0.1,23
3,32,0.0,1.0,1.0,3.0,0.0,150.0,80.0,4.0,0.001,...,1.0,111,1.078846,0.459808,0.796936,0.365023,learning_rate,0.001,-0.1,32
4,41,0.0,1.0,1.0,3.0,0.0,150.0,80.0,4.0,1e-05,...,1.0,113,1.123546,0.379263,0.775536,0.34765,learning_rate,1e-05,-0.3,41
5,50,0.0,1.0,1.0,3.0,0.0,150.0,80.0,2.0,0.0001,...,1.0,76,0.661281,0.425777,0.76947,0.381839,layers,2.0,0.1,50
6,59,0.0,1.0,1.0,3.0,0.0,150.0,80.0,8.0,0.0001,...,1.0,142,1.101151,0.432364,0.766282,0.378887,layers,8.0,0.0,59
7,68,0.0,1.0,1.0,3.0,0.0,150.0,80.0,4.0,0.0001,...,1.0,109,1.099439,0.417086,0.832742,0.365222,resnet,0.0,-0.1,68
8,77,0.0,-1.0,1.0,3.0,0.0,150.0,80.0,4.0,0.0001,...,1.0,93,1.219488,0.430246,0.780552,0.376584,attention_layer_idx,-1.0,-0.0,77
9,86,0.0,0.0,1.0,3.0,0.0,150.0,80.0,4.0,0.0001,...,1.0,112,1.253084,0.431791,0.811049,0.378251,attention_layer_idx,0.0,0.0,86


In [9]:
relevant = ordered[ordered["val_ndcg@5"] > ordered["val_ndcg@5"][0]

SyntaxError: unexpected EOF while parsing (<ipython-input-9-92e709d6de3b>, line 1)

In [None]:
relevant