# Results

So the first step is to obtain the project, and map each run to exactly which model, on which dataset, which fold

In [1]:
import os, sys
sys.path.append("../")
sys.path.append("../../")
sys.path.append("../../../")

In [2]:
from warnings import warn

In [3]:
import pandas as pd

In [4]:
from pykt.utils.wandb_utils import WandbUtils

In [5]:
wu = WandbUtils('letech', 'kt_toolkits')
wu.sweep_dict

self.sweep_dict is {'falconcode_2_2_dkt_qid_4': 'bqtj3c4l', 'falconcode_2_2_dkt_qid_3': 'fm9vipzn', 'falconcode_2_2_dkt_qid_2': '8lsdhg54', 'falconcode_2_2_dkt_qid_1': '0vuv0tui', 'falconcode_2_2_dkt_qid_0': '68sluxig'}


{'falconcode_2_2_dkt_qid_4': 'bqtj3c4l',
 'falconcode_2_2_dkt_qid_3': 'fm9vipzn',
 'falconcode_2_2_dkt_qid_2': '8lsdhg54',
 'falconcode_2_2_dkt_qid_1': '0vuv0tui',
 'falconcode_2_2_dkt_qid_0': '68sluxig'}

#### We obtain a list of the datasets we used and the models trained

In [6]:
datasets, models = set(), set()
sweep_names = wu.sweep_dict.keys()
for sn in sweep_names:
    parts = sn.split("_")
    datasets.add("_".join(parts[:3]))
    models.add("_".join(parts[3: parts.index("qid")]))
    
datasets, models

({'falconcode_2_2'}, {'dkt'})

#### Obtain for the given dataset, model, and folds, the best results

We also need to know for that given model, what are the hyperparameters, so w

In [7]:
def get_hyperparameter_names(selected_sweeps):
    _, model_configs = wu.get_df(selected_sweeps[0])
    rejected_paramters = ["model_name", "dataset_name", "save_dir", "fold", "seed"]
    hyperparameters = [k for k in model_configs if k not in rejected_paramters]
    return hyperparameters

In [8]:
def get_best_hyperparameters(grouped_results, hyperparams):
    best_hyperparams = grouped_results.groupby(hyperparams)["validauc"].mean().to_frame()
    # Obtain the dataframe with the order of importance of each hyperparameter combination
    best_hyperparams = best_hyperparams.sort_values(by="validauc", ascending=False)
    best_hyperparams = best_hyperparams.head(1).index
    best_hyperparams = best_hyperparams.to_frame().reset_index(drop=True).iloc[0].to_dict()
    query_best_hyperparams = " & ".join([f"({k}=={repr(v)})" for k, v in best_hyperparams.items()])
    return best_hyperparams, query_best_hyperparams

In [9]:
def check_all_finnished(selected_sweeps):
    for ss in selected_sweeps:
        status = wu.get_sweep_info(ss)["state"]
        if status != "FINISHED":
            message = "Careful sweep " + ss + " hasn't finnished yet"
            warn(message)

In [10]:
from collections import defaultdict
from itertools import product

f = lambda : defaultdict(dict)
dataframes = []
best_hyperparams = defaultdict(f)
for dataset_name, model_name in product(datasets, models):
    selected_sweeps = [sn for sn in sweep_names 
                       if dataset_name in sn and model_name in sn]
    check_all_finnished(selected_sweeps)
    grouped_results = pd.concat(wu.get_multi_df(selected_sweeps))
    hyperparameters = get_hyperparameter_names(selected_sweeps)
    bests, query = get_best_hyperparameters(grouped_results, 
                                     hyperparameters)
    best_hyperparams[dataset_name][model_name] = bests
    dataframes.append(grouped_results.query(query))
    
best_hyperparams = dict(best_hyperparams)
best_hyperparams

  warn(message)
  warn(message)


{'falconcode_2_2': defaultdict(dict,
             {'dkt': {'dropout': 0.05,
               'emb_size': 64,
               'emb_type': 'qid',
               'learning_rate': 0.001}})}

In [15]:
dataframes

[                                     model_save_path  _step            _wandb   
 0  /scratch/work/koutchc1/experiments/pykt/models...    0.0  {'runtime': 218}  \
 0  /scratch/work/koutchc1/experiments/pykt/models...    0.0  {'runtime': 218}   
 0  /scratch/work/koutchc1/experiments/pykt/models...    0.0  {'runtime': 189}   
 0  /scratch/work/koutchc1/experiments/pykt/models...    0.0  {'runtime': 198}   
 
      _runtime  validacc  validauc    _timestamp  best_epoch fold  seed  ...   
 0  216.578810  0.960831  0.991248  1.686056e+09        15.0    4    42  ...  \
 0  216.108125  0.960937  0.991420  1.686056e+09        15.0    3    42  ...   
 0  183.003591  0.960391  0.990852  1.686056e+09        14.0    2    42  ...   
 0  197.870851  0.960923  0.991216  1.686056e+09        15.0    1    42  ...   
 
                                             save_dir  model_name   
 0  /scratch/work/koutchc1/experiments/pykt/models...         dkt  \
 0  /scratch/work/koutchc1/experiments/pykt/mode

In [12]:
grouped_results

Unnamed: 0,model_save_path,_step,_wandb,_runtime,validacc,validauc,_timestamp,best_epoch,fold,seed,...,save_dir,model_name,dataset_name,learning_rate,name,path_id,state,create_time,run_index,sweep_name
0,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 218},216.578810,0.960831,0.991248,1.686056e+09,15.0,4,42,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00100,iconic-sweep-1,1v2auyhf,finished,1686056221,0,falconcode_2_2_dkt_qid_4
1,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 643},638.690235,0.957426,0.989503,1.686058e+09,69.0,4,42,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00010,comic-sweep-3,15orlura,finished,1686058003,1,falconcode_2_2_dkt_qid_4
2,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 631},632.499490,0.958821,0.989171,1.686059e+09,66.0,4,3407,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00010,scarlet-sweep-4,z6sid2k6,finished,1686058652,2,falconcode_2_2_dkt_qid_4
3,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 494},495.264071,0.860446,0.917427,1.686059e+09,49.0,4,42,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00001,tough-sweep-5,ju0u2ucr,finished,1686059160,3,falconcode_2_2_dkt_qid_4
4,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 453},454.405736,0.862639,0.911742,1.686060e+09,45.0,4,3407,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00001,lucky-sweep-6,baznyfik,finished,1686059642,4,falconcode_2_2_dkt_qid_4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 646},647.243900,0.913043,0.973214,1.686074e+09,2.0,0,3407,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00100,noble-sweep-44,j4aqv7mz,finished,1686073518,36,falconcode_2_2_dkt_qid_0
37,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 1135},1135.656084,0.913043,0.973214,1.686075e+09,11.0,0,42,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00010,prime-sweep-45,dwt4yx4c,finished,1686074672,37,falconcode_2_2_dkt_qid_0
38,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 1181},1182.352346,0.913043,0.973214,1.686076e+09,12.0,0,3407,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00010,sparkling-sweep-46,odpkp47d,finished,1686075889,38,falconcode_2_2_dkt_qid_0
39,/scratch/work/koutchc1/experiments/pykt/models...,0.0,{'runtime': 968},968.721598,0.869565,0.812500,1.686077e+09,8.0,0,42,...,/scratch/work/koutchc1/experiments/pykt/models...,dkt,falconcode_2_2,0.00001,iconic-sweep-47,b99uwpct,finished,1686076871,39,falconcode_2_2_dkt_qid_0


At the end here we have a mapping of the hyperparameters to rerun model training with, which we will save somewhere in a file or a dictionary. We can then pass this dictionary to another training script which will run the model trainings with the good hyperparameters

In [13]:
import json

best_configs_path = "/home/koutchc1/pykt-toolkit/configs/best_hyperparameters.json"

data = {}
if os.path.exists(best_configs_path):
    with open(best_configs_path, "r") as f:
        data = json.load(f)
data.update(best_hyperparams)

with open(best_configs_path, "w") as f:
    json.dump(data, f, indent = 3)