# Results

The first step is to fetch the project and map each run to exactly which model it trained, on which dataset, and on which fold.

In [1]:
import os, sys
# Add the three nearest parent directories (relative to the working directory) to the import path.
# NOTE(review): presumably this is what lets the `pykt` import below resolve — confirm the repo layout.
sys.path.append("../")
sys.path.append("../../")
sys.path.append("../../../")

In [2]:
from warnings import warn

In [3]:
import pandas as pd

In [4]:
from pykt.utils.wandb_utils import WandbUtils

In [5]:
# Helper object used by every cell below to query sweeps and their runs.
# NOTE(review): the two positional arguments look like (entity, project) — confirm against WandbUtils.
wu = WandbUtils('letech', 'kt_toolkits')
# Show the sweep dictionary as the cell output (keys are the sweep names parsed in the next cell).
wu.sweep_dict

self.sweep_dict is {'falconcode_2_2_dkt_qid_4': 'bqtj3c4l', 'falconcode_2_2_dkt_qid_3': 'fm9vipzn', 'falconcode_2_2_dkt_qid_2': '8lsdhg54', 'falconcode_2_2_dkt_qid_1': '0vuv0tui', 'falconcode_2_2_dkt_qid_0': '68sluxig'}


{'falconcode_2_2_dkt_qid_4': 'bqtj3c4l',
 'falconcode_2_2_dkt_qid_3': 'fm9vipzn',
 'falconcode_2_2_dkt_qid_2': '8lsdhg54',
 'falconcode_2_2_dkt_qid_1': '0vuv0tui',
 'falconcode_2_2_dkt_qid_0': '68sluxig'}

#### We obtain a list of the datasets we used and the models trained

In [6]:
# Sweep names are split on "_": the first three tokens form the dataset name and
# the tokens between the dataset and the "qid" token form the model name.
sweep_names = wu.sweep_dict.keys()
datasets, models = set(), set()
for sweep_name in sweep_names:
    tokens = sweep_name.split("_")
    dataset_tokens = tokens[:3]
    datasets.add("_".join(dataset_tokens))
    # `index` raises ValueError when a sweep name has no "qid" token.
    model_tokens = tokens[3:tokens.index("qid")]
    models.add("_".join(model_tokens))

datasets, models

({'falconcode_2_2'}, {'dkt'})

#### Obtain the best results for each dataset, model, and fold

We also need to know what the hyperparameters of a given model are, so we first define a helper that reads their names from the sweep's configuration.

In [7]:
def get_hyperparameter_names(selected_sweeps):
    """Return the tunable hyperparameter names of the first sweep in `selected_sweeps`.

    The names are read from the second item returned by `wu.get_df` for that
    sweep; the keys "model_name", "dataset_name", "save_dir", "fold" and "seed"
    are filtered out, and the remaining names keep their original order.

    Raises IndexError when `selected_sweeps` is empty.
    """
    excluded_keys = ("model_name", "dataset_name", "save_dir", "fold", "seed")
    first_sweep = selected_sweeps[0]
    _, model_configs = wu.get_df(first_sweep)
    return [name for name in model_configs if name not in excluded_keys]

In [16]:
def get_best_hyperparameters(grouped_results, hyperparams):
    """Find the hyperparameter combination with the highest mean validation AUC.

    Parameters
    ----------
    grouped_results : pandas.DataFrame
        One row per run; must contain a "validauc" column and one column per
        name in `hyperparams`.
    hyperparams : list of str
        Columns whose value combinations are compared.

    Returns
    -------
    best_hyperparams : dict
        Hyperparameter name -> value of the winning combination. Values are
        native Python objects with their original type (an integer
        hyperparameter stays an ``int`` even when every other one is a float).
    query_best_hyperparams : str
        Expression for ``DataFrame.query`` selecting the runs of the winning
        combination. Column names are backtick-quoted so that names which are
        not valid Python identifiers also work.

    Side effect: prints the ranking of all combinations, best first.
    Raises IndexError when `grouped_results` contains no combination at all.
    """
    ranking = (
        grouped_results.groupby(hyperparams)["validauc"]
        .mean()
        .to_frame()
        .sort_values(by="validauc", ascending=False)
    )
    print(ranking)

    # Read the winning key straight from the index: going through a DataFrame row
    # would coerce all values to one common dtype (e.g. int 64 -> float 64.0).
    best_key = ranking.index[0]
    if not isinstance(ranking.index, pd.MultiIndex):
        # A single grouping column yields a scalar key instead of a tuple.
        best_key = (best_key,)

    # NumPy scalars are unwrapped so that `repr` produces plain literals
    # (recent NumPy versions render them as e.g. "np.float64(0.05)").
    best_hyperparams = {
        name: (value.item() if hasattr(value, "item") else value)
        for name, value in zip(ranking.index.names, best_key)
    }
    query_best_hyperparams = " & ".join(
        f"(`{name}`=={value!r})" for name, value in best_hyperparams.items()
    )
    return best_hyperparams, query_best_hyperparams

In [17]:
def check_all_finnished(selected_sweeps):
    """Emit a warning for each sweep whose reported state is not "FINISHED".

    The state is read from ``wu.get_sweep_info(sweep)["state"]``. Nothing is
    returned and unfinished sweeps do not stop execution.
    """
    for sweep_name in selected_sweeps:
        sweep_state = wu.get_sweep_info(sweep_name)["state"]
        if sweep_state == "FINISHED":
            continue
        warn("Careful sweep " + sweep_name + " hasn't finnished yet")

In [18]:
from collections import defaultdict
from itertools import product

# dataset name -> model name -> best hyperparameter combination
best_hyperparams = defaultdict(dict)
# Runs that used the winning combination, one frame per (dataset, model) pair
dataframes = []
for dataset_name, model_name in product(datasets, models):
    # Compare "_"-separated tokens instead of substrings: with `model_name in sn`
    # a model called "dkt" would also pick up the sweeps of e.g. "dkt_forget".
    expected_tokens = dataset_name.split("_") + model_name.split("_") + ["qid"]
    selected_sweeps = [sn for sn in sweep_names
                       if sn.split("_")[:len(expected_tokens)] == expected_tokens]
    if not selected_sweeps:
        # Not every dataset/model pair necessarily has sweeps; skip instead of crashing.
        warn("No sweep found for " + dataset_name + " / " + model_name)
        continue
    check_all_finnished(selected_sweeps)
    grouped_results = pd.concat(wu.get_multi_df(selected_sweeps))
    hyperparameters = get_hyperparameter_names(selected_sweeps)
    bests, query = get_best_hyperparameters(grouped_results,
                                            hyperparameters)
    best_hyperparams[dataset_name][model_name] = bests
    dataframes.append(grouped_results.query(query))

# Convert both levels to plain dicts so the displayed result has no defaultdict wrappers.
best_hyperparams = {name: dict(per_model) for name, per_model in best_hyperparams.items()}
best_hyperparams

  warn(message)
  warn(message)


                                         validauc
dropout emb_size emb_type learning_rate          
0.05    64       qid      0.00100        0.991184
                          0.00010        0.989159
        256      qid      0.00100        0.987872
                          0.00001        0.987581
0.10    256      qid      0.00100        0.987405
0.30    256      qid      0.00100        0.987380
0.05    256      qid      0.00010        0.986626
0.10    256      qid      0.00010        0.986609
0.30    256      qid      0.00010        0.986552
0.50    256      qid      0.00010        0.986536
                          0.00100        0.986197
0.10    64       qid      0.00100        0.985563
0.30    64       qid      0.00100        0.985522
0.50    64       qid      0.00100        0.984272
0.30    64       qid      0.00010        0.976312
0.10    64       qid      0.00010        0.976236
0.50    64       qid      0.00010        0.976205
0.30    256      qid      0.00001        0.948368


{'falconcode_2_2': defaultdict(dict,
             {'dkt': {'dropout': 0.05,
               'emb_size': 64,
               'emb_type': 'qid',
               'learning_rate': 0.001}})}

In [35]:
def get_best_models(grouped_results):
    """Return, for every fold, the run with the highest validation AUC.

    Parameters
    ----------
    grouped_results : pandas.DataFrame
        One row per run; must contain the columns "fold" and "validauc".

    Returns
    -------
    pandas.DataFrame
        One row per fold (indexed by fold) holding the best run of that fold.
        Runs whose "validauc" is NaN are only selected when the fold has no
        run with a valid score.
    """
    def _best_run(fold_runs):
        # NaN sorts last by default, which would make a run without a score win;
        # placing NaN first keeps the highest real score in the last position.
        return fold_runs.sort_values(by="validauc", na_position="first").iloc[-1]

    return grouped_results.groupby("fold").apply(_best_run)

In [39]:
from collections import defaultdict
from itertools import product

# dataset name -> model name -> list of best checkpoint paths (one per fold)
best_models = defaultdict(dict)
for dataset_name, model_name in product(datasets, models):
    # Compare "_"-separated tokens instead of substrings: with `model_name in sn`
    # a model called "dkt" would also pick up the sweeps of e.g. "dkt_forget".
    expected_tokens = dataset_name.split("_") + model_name.split("_") + ["qid"]
    selected_sweeps = [sn for sn in sweep_names
                       if sn.split("_")[:len(expected_tokens)] == expected_tokens]
    if not selected_sweeps:
        # Not every dataset/model pair necessarily has sweeps; skip instead of crashing.
        warn("No sweep found for " + dataset_name + " / " + model_name)
        continue
    check_all_finnished(selected_sweeps)
    grouped_results = pd.concat(wu.get_multi_df(selected_sweeps))
    sub_df = get_best_models(grouped_results)
    best_models[dataset_name][model_name] = list(sub_df.model_save_path)

# Plain nested dicts: a lookup of an unknown dataset now fails loudly instead of
# silently creating an empty entry.
best_models = {name: dict(per_model) for name, per_model in best_models.items()}

  warn(message)
  warn(message)


                                        model_save_path  _step   
fold                                                             
0     /scratch/work/koutchc1/experiments/pykt/models...    0.0  \
1     /scratch/work/koutchc1/experiments/pykt/models...    0.0   
2     /scratch/work/koutchc1/experiments/pykt/models...    0.0   
3     /scratch/work/koutchc1/experiments/pykt/models...    0.0   
4     /scratch/work/koutchc1/experiments/pykt/models...    0.0   

                 _wandb     _runtime  validacc  validauc    _timestamp   
fold                                                                     
0      {'runtime': 630}   630.513108  0.913043  0.982143  1.686067e+09  \
1     {'runtime': 1101}  1102.221098  0.962872  0.992127  1.686083e+09   
2      {'runtime': 909}   910.014877  0.961602  0.991606  1.686061e+09   
3      {'runtime': 849}   849.620508  0.962496  0.992234  1.686080e+09   
4      {'runtime': 966}   966.849191  0.962972  0.992089  1.686061e+09   

      best_epoch f

In [38]:
# Inspect the checkpoint paths stored for the 'dkt' model on 'falconcode_2_2'.
best_models['falconcode_2_2']['dkt']

['/scratch/work/koutchc1/experiments/pykt/models/dkt/falconcode_2_2_dkt_qid_3407_0_0.3_256_0.001_1_1_a9701277-4562-4bdc-8a92-a0a324be54ef/qid_model.ckpt',
 '/scratch/work/koutchc1/experiments/pykt/models/dkt/falconcode_2_2_dkt_qid_42_1_0.1_256_0.001_1_1_a92d9011-a03a-4b69-9ea6-dc7aa896213a/qid_model.ckpt',
 '/scratch/work/koutchc1/experiments/pykt/models/dkt/falconcode_2_2_dkt_qid_42_2_0.05_256_0.001_1_1_7ad91fb5-ec86-46af-bcd9-3073962861c9/qid_model.ckpt',
 '/scratch/work/koutchc1/experiments/pykt/models/dkt/falconcode_2_2_dkt_qid_42_3_0.1_256_0.001_1_1_0fcf8845-83d2-4410-a805-c41e783e34b3/qid_model.ckpt',
 '/scratch/work/koutchc1/experiments/pykt/models/dkt/falconcode_2_2_dkt_qid_42_4_0.05_256_0.001_1_1_22bebde0-85f7-45d0-a66a-d7a5221b01bc/qid_model.ckpt']

At this point we have a mapping of the hyperparameters to rerun model training with, which we save to a file (or keep as a dictionary). We can then pass this dictionary to another training script, which will run the model trainings with the best hyperparameters.

In [41]:
import json

best_configs_path = "/home/koutchc1/pykt-toolkit/configs/best_model.json"

# Start from what is already stored so that earlier results are kept.
data = {}
if os.path.exists(best_configs_path):
    with open(best_configs_path, "r") as config_file:
        data = json.load(config_file)

# Merge per dataset: a plain `data.update(best_models)` would replace the whole
# entry of a dataset and drop the models previously saved for it.
for saved_dataset, paths_by_model in best_models.items():
    data.setdefault(saved_dataset, {}).update(paths_by_model)

with open(best_configs_path, "w") as config_file:
    json.dump(data, config_file, indent = 3)

In [44]:
# Write the directory of every stored checkpoint path, one per line.
best_models_txt_path = "/home/koutchc1/pykt-toolkit/best_models.txt"
with open(best_models_txt_path, 'w') as out_file:
    for models_to_path in data.values():
        for checkpoint_paths in models_to_path.values():
            # Everything before the last "/" is the checkpoint's directory.
            out_file.writelines(
                checkpoint.rpartition("/")[0] + "\n" for checkpoint in checkpoint_paths
            )