In [2]:
import os
import yaml
import pandas as pd

In [3]:
def get_experiment_config(model_name, run_name):
    """
    Get the config of an experiment (with run name) - multirun
    """
    file_path = os.path.join("../logs/experiments/multiruns",model_name,run_name,"multirun.yaml") #if sweep params were provided in command line
    if not os.path.exists(file_path):    #if a sweeper config was used
        file_path = os.path.join("../logs/experiments/multiruns",model_name,run_name,"0",".hydra","hydra.yaml")
    
    with open(file_path, 'r') as file:
            exp_config = yaml.safe_load(file) 
    return exp_config

def get_sweep_variables(exp_config):
    """
    Get the variables that were swept in the experiment
    """
    if exp_config["hydra"]["sweeper"]["params"] is not None: # this is used if a sweeper config was used
        variables = [(k,[v_.strip() for v_ in v.split(",")]) for k,v in exp_config["hydra"]["sweeper"]["params"].items()]
    else:
        variables = [(s.split("=")[0],s.split("=")[1].split(",")) for s in exp_config["hydra"]["overrides"]["task"]]
    variables = {v[0]:v[1] for v in variables if len(v[1]) > 1}
    return variables

def get_all_results_exp(model_name, run_name, sweep_variables):
    dir_name = os.path.join("../logs/experiments/multiruns",model_name,run_name)
    run_ids = [ f for f in os.listdir(dir_name) if os.path.isdir(os.path.join(dir_name,f))]

    df_list = []
    for run_id in run_ids:
        run_results = get_extended_run_results(model_name, run_name, run_id, sweep_variables)
        if run_results is not None:
            df_list.append(run_results)

    df_results = pd.concat(df_list)
    return df_results

def get_extended_run_results(model_name, run_name, run_id, sweep_variables):
    run_config = get_run_config(model_name, run_name, run_id)

    variables_from_run = extract_variables_from_run(sweep_variables, run_config)

    run_results = get_run_results(model_name, run_name, run_id)

    if run_results is not None:
        for var in variables_from_run.keys():
            run_results[var] = variables_from_run[var]

    return run_results

def extract_variables_from_run(variables, run_config):
    """
    Extract the values of the variables that were swept in the experiment, from the config of a specific run
    """
    extracted_variables = {}
    for conf_var in variables.keys():
        conf_value = None
        if conf_var == "data":
            splitted_conf_var = ["dataset_name"]
        else:
            splitted_conf_var = conf_var.split(".")
        for conf_ in splitted_conf_var:
            if conf_value is None:
                conf_value = run_config[conf_]
            else:
                conf_value = conf_value[conf_]
        ### THIS IS A FIX TO DISTINGUISH BETWEEN SWISS ROLL DATASETS - REMOVE IN NEXT ITERATION ---
        if conf_var == "data":
            if conf_value == "tree":
                if run_config["data"]["n_dim"] == 30:
                    conf_value = "tree_high"
        ### ---------------------------------------------------------------------------------------
        
        extracted_variables[conf_var] = conf_value
    return extracted_variables

def get_run_config(model_name, run_name, run_id):
    """
    Get the config of a specific run (with run id)
    """
    file_path = os.path.join("../logs/experiments/multiruns",model_name,run_name,run_id,".hydra","config.yaml")
    with open(file_path, 'r') as file:
        run_config = yaml.safe_load(file)
    return run_config

def get_run_results(model_name, run_name, run_id):
    """
    Get the results of a specific run (with run id)
    """
    dir_path = os.path.join("../logs/experiments/multiruns",model_name,run_name,run_id)
    pkl_files = [f for f in os.listdir(dir_path) if "pkl" in f]
    if len(pkl_files)!=1:
        print("No PKL file found for {model_name} {run_name} {run_id}".format(model_name=model_name, run_name=run_name, run_id=run_id))
        print("Config for this run : ")
        print(get_run_config(model_name, run_name, run_id))
        return None
    else:
        pkl_file = pkl_files[0]
        return pd.read_pickle(os.path.join(dir_path,pkl_file))

In [4]:
def get_best(df,sweep_variables):
    metric = "val_acc"
    test_metric = "test_acc"

    df_m = df.groupby(list(sweep_variables.keys()))[[metric,test_metric]].mean().reset_index()
    df_s = df.groupby(list(sweep_variables.keys()))[[metric,test_metric]].std().reset_index()

    best_ix = df_m.loc[df_m[metric].argmax()]

    df_m_best = df_m.loc[[df_m[metric].argmax()]].copy()
    df_s_best = df_s.loc[[df_m[metric].argmax()]].copy()

    df_s_best.rename(columns = {x:x+"_std" for x in df_s_best.columns if x not in sweep_variables.keys()}, inplace = True)

    df_best = pd.merge(df_m_best,df_s_best,how = "inner", on = list(sweep_variables.keys()))
    return df_best

In [5]:
exp_name = "scattering_modelnet"
timestamp = "2023-07-04_13-37-36" 
exp_config = get_experiment_config(exp_name,timestamp)
scattering_sweep_variables = get_sweep_variables(exp_config)
df_scattering = get_all_results_exp(exp_name, timestamp, scattering_sweep_variables)

scattering_sweep_variables = {k:v for k,v in scattering_sweep_variables.items() if k not in ["data.random_state"]}

exp_name = "scattering_modelnet_knn"
timestamp = "2023-07-05_16-16-46" 
exp_config = get_experiment_config(exp_name,timestamp)
scattering_knn_sweep_variables = get_sweep_variables(exp_config)
df_scattering_knn = get_all_results_exp(exp_name, timestamp, scattering_knn_sweep_variables)

scattering_knn_sweep_variables = {k:v for k,v in scattering_knn_sweep_variables.items() if k not in ["data.random_state"]}

exp_name = "scattering_modelnet_eps"
timestamp = "2023-07-05_19-11-32" 
exp_config = get_experiment_config(exp_name,timestamp)
scattering_eps_sweep_variables = get_sweep_variables(exp_config)
df_scattering_eps = get_all_results_exp(exp_name, timestamp, scattering_eps_sweep_variables)

scattering_eps_sweep_variables = {k:v for k,v in scattering_eps_sweep_variables.items() if k not in ["data.random_state"]}

exp_name = "gnn_modelnet_eps"
timestamp = "2023-07-04_14-18-40" 
exp_config = get_experiment_config(exp_name,timestamp)
gnn_dense_sweep_variables = get_sweep_variables(exp_config)
df_gnn_dense = get_all_results_exp(exp_name, timestamp, gnn_dense_sweep_variables)

gnn_dense_sweep_variables = {k:v for k,v in gnn_dense_sweep_variables.items() if k not in ["data.random_state"]}

exp_name = "gnn_modelnet_knn"
timestamp = "2023-07-05_00-20-53" 
exp_config = get_experiment_config(exp_name,timestamp)
gnn_knn_sweep_variables = get_sweep_variables(exp_config)
df_gnn_knn = get_all_results_exp(exp_name, timestamp, gnn_knn_sweep_variables)

gnn_knn_sweep_variables = {k:v for k,v in gnn_knn_sweep_variables.items() if k not in ["data.random_state"]}


exp_name = "gnn_modelnet_eps"
timestamp = "2023-07-05_16-59-15" 
exp_config = get_experiment_config(exp_name,timestamp)
gnn_eps_sweep_variables = get_sweep_variables(exp_config)
df_gnn_eps = get_all_results_exp(exp_name, timestamp, gnn_eps_sweep_variables)

gnn_eps_sweep_variables = {k:v for k,v in gnn_eps_sweep_variables.items() if k not in ["data.random_state"]}

exp_name = "mnn_modelnet_dense"
timestamp = "2023-07-05_18-20-46" 
exp_config = get_experiment_config(exp_name,timestamp)
mnn_dense_sweep_variables = get_sweep_variables(exp_config)
df_mnn_dense = get_all_results_exp(exp_name, timestamp, mnn_dense_sweep_variables)

mnn_dense_sweep_variables = {k:v for k,v in mnn_dense_sweep_variables.items() if k not in ["data.random_state"]}


exp_name = "mnn_modelnet_eps"
timestamp = "2023-07-06_01-41-50" 
exp_config = get_experiment_config(exp_name,timestamp)
mnn_eps_sweep_variables = get_sweep_variables(exp_config)
df_mnn_eps = get_all_results_exp(exp_name, timestamp, mnn_eps_sweep_variables)

mnn_eps_sweep_variables = {k:v for k,v in mnn_eps_sweep_variables.items() if k not in ["data.random_state"]}

exp_name = "mnn_modelnet_knn"
timestamp = "2023-07-05_23-42-13" 
exp_config = get_experiment_config(exp_name,timestamp)
mnn_knn_sweep_variables = get_sweep_variables(exp_config)
df_mnn_knn = get_all_results_exp(exp_name, timestamp, mnn_knn_sweep_variables)

mnn_knn_sweep_variables = {k:v for k,v in mnn_knn_sweep_variables.items() if k not in ["data.random_state"]}


In [6]:
configs = [(df_scattering, scattering_sweep_variables,"Scattering", "Dense"),
           (df_scattering_knn, scattering_knn_sweep_variables,"Scattering", "KNN"),
              (df_scattering_eps, scattering_eps_sweep_variables,"Scattering", "Epsilon"),
 (df_gnn_dense, gnn_dense_sweep_variables,"GNN", "Dense"),
 (df_gnn_knn, gnn_knn_sweep_variables,"GNN", "KNN"),
(df_gnn_eps, gnn_eps_sweep_variables,"GNN", "Epsilon"),
(df_mnn_dense, mnn_dense_sweep_variables,"MNN", "Dense"),
(df_mnn_knn, mnn_knn_sweep_variables,"MNN", "KNN"),
(df_mnn_eps, mnn_eps_sweep_variables,"MNN", "Epsilon")
]

df = []
for config in configs:
    df_ = get_best(config[0], config[1])
    df_["Model name"] = config[2]
    df_["Graph type"] = config[3]
    df.append(df_)

df = pd.concat(df)
df = df[["Model name", "Graph type", "val_acc","val_acc_std","test_acc", "test_acc_std"]]


In [7]:
df["Accuracy"] = df["test_acc"].round(2).astype(str) + " $\pm$ " + df["test_acc_std"].round(2).astype(str)

In [8]:
print(df.pivot(index = "Model name", columns = "Graph type", values = "Accuracy").to_latex(index = True, escape = False))

\begin{tabular}{llll}
\toprule
Graph type & Dense & Epsilon & KNN \\
Model name &  &  &  \\
\midrule
GNN & 0.54 $\pm$ 0.02 & 0.63 $\pm$ 0.01 & 0.71 $\pm$ 0.02 \\
MNN & 0.75 $\pm$ 0.02 & 0.75 $\pm$ 0.02 & 0.73 $\pm$ 0.01 \\
Scattering & 0.6 $\pm$ 0.01 & 0.58 $\pm$ 0.01 & 0.61 $\pm$ 0.0 \\
\bottomrule
\end{tabular}



In [19]:
print(df[["Model name","Accuracy"]].to_latex(index = False, escape = False))

\begin{tabular}{ll}
\toprule
Model name & Accuracy \\
\midrule
Scattering_dense & 0.6 $\pm$ 0.01 \\
Scattering_knn & 0.61 $\pm$ 0.0 \\
Scattering_eps & 0.58 $\pm$ 0.01 \\
GNN_dense & 0.54 $\pm$ 0.02 \\
GNN_knn & 0.71 $\pm$ 0.02 \\
GNN_dense & 0.63 $\pm$ 0.01 \\
MNN_dense & 0.75 $\pm$ 0.02 \\
\bottomrule
\end{tabular}

