In [21]:
import numpy as np
import pandas as pd
import plotly.express as px

In [22]:
# Notebook configuration.
# `results_to_optim` decides which score column is analysed: 'partial' selects
# "partial_score", any other value selects "full_score".
results_to_optim = 'partial'
# `dataset` is interpolated into the grid-search results file name.
dataset = 'parkinson'

In [89]:
# Read the validation grid-search results of the configured dataset and preview
# the first rows.
results_csv = f'grid_search_results/{dataset}_grid_val.csv'
df = pd.read_csv(results_csv)
df.head()

Unnamed: 0,dataset,conf_num,reg_type,weight_type,alpha,use_layer_norm,use_aug,lr,batch_size,full_score,partial_score
0,Parkinson,1,l1,,0.0,True,True,0.001,32,0.96875,0.962 +- 0.013
1,Parkinson,2,l1,,0.0,True,True,0.001,64,0.96875,0.959 +- 0.015
2,Parkinson,3,l1,,0.0,True,True,0.01,32,1.0,0.984 +- 0.016
3,Parkinson,4,l1,,0.0,True,True,0.01,64,0.96875,0.959 +- 0.017
4,Parkinson,5,l1,,0.0,True,True,0.1,32,0.96875,0.963 +- 0.02


In [90]:
def _score_mean(score):
    """Return the mean of a score cell as a float.

    Strings of the form "mean +- std" yield their "mean" part; values that are
    already numeric pass through, which keeps this cell safe to re-run.
    """
    if isinstance(score, str):
        return float(score.split(" +- ")[0])
    return float(score)


# Choose the score column to analyse; the other score column is discarded.
optimise_partial = results_to_optim == 'partial'
label_col = "partial_score" if optimise_partial else "full_score"
other_label_col = "full_score" if optimise_partial else "partial_score"

# errors="ignore" makes the drop a no-op when the columns are already gone,
# so executing the cell a second time no longer raises KeyError.
df = df.drop(columns=["dataset", "conf_num", other_label_col], errors="ignore")

if label_col == "partial_score":
    # partial_score is stored as "mean +- std"; keep only the mean.
    df[label_col] = df[label_col].apply(_score_mean)
df.head()

Unnamed: 0,reg_type,weight_type,alpha,use_layer_norm,use_aug,lr,batch_size,partial_score
0,l1,,0.0,True,True,0.001,32,0.962
1,l1,,0.0,True,True,0.001,64,0.959
2,l1,,0.0,True,True,0.01,32,0.984
3,l1,,0.0,True,True,0.01,64,0.959
4,l1,,0.0,True,True,0.1,32,0.963


In [91]:
# Replace every hyper-parameter value by the index of its first appearance in
# the column (stored as float) and remember the value -> index lookup per column.
feature_cols = [name for name in df.columns if name != label_col]
mappings = {}

for col in feature_cols:
    levels = df[col].unique()
    mappings[col] = dict(zip(levels, range(len(levels))))
    df[col] = df[col].map(mappings[col]).astype(float)

## Parallel Coordinates Plot

In [92]:
# Preview the frame after encoding: every column except the score column now
# holds float index codes instead of the original values.
df.head()

Unnamed: 0,reg_type,weight_type,alpha,use_layer_norm,use_aug,lr,batch_size,partial_score
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.962
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.959
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.984
3,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.959
4,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.963


In [93]:
# Show the original reg_type values, in the order their index codes were assigned.
mappings['reg_type'].keys()

dict_keys(['l1', 'l2', 'max', 'var'])

In [94]:
# Shift every encoded column by a small offset derived from the row's score:
# the score is min-max scaled to [-JITTER, +JITTER] and added to each code.
# The offset is deterministic (not random); presumably it spreads rows that
# share a category so their lines do not overlap exactly in the plot.
JITTER = .05
score_span = df[label_col].max() - df[label_col].min()
if score_span > 0:
    noises = ((df[label_col] - df[label_col].min()) / score_span * 2 - 1) * JITTER
else:
    # All scores equal (or no valid scores): scaling would divide by zero and
    # turn every encoded column into NaN, so apply no offset instead.
    noises = pd.Series(0.0, index=df.index)

for col in df.columns:
    if col != label_col:
        # The codes are whole numbers and |offset| <= JITTER < 0.5, so rounding
        # recovers the code; re-running this cell therefore does not stack a
        # second offset on top of the previous one.
        df[col] = df[col].round() + noises

In [95]:
import plotly.graph_objects as go
from matplotlib import cm

# One axis per encoded hyper-parameter: tick positions are the index codes and
# tick labels are the original values recorded in `mappings`.
score_label = f"{results_to_optim}_score"

param_axes = []
for col in df.columns:
    if col == label_col:
        continue
    codes = mappings[col]
    param_axes.append(
        dict(
            label=col,
            values=df[col],
            tickvals=list(codes.values()),
            ticktext=list(codes.keys()),
        )
    )

# The score gets its own (last) axis and also drives the line colour.
score_axis = dict(label=score_label, values=df[label_col])

parcoords = go.Parcoords(
    line=dict(color=df[label_col], colorscale='RdYlBu', colorbar=dict(title=score_label)),
    dimensions=[*param_axes, score_axis],
)
fig = go.Figure(parcoords)

# An earlier draft built the figure with px.parallel_coordinates(df, color=...,
# color_continuous_scale=px.colors.diverging.Tealrose) instead of go.Parcoords.

fig.update_layout(plot_bgcolor='white')
fig.show()

In [26]:
# Preview the current frame.
# NOTE(review): the output recorded under this cell carries an earlier
# execution count and shows different columns than the cells above produce;
# re-run the notebook top to bottom to refresh it.
df.head()

Unnamed: 0,reg_type,weight_type,alpha,lr,partial_score
0,l1,,0.0,0.001,0.96733
1,l1,,0.0,0.001,0.987216
2,l1,,0.0,0.01,0.992898
3,l1,,0.0,0.01,0.990057
4,l1,,0.0,0.1,0.93892
