# Compiling the hyperparameter search data

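The objective is to generate two CSV files:
- hs_max.csv: the best (maximum) score of each hyperparameter search, one row per dataset, model and percentage
- hs_all.csv: every (dim, score) value recorded by the hyperparameter searches

The baseline scores obtained without a reducer are also saved, to TV_no_reducer_scores.csv.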

In [169]:
import yaml
import pandas as pd

## Data without reducer

In [170]:
# Dataset identifiers iterated over by every loop in this notebook.
datasets = [
    'kuhar',
    'motionsense',
    'uci',
    'wisdm',
    'realworld_thigh',
    'realworld_waist',
]

In [171]:
# Accumulator for the baseline rows: one dict per dataset is appended below.
data = []

In [172]:
# Collect the baseline ("no reducer") score of every dataset into `data`.
for dataset in datasets:
    no_reducer_experiment = f'../execute_once_experiments/_previous/TV_sb_no_reducer/scores/no_reducer_{dataset}.yaml'
    try:
        with open(no_reducer_experiment) as f:
            no_reducer_score = yaml.load(f, Loader=yaml.FullLoader)['score']
    except (OSError, yaml.YAMLError, KeyError, TypeError):
        # Best effort: a missing/unreadable file, invalid YAML, an absent
        # 'score' key or an empty document is reported and recorded as 0,
        # so the dataset still gets a row. Anything else (e.g. Ctrl-C)
        # now propagates instead of being silently swallowed.
        no_reducer_score = 0
        print(f'No reducer score not found for {dataset}')
    # 'percent' and 'dim' are fixed at 100 and 360 for every baseline row.
    new_val = {'dataset': dataset, 'model': 'no_reducer', 'percent': 100, 'dim': 360, 'score': no_reducer_score}
    data.append(new_val)

In [173]:
# Tabulate the collected baseline rows; the bare name on the last line
# makes the notebook render the frame.
df = pd.DataFrame(data=data)
df

Unnamed: 0,dataset,model,percent,dim,score
0,kuhar,no_reducer,100,360,0.671127
1,motionsense,no_reducer,100,360,0.785952
2,uci,no_reducer,100,360,0.847059
3,wisdm,no_reducer,100,360,0.747373
4,realworld_thigh,no_reducer,100,360,0.671521
5,realworld_waist,no_reducer,100,360,0.787217


In [174]:
# Persist the baseline table without the positional index column.
df.to_csv(path_or_buf='TV_no_reducer_scores.csv', index=False)

## Max values per hyperparameter search

In [175]:
# Fresh accumulator: one dict per (dataset, model, percentage) search is appended below.
data = []

In [176]:
# Reducer models whose hyperparameter searches are compiled.
models = [
    'umap',
    'ae',
    'tae',
    'convae',
    'convtae',
]
# Percentage values used in the experiment names ('..._p{percentage}').
# The int/float types matter: they are formatted verbatim into those names.
percentages = [
    2.5,
    5,
    25,
    50,
    75,
    100,
    200,
]

In [177]:
# For every (dataset, model, percentage) search, record the best score and
# the dimensionality that achieved it.
for dataset in datasets:
    for model in models:
        # UMAP searches store the dimensionality under a different column.
        dim_col_name = 'config/umap_ncomp' if model == 'umap' else 'config/latent_dim'
        for percentage in percentages:
            experiment_name = f'P10_{model}_{dataset}_p{percentage}'
            if model == 'umap':
                # UMAP experiment directories are named without the 'P10_' prefix.
                experiment_name = experiment_name[4:]
            try:
                experiment_data = pd.read_csv(f'../experiments/{experiment_name}/data.csv')
                max_row = experiment_data.loc[experiment_data['score'].idxmax()]
                data_max_value = max_row['score']
                data_dim = max_row[dim_col_name]
            except (OSError, KeyError, ValueError):
                # Missing/unreadable file, missing column, or empty/unparsable
                # CSV (pandas' EmptyDataError and ParserError are ValueErrors).
                # Reset BOTH fields: previously `data_dim` kept the value of
                # the last successful search (or was undefined on the first
                # failure), so placeholder rows carried a bogus dimension.
                data_max_value = 0
                data_dim = 0
                print(f'No data for {experiment_name}')
            # A new dict per row avoids sharing mutable state across iterations.
            data.append({
                'dataset': dataset,
                'model': model,
                'percent': percentage,
                'dim': data_dim,
                'score': data_max_value,
            })

In [178]:
# Preview the per-search maxima as a table (rendered by the notebook).
pd.DataFrame(data=data)

Unnamed: 0,dataset,model,percent,dim,score
0,kuhar,umap,2.5,7,0.424883
1,kuhar,umap,5.0,18,0.424883
2,kuhar,umap,25.0,89,0.429577
3,kuhar,umap,50.0,53,0.420188
4,kuhar,umap,75.0,51,0.422535
...,...,...,...,...,...
205,realworld_waist,convtae,25.0,21,0.834951
206,realworld_waist,convtae,50.0,32,0.840345
207,realworld_waist,convtae,75.0,12,0.821467
208,realworld_waist,convtae,100.0,136,0.831499


In [179]:
# Write the per-search maxima to disk without the positional index column.
pd.DataFrame(data=data).to_csv(path_or_buf='hs_max.csv', index=False)

## All values from hyperparameter search

In [180]:
# Fresh accumulator: one DataFrame per experiment is appended below and concatenated afterwards.
data = []

In [181]:
# Gather every recorded (dim, score) pair of every hyperparameter search
# into `data`, one DataFrame per experiment.
for dataset in datasets:
    for model in models:
        # UMAP searches store the dimensionality under a different column;
        # this only depends on the model, so compute it once per model.
        dim_col_name = 'config/umap_ncomp' if model == 'umap' else 'config/latent_dim'
        for percentage in percentages:
            experiment_name = f'P10_{model}_{dataset}_p{percentage}'
            if model == 'umap':
                # UMAP experiment directories are named without the 'P10_' prefix.
                experiment_name = experiment_name[4:]
            try:
                experiment_data = pd.read_csv(f'../experiments/{experiment_name}/data.csv')
                # Rows whose score is exactly -0.1 are dropped.
                # NOTE(review): presumably -0.1 marks a failed trial - confirm.
                experiment_data = experiment_data.loc[
                    experiment_data['score'] != -0.1, [dim_col_name, 'score']
                ]
            except (OSError, KeyError, ValueError):
                # Missing/unreadable file, missing column, or empty/unparsable
                # CSV (pandas' EmptyDataError and ParserError are ValueErrors):
                # report it and move on to the next experiment.
                print(f'No data for {experiment_name}')
                continue
            experiment_data = experiment_data.reset_index(drop=True)
            experiment_data.columns = ['dim', 'score']
            experiment_data['dataset'] = dataset
            experiment_data['model'] = model
            experiment_data['percent'] = percentage
            # Fix the column order of the exported table.
            experiment_data = experiment_data[['dataset', 'model', 'percent', 'dim', 'score']]
            data.append(experiment_data)

In [182]:
# Stack the per-experiment frames into one table with a fresh 0..n-1 index;
# the bare name on the last line makes the notebook render it.
df = pd.concat(data, ignore_index=True)
df

Unnamed: 0,dataset,model,percent,dim,score
0,kuhar,umap,2.5,3,0.197887
1,kuhar,umap,2.5,8,0.335681
2,kuhar,umap,2.5,4,0.230751
3,kuhar,umap,2.5,8,0.272300
4,kuhar,umap,2.5,4,0.208920
...,...,...,...,...,...
211578,realworld_waist,convtae,200.0,509,0.757821
211579,realworld_waist,convtae,200.0,540,0.730798
211580,realworld_waist,convtae,200.0,585,0.778263
211581,realworld_waist,convtae,200.0,560,0.698706


In [183]:
# Write the full table of search values to disk without the positional index column.
df.to_csv(path_or_buf='hs_all.csv', index=False)