In [2]:
import pandas as pd
import os

# solo
mtry_df = pd.read_csv('resources/results_solo_grid/mtry.csv')
num_trees_df = pd.read_csv('resources/results_solo_grid/num_trees.csv')
min_node_size_df = pd.read_csv('resources/results_solo_grid/min_node_size.csv')
replace_df = pd.read_csv('resources/results_solo_grid/replace.csv')
sample_fraction_df = pd.read_csv('resources/results_solo_grid/sample_fraction.csv')

# pairwise
directory = "resources/results_pairwise_random"
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        filepath = os.path.join(directory, filename)
        # Remove the .csv from the filename to use as the variable name
        var_name = filename[:-4] + '_df'
        # Load the dataframe and assign it to a variable with the name in var_name
        df = pd.read_csv(filepath)
        # Drop the 'Unnamed: 0' column
        df = df.drop('Unnamed: 0', axis=1)
        globals()[var_name] = df

# all 5
results_all_bayesian_df = pd.read_csv('resources/results_all_search_algo/tuning_results.csv')
results_all_random_df = pd.read_csv('resources/results_all_random/results.csv')

# rename columns '_' --> '.' 
column_mapping = {col: col.replace('_', '.') for col in results_all_random_df.columns if col not in ['runtime_training', 'runtime_prediction']}
results_all_random_df.rename(columns=column_mapping, inplace=True)
results_all_random_df.columns

Index(['Unnamed: 0', 'num.trees', 'sample.fraction', 'mtry', 'replace',
       'min.node.size', 'mse', 'runtime_training', 'runtime_prediction'],
      dtype='object')

# Comparison of best hyperparameter combinations

In [11]:
mtry_df

Unnamed: 0,mtry,mse,runtime_training,runtime_prediction
0,1,1671201000.0,0.116,0.034
1,2,945790700.0,0.168,0.034
2,3,817644200.0,0.262,0.042
3,4,735703200.0,0.282,0.038
4,5,720560000.0,0.302,0.038
5,6,703286800.0,0.306,0.046
6,7,679958400.0,0.338,0.044
7,8,696809300.0,0.368,0.044
8,9,706247400.0,0.416,0.042
9,10,669846000.0,0.414,0.044


In [3]:
# Solo values

pd.options.display.float_format = '{:.2f}'.format

print(mtry_df.loc[mtry_df['mse'].idxmin()])
print(num_trees_df.loc[num_trees_df['mse'].idxmin()])
print(min_node_size_df.loc[min_node_size_df['mse'].idxmin()])
print(replace_df.loc[replace_df['mse'].idxmin()])
print(sample_fraction_df.loc[sample_fraction_df['mse'].idxmin()])

mtry                        16.00
mse                  618131360.44
runtime_training             0.68
runtime_prediction           0.05
Name: 15, dtype: float64
num_trees                  148.00
mse                  654037329.28
runtime_training             0.12
runtime_prediction           0.03
Name: 147, dtype: float64
min.node.size                4.00
mse                  658493738.00
runtime_training             0.40
runtime_prediction           0.04
Name: 3, dtype: float64
replace                      True
mse                  666984929.06
runtime_training             0.36
runtime_prediction           0.04
Name: 0, dtype: object
sample.fraction              0.92
mse                  661497672.34
runtime_training             0.31
runtime_prediction           0.04
Name: 91, dtype: float64


In [4]:
# Pairwise Values 

print(mtry_min_node_size_df.loc[mtry_df['mse'].idxmin()])
print(mtry_replace_df.loc[mtry_df['mse'].idxmin()])
print(mtry_sample_fraction_df.loc[mtry_df['mse'].idxmin()])
print(num_trees_min_node_size_df.loc[mtry_df['mse'].idxmin()])
print(num_trees_mtry_df.loc[mtry_df['mse'].idxmin()])
print(num_trees_replace_df.loc[mtry_df['mse'].idxmin()])
print(num_trees_sample_fraction_df.loc[mtry_df['mse'].idxmin()])
print(min_node_size_replace_df.loc[mtry_df['mse'].idxmin()])
print(sample_fraction_min_node_size_df.loc[mtry_df['mse'].idxmin()])
print(sample_fraction_replace_df.loc[mtry_df['mse'].idxmin()])

num_trees                       NaN
sample_fraction                 NaN
mtry                           4.00
replace                         NaN
min_node_size                  9.00
mse                  10357549366.65
runtime_training               0.23
runtime_prediction             0.03
Name: 15, dtype: float64
num_trees                       NaN
sample_fraction                 NaN
mtry                          35.00
replace                        0.00
min_node_size                   NaN
mse                  11278455703.92
runtime_training               0.81
runtime_prediction             0.04
Name: 15, dtype: float64
num_trees                       NaN
sample_fraction                0.69
mtry                          59.00
replace                         NaN
min_node_size                   NaN
mse                  10773599407.05
runtime_training               1.16
runtime_prediction             0.03
Name: 15, dtype: float64
num_trees                   1562.00
sample_fraction          

In [5]:
# Random Search All 5 Hyperparameters
print(results_all_random_df.loc[mtry_df['mse'].idxmin()])

Unnamed: 0              elapsed15
num.trees                      70
sample.fraction              0.72
mtry                           27
replace                     False
min.node.size                  16
mse                  732541818.25
runtime_training             0.07
runtime_prediction           0.02
Name: 15, dtype: object
