In [1]:
import sys, os
from pathlib import Path
# Make the local parameter_analysis checkout importable ahead of anything
# else on sys.path; the location is derived from the user's home directory.
home = str(Path.home())
py_dir = f"{home}/repositories/ai-x/core/parameter_analysis"
sys.path.insert(0, py_dir)
from parameter_analysis import *
import pprint


# Display the value of every expression statement in a cell, not only the
# last one (IPython's default is 'last_expr').
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Silence pandas' chained-assignment warning for the whole session.
# NOTE(review): this hides the warning globally, so genuine chained-assignment
# mistakes in later cells will no longer be reported.
import pandas as pd 
pd.options.mode.chained_assignment = None  # default='warn'

# Baseline selection settings used by the criteria() calls that follow.
metric = "R2"
inclements = 0.035  # third argument to criteria(); its meaning is defined in parameter_analysis
output_dir = "output_dir"

# Load the model results (get_data comes from the parameter_analysis star import).
modeldir = f"{home}/repositories/ai-DR/models/parameter_analysis/models4"
df = get_data(modeldir)

Sample of 7000



In [2]:
# Run the selection criteria on the full frame and show the result with its
# keys in alphabetical order.
dict_para = criteria(df, metric, inclements)
{name: dict_para[name] for name in sorted(dict_para)}

{'batch_size': [64, 128, 512],
 'dropout': [0.0, 0.1, 0.2],
 'epochs': [640, 2560, 5120],
 'hidden_layers': [5, 6, 8],
 'learning_rate': [1e-06, 3.1622776601683788e-06, 1e-05],
 'neurons': [5120, 7168, 8192]}

In [3]:
# metric = "R2"

# for run in range(1, 4):
#     df = get_data(modeldir, 4)    
#     dict_para = criteria(df, metric, inclements)
#     pprint.pprint(dict_para)
#     if run == 1:
#         previous_dict_para = dict_para
#     else:
#         if previous_dict_para == dict_para:
#             print("")
#             print(f"!!MATCH!!: parameters in run {run-1} and run {run} match")
#         else:
#             previous_dict_para = dict_para

# Subsampling

In [4]:
def compute_composite_score(row, weights, maximize, minimize, time_penalty_weight, negative_r2_penalty, r2_threshold=0.5):
    """Collapse one result row into a single higher-is-better score.

    The score is built as:
        + row[m] * weights.get(m, 1)   for every m in ``maximize``
        - row[m] * weights.get(m, 1)   for every m in ``minimize``
        - row['time'] * time_penalty_weight
        - negative_r2_penalty          if row['R2'] < r2_threshold

    Args:
        row: Mapping-like record (e.g. a DataFrame row or a dict) indexable by
            every name in ``maximize`` and ``minimize`` plus 'time' and 'R2'.
        weights: Dict of per-metric multipliers; metrics missing from it use 1.
        maximize: Metric names whose weighted value is added.
        minimize: Metric names whose weighted value is subtracted.
        time_penalty_weight: Multiplier applied to row['time'] before it is
            subtracted.
        negative_r2_penalty: Flat amount subtracted when R2 is below
            ``r2_threshold``.
        r2_threshold: R2 value below which the flat penalty applies. The
            default of 0.5 reproduces the cut-off that was previously
            hard-coded; note that despite the penalty's name this is NOT a
            "negative R2" test unless 0.0 is passed explicitly.

    Returns:
        The composite score as a number.

    Raises:
        KeyError: If ``row`` lacks one of the required keys.
    """
    score = 0
    # Accumulate in the same order as before so floating-point results (and
    # therefore any ranking built on them) are unchanged.
    for metric in maximize:
        score += row[metric] * weights.get(metric, 1)
    for metric in minimize:
        score -= row[metric] * weights.get(metric, 1)
    # Longer runs score lower.
    score -= row['time'] * time_penalty_weight
    # Flat penalty for rows whose R2 falls under the configurable cut-off.
    if row['R2'] < r2_threshold:
        score -= negative_r2_penalty
    return score


In [5]:
# Inputs for compute_composite_score.
weights = dict(R2=2.0, RMSE=1.0)          # per-metric multipliers
maximize, minimize = ['R2'], ['RMSE']     # added to / subtracted from the score
time_penalty_weight = 1e-3                # multiplies row['time'] before subtraction
negative_r2_penalty = 10.0                # flat amount subtracted for low-R2 rows

# Fraction of the ranked rows treated as the "top" group.
x_percent = 0.15

In [6]:
# Reload the results and switch the ranking metric to the composite score.
modeldir = f"{home}/repositories/ai-DR/models/parameter_analysis/models4"
df = get_data(modeldir)
metric = "composite_score"

# Score every row, then order the frame best-first with a fresh 0..n-1 index.
df[metric] = df.apply(
    compute_composite_score,
    axis=1,
    args=(weights, maximize, minimize, time_penalty_weight, negative_r2_penalty),
)
df = df.sort_values(by=metric, ascending=False).reset_index(drop=True)

# The score spread across the top x_percent of rows becomes the new inclements.
n_top = int(len(df) * x_percent)
top_composite_score = max(df[metric].head(n_top))
bottom_composite_score = min(df[metric].head(n_top))
inclements = top_composite_score - bottom_composite_score
inclements

Sample of 7000



0.12316179080849232

In [9]:
# Best and worst composite score inside the top group (their difference is inclements).
(top_composite_score, bottom_composite_score)

(0.8598113757075933, 0.7366495848991009)

In [7]:
# Apply the alternative criteria2 selection on the composite score, using the
# score spread computed from the top group as inclements.
dict_para = criteria2(
    df,
    "composite_score",
    inclements=inclements,
)
dict_para

{'epochs': [160, 640, 1280],
 'hidden_layers': [1, 2],
 'neurons': [6144, 7168, 8192],
 'learning_rate': [3.1622776601683788e-06, 1e-05, 3.1622776601683795e-05],
 'batch_size': [256, 512, 1024],
 'dropout': [0.0, 0.1]}

In [8]:
def subsample2(df, n_sample, n_states, metric, inclements):
    """Run ``criteria`` on ``n_states`` reproducible random subsamples of ``df``.

    Subsample ``k`` (for k = 0 .. n_states - 1) is drawn with
    ``df.sample(n_sample, random_state=k)``, so repeated calls yield the same
    draws.

    Args:
        df: DataFrame to sample rows from.
        n_sample: Number of rows per subsample.
        n_states: How many subsamples (random states) to evaluate.
        metric: Forwarded unchanged to ``criteria``.
        inclements: Forwarded unchanged to ``criteria``.

    Returns:
        A list with one ``criteria`` result per random state, in state order.
    """
    return [
        criteria(df.sample(n_sample, random_state=seed), metric, inclements)
        for seed in range(n_states)
    ]


In [4]:
# Baseline for the subsampling study, computed on data loaded with pretreat=False.
#
# Pin the selection settings explicitly. An earlier cell rebinds `metric` to
# "composite_score" and `inclements` to a score spread; the frame loaded here
# is a fresh get_data() result, and the composite_score column is only ever
# added to the earlier frame. Without these two lines the cell depends on
# kernel execution order. The recorded output of this cell is identical to the
# R2 / 0.035 baseline output near the top of the notebook, which is why those
# values are used.
metric = "R2"
inclements = 0.035

run = 10
df = get_data(modeldir, pretreat = False)
dict_para = criteria(df, metric, inclements)
dict(sorted(dict_para.items()))

Sample of 7000



{'batch_size': [64, 128, 512],
 'dropout': [0.0, 0.1, 0.2],
 'epochs': [640, 2560, 5120],
 'hidden_layers': [5, 6, 8],
 'learning_rate': [1e-06, 3.1622776601683788e-06, 1e-05],
 'neurons': [5120, 7168, 8192]}

In [5]:
# Settings shared by every subsampling round below.
n_states = 100      # forwarded to subsample() as its n_states argument
ls_modes = list()   # collects one modes_result per round for the final vote
run = 10

In [6]:
# Subsampling round: n_sample = 500.
# `subsample` is not defined in this notebook (only `subsample2` is), so it
# presumably comes from the parameter_analysis star import — TODO confirm.
n_sample = 500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 8, 'mode': [(64, 128, 512), (32, 64, 128)]},
 'dropout': {'count': 30, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 21, 'mode': [(640, 1280, 2560)]},
 'hidden_layers': {'count': 11, 'mode': [(1, 4, 6)]},
 'learning_rate': {'count': 49,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 18, 'mode': [(5120, 7168, 8192)]}}


In [7]:
# Subsampling round: n_sample = 1000.
n_sample = 1000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 10, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 42, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 33, 'mode': [(640, 1280, 2560)]},
 'hidden_layers': {'count': 14, 'mode': [(1, 6, 8)]},
 'learning_rate': {'count': 61,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 24, 'mode': [(5120, 7168, 8192)]}}


In [8]:
# Subsampling round: n_sample = 1500.
n_sample = 1500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 11, 'mode': [(16, 128, 512)]},
 'dropout': {'count': 51, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 24, 'mode': [(640, 1280, 2560)]},
 'hidden_layers': {'count': 19, 'mode': [(1, 6, 8)]},
 'learning_rate': {'count': 68,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 35, 'mode': [(5120, 7168, 8192)]}}


In [9]:
# Subsampling round: n_sample = 2000.
n_sample = 2000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 14, 'mode': [(16, 128, 512)]},
 'dropout': {'count': 61, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 24, 'mode': [(640, 1280, 2560)]},
 'hidden_layers': {'count': 17, 'mode': [(1, 4, 6)]},
 'learning_rate': {'count': 68,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 39, 'mode': [(5120, 7168, 8192)]}}


In [10]:
# Subsampling round: n_sample = 2500.
n_sample = 2500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 19, 'mode': [(16, 128, 512), (64, 128, 512)]},
 'dropout': {'count': 67, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 29, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 18, 'mode': [(1, 6, 8)]},
 'learning_rate': {'count': 71,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 41, 'mode': [(5120, 6144, 7168)]}}


In [11]:
# Subsampling round: n_sample = 3000.
n_sample = 3000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 18, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 77, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 37, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 16, 'mode': [(5, 6, 8)]},
 'learning_rate': {'count': 75,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 44, 'mode': [(5120, 7168, 8192)]}}


In [12]:
# Subsampling round: n_sample = 3500.
n_sample = 3500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 25, 'mode': [(16, 128, 512)]},
 'dropout': {'count': 82, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 38, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 18, 'mode': [(4, 6, 8)]},
 'learning_rate': {'count': 77,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 53, 'mode': [(5120, 7168, 8192)]}}


In [13]:
# Subsampling round: n_sample = 4000.
n_sample = 4000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 29, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 77, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 42, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 21, 'mode': [(4, 6, 8)]},
 'learning_rate': {'count': 82,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 53, 'mode': [(5120, 7168, 8192)]}}


In [14]:
# Subsampling round: n_sample = 4500.
n_sample = 4500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 27, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 91, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 38, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 24, 'mode': [(5, 6, 8)]},
 'learning_rate': {'count': 85,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 59, 'mode': [(5120, 7168, 8192)]}}


In [15]:
# Subsampling round: n_sample = 5000.
n_sample = 5000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 35, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 91, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 44, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 31, 'mode': [(4, 6, 8)]},
 'learning_rate': {'count': 90,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 57, 'mode': [(5120, 7168, 8192)]}}


In [16]:
# Subsampling round: n_sample = 5500.
n_sample = 5500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 46, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 95, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 45, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 37, 'mode': [(5, 6, 8)]},
 'learning_rate': {'count': 98,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 62, 'mode': [(5120, 7168, 8192)]}}


In [17]:
# Subsampling round: n_sample = 6000.
n_sample = 6000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 53, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 99, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 63, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 41, 'mode': [(5, 6, 8)]},
 'learning_rate': {'count': 100,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 68, 'mode': [(5120, 7168, 8192)]}}


In [18]:
# Subsampling round: n_sample = 6500.
n_sample = 6500
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 56, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 100, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 65, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 59, 'mode': [(5, 6, 8)]},
 'learning_rate': {'count': 100,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 88, 'mode': [(5120, 7168, 8192)]}}


In [19]:
# Subsampling round: n_sample = 7000.
# NOTE(review): get_data reported "Sample of 7000", so this round appears to
# draw the whole frame every time; if subsample draws without replacement,
# all n_states draws are the same rows (the recorded counts are 100 for every
# hyperparameter). Confirm this round is meant to take part in the vote.
n_sample = 7000
all_dict = subsample(df, n_sample, n_states, metric, inclements)
modes_result = find_mode_hyperparameters(all_dict)

# Keep this round's modes for the final get_majority_voting(ls_modes) call.
# NOTE(review): re-running this cell adds a duplicate entry to ls_modes.
ls_modes += [modes_result]
pprint.pprint(modes_result)

{'batch_size': {'count': 100, 'mode': [(64, 128, 512)]},
 'dropout': {'count': 100, 'mode': [(0.0, 0.1, 0.2)]},
 'epochs': {'count': 100, 'mode': [(640, 2560, 5120)]},
 'hidden_layers': {'count': 100, 'mode': [(5, 6, 8)]},
 'learning_rate': {'count': 100,
                   'mode': [(1e-06, 3.1622776601683788e-06, 1e-05)]},
 'neurons': {'count': 100, 'mode': [(5120, 7168, 8192)]}}


In [20]:
# Combine the per-round mode results collected in ls_modes (one entry appended
# by each subsampling round) into a single choice per hyperparameter.
# NOTE(review): ls_modes is only as clean as the execution history — any round
# cell that was run more than once contributes more than one entry here.
dict_output = get_majority_voting(ls_modes)
dict_output

{'batch_size': [(64, 128, 512)],
 'dropout': [(0.0, 0.1, 0.2)],
 'epochs': [(640, 2560, 5120)],
 'hidden_layers': [(5, 6, 8)],
 'learning_rate': [(1e-06, 3.1622776601683788e-06, 1e-05)],
 'neurons': [(5120, 7168, 8192)]}