In [None]:

from hyperparameter_tuning import *


# Locations of the dataset CSV files, relative to the working directory
x_train_csv, y_train_csv, x_test_csv = (
    "dataset/x_train.csv",
    "dataset/y_train.csv",
    "dataset/x_test.csv",
)





### Additional grid search

**Different feature combinations: augmented and reduced dictionary**

In [13]:
# Reduced feature dictionary: maps a column name to the description of how
# that column is typed and which raw codes count as missing.  It is passed to
# transform_features_column_median together with the feature-name list.
#
# The binary columns are listed as (column, extra missing codes) pairs; NaN is
# appended to every list of missing codes.  The listing order is the insertion
# order of the resulting dict, so it is kept exactly as in the hand-written
# version.
_binary_columns = (
    ('BPMEDS', (7, 9)),
    ('TOLDHI2', (7, 9)),
    ('DIABETE3', (7, 9)),
    ('HLTHPLN1', (7, 9)),
    ('QLACTLM2', (7, 9)),
    ('EXERANY2', (7, 9)),
    ('ADDEPEV2', (7, 9)),
    ('ASTHMA3', (7, 9)),
    ('SMOKE100', (7, 9)),
    ('CVDSTRK3', (7, 9)),
    ('HAVARTH3', (7, 9)),
    ('CHCCOPD1', (7, 9)),
    ('SEX', ()),
    ('BPHIGH4', (7, 9)),
    ('USEEQUIP', (7, 9)),
    ('BLIND', (7, 9)),
    ('DECIDE', (7, 9)),
    ('DIFFWALK', (7, 9)),
    ('DIFFDRES', (7, 9)),
    ('DIFFALON', (7, 9)),
    ('ALCDAY5', ()),
    ('CHECKUP1', ()),
    ('STRENGTH', ()),
    ('AVG_FRUITS_VEGS', ()),
)

# Binary variables (a fresh missing-values list is created for every column)
features_dict = {
    column: {'type': 'binary', 'missing_values': [*codes, np.nan]}
    for column, codes in _binary_columns
}

# Categorical variable
features_dict['_RACEGR3'] = {
    'type': 'categorical',
    'missing_values': [9, np.nan],
    'categories': [1, 2, 3, 4, 5],
}

# Numeric variables ('map_value' recodes a raw value before use, e.g. 88 -> 0)
features_dict.update({
    'BMI': {
        'type': 'numeric',
        'missing_values': [7777, 9999, np.nan],
        'range': (1, 4),
    },
    'MENTHLTH': {
        'type': 'numeric',
        'missing_values': [77, 99, np.nan],
        'map_value': {88: 0},
        'range': (1, 30),
    },
    '_AGEG5YR': {
        'type': 'numeric',
        'missing_values': [14, np.nan],
        'range': (1, 13),
    },
    'PHYSHLTH': {
        'type': 'numeric',
        'missing_values': [77, 99, np.nan],
        'map_value': {88: 0},
        'range': (1, 30),
    },
})
# Augmented feature dictionary: the columns of the reduced dictionary plus
# additional binary and numeric columns.  Each entry maps a column name to its
# type and to the raw codes treated as missing; it is passed to
# transform_features_column_median together with the feature-name list.
#
# Every key appears exactly once.  'CHCCOPD1' used to be listed a second time
# among the additional binary columns with an identical value; a repeated key
# in a dict literal silently overwrites the earlier one, so the repeat was
# removed (the resulting dict is unchanged).
features_dict_big = {
    # Binary variables (shared with the reduced dictionary)
    'BPMEDS': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'TOLDHI2': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'DIABETE3': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'HLTHPLN1': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'QLACTLM2': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'EXERANY2': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'ADDEPEV2': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'ASTHMA3': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'SMOKE100': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'CVDSTRK3': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'HAVARTH3': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'CHCCOPD1': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'SEX': {'type': 'binary', 'missing_values': [np.nan]},
    'BPHIGH4': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'USEEQUIP': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'BLIND': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'DECIDE': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'DIFFWALK': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'DIFFDRES': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'DIFFALON': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'ALCDAY5': {'type': 'binary', 'missing_values': [np.nan]},
    'CHECKUP1': {'type': 'binary', 'missing_values': [np.nan]},
    'STRENGTH': {'type': 'binary', 'missing_values': [np.nan]},
    'AVG_FRUITS_VEGS': {'type': 'binary', 'missing_values': [np.nan]},

    # Binary variables (additional columns of the augmented dictionary)
    '_RFBING5': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_AGE65YR': {'type': 'binary', 'missing_values': [np.nan]},
    '_RFDRHV5': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_TOTINDA': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_PAINDX1': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_PASTRNG': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_PASTAE1': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_FLSHOT6': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_PNEUMO2': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_AIDTST3': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_RFBMI5': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_HISPANC': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_CASTHM1': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_LTASTH1': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_RFCHOL': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_RFHYPE5': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_HCVU651': {'type': 'binary', 'missing_values': [9, np.nan]},
    '_RFHLTH': {'type': 'binary', 'missing_values': [9, np.nan]},
    'MEDCOST': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'BLOODCHO': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'ASTHNOW': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'CHCSCNCR': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'CHCOCNCR': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'CHCKIDNY': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'VETERAN3': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'INTERNET': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'LMTJOIN3': {'type': 'binary', 'missing_values': [7, 9, np.nan]},
    'ARTHDIS2': {'type': 'binary', 'missing_values': [7, 9, np.nan]},

    # Categorical variable
    '_RACEGR3': {
        'type': 'categorical',
        'missing_values': [9, np.nan],
        'categories': [1, 2, 3, 4, 5]
    },

    # Numeric variables
    # NOTE(review): '_PACAT1', 'FC60_' and 'MAXVO2_' are the only entries whose
    # 'missing_values' omit np.nan, and several entries below share the same
    # (1, 6) 'range' — confirm both are intentional and not copy-paste leftovers.
    #'_DRNKWEK': {'type': 'numeric', 'missing_values': [99900, np.nan], 'range': (1, 6)},
    '_PAREC1': {'type': 'numeric', 'missing_values': [9, np.nan], 'range': (1, 6)},
    '_PA150R2': {'type': 'numeric', 'missing_values': [9, np.nan], 'range': (1, 6)},
    '_PACAT1': {'type': 'numeric', 'missing_values': [9], 'range': (1, 6)},
    'FC60_': {'type': 'numeric', 'missing_values': [99900], 'range': (1, 6)},
    'MAXVO2_': {'type': 'numeric', 'missing_values': [99900], 'range': (1, 6)},
    'EDUCA': {'type': 'numeric', 'missing_values': [9, np.nan], 'range': (1, 6)},  # Education level
    '_INCOMG': {'type': 'numeric', 'missing_values': [9, np.nan], 'range': (1, 6)},
    'BMI': {'type': 'numeric', 'missing_values': [7777, 9999, np.nan], 'range': (1, 4)},
    'MENTHLTH': {'type': 'numeric', 'missing_values': [77, 99, np.nan], 'map_value': {88: 0}, 'range': (1, 30)},
    '_AGEG5YR': {'type': 'numeric', 'missing_values': [14, np.nan], 'range': (1, 13)},
    'PHYSHLTH': {'type': 'numeric', 'missing_values': [77, 99, np.nan], 'map_value': {88: 0}, 'range': (1, 30)}
}

In [9]:
# Read the training matrix, the training labels and the test matrix.
# The paths come from the constants defined in the notebook's first cell
# (x_train_csv, y_train_csv, x_test_csv) so the dataset location is stated
# in one place only.
x_train, features = csv_to_array(x_train_csv)
Y_train, labels = csv_to_array(y_train_csv)
x_test, ids = csv_to_array(x_test_csv, id=True)
# Flatten y_train to convert into 1d array
Y_train = Y_train.flatten()
# Recode the -1 label as 0; every other label value is left unchanged
Y_train = np.where(Y_train == -1, 0, Y_train)

In [25]:
# Grid search using the reduced dictionary (features_dict)
# NOTE(review): `features` is rebound to the list returned by X_preprocessing,
# so re-running this cell (or any later cell repeating this call) feeds the
# already-processed list back in together with the raw x_train — confirm that
# X_preprocessing tolerates this.
X_train, features = X_preprocessing(x_train, features)
X_train_balanced, Y_train_balanced = balance_dataset(X_train, Y_train)
X_train_transformed = transform_features_column_median(X_train_balanced, features_dict, features)

# Candidate values per hyperparameter (single-element lists are held fixed)
gamma_values = [0.01, 0.001, 0.0005, 0.0001]  # Learning rate
max_iters_values = [250, 500, 1000, 1500]  # Number of iterations
l2_reg_values = [0.2, 0.1, 0.01]  # L2 regularization parameter
batch_size_values = [256, 512, 2048]  # Batch sizes for SGD
prob_threshold_values = [0.5]
beta1_values = [0.9]
beta2_values = [0.98]
decay_rates = [0.96]
decay_step_values = [100]

K = 5  # passed as K to K_fold_cross_validation_f1_score
grid_search_results = []  # one dict per evaluated combination

# Key under which each element of a combination is recorded; the order
# mirrors the argument order of itertools.product below.
param_names = (
    'gamma', 'max_iters', 'batch_size', 'reg_norm', 'prob_threshold',
    'beta1', 'beta2', 'decay_rate', 'decay_steps',
)
search_space = itertools.product(
    gamma_values,
    max_iters_values,
    batch_size_values,
    l2_reg_values,
    prob_threshold_values,
    beta1_values,
    beta2_values,
    decay_rates,
    decay_step_values,
)

# Evaluate every combination and keep its parameters next to its score
for combination in search_space:
    params = dict(zip(param_names, combination))

    F1_score = K_fold_cross_validation_f1_score(
        X_train_transformed,
        Y_train_balanced,
        K,
        seed=5,
        max_iters=params['max_iters'],
        gamma=params['gamma'],
        reg_norm=['l2', params['reg_norm']],
        prob_threshold=params['prob_threshold'],
        batch_size=params['batch_size'],
        beta1=params['beta1'],
        beta2=params['beta2'],
        decay_rate=params['decay_rate'],
        decay_steps=params['decay_steps'],
    )

    grid_search_results.append({**params, 'F1_score': F1_score})



  average_column = np.nanmean(selected_columns, axis=1)
100%|██████████| 5/5 [00:29<00:00,  5.96s/it]
100%|██████████| 5/5 [00:35<00:00,  7.01s/it]
100%|██████████| 5/5 [00:32<00:00,  6.59s/it]
100%|██████████| 5/5 [00:26<00:00,  5.31s/it]
100%|██████████| 5/5 [00:26<00:00,  5.30s/it]
100%|██████████| 5/5 [00:25<00:00,  5.20s/it]
100%|██████████| 5/5 [00:21<00:00,  4.38s/it]
100%|██████████| 5/5 [00:22<00:00,  4.51s/it]
100%|██████████| 5/5 [00:21<00:00,  4.32s/it]
100%|██████████| 5/5 [00:55<00:00, 11.11s/it]
100%|██████████| 5/5 [01:03<00:00, 12.65s/it]
100%|██████████| 5/5 [00:57<00:00, 11.53s/it]
100%|██████████| 5/5 [00:42<00:00,  8.59s/it]
100%|██████████| 5/5 [00:47<00:00,  9.46s/it]
100%|██████████| 5/5 [00:53<00:00, 10.61s/it]
100%|██████████| 5/5 [00:43<00:00,  8.78s/it]
100%|██████████| 5/5 [00:43<00:00,  8.79s/it]
100%|██████████| 5/5 [00:43<00:00,  8.75s/it]
100%|██████████| 5/5 [02:08<00:00, 25.63s/it]
100%|██████████| 5/5 [01:53<00:00, 22.73s/it]
100%|██████████| 5/5 [02

In [26]:
# Rank the evaluated combinations from best to worst F1 score
sorted_results = sorted(
    grid_search_results,
    key=lambda entry: entry['F1_score'],
    reverse=True,
)

# Fields printed for every combination, in display order
report_fields = (
    'F1_score', 'gamma', 'max_iters', 'batch_size', 'reg_norm',
    'prob_threshold', 'beta1', 'beta2', 'decay_rate', 'decay_steps',
)

# Print one block per combination, followed by a blank separator
for rank, entry in enumerate(sorted_results, start=1):
    print(f"Rank {rank}:")
    for field in report_fields:
        print(f"  {field}: {entry[field]}")
    print("\n")

Rank 1:
  F1_score: 0.7297562656384822
  gamma: 0.0001
  max_iters: 500
  batch_size: 256
  reg_norm: 0.1
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 2:
  F1_score: 0.7297292031551234
  gamma: 0.0001
  max_iters: 500
  batch_size: 256
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 3:
  F1_score: 0.7297097504072538
  gamma: 0.0001
  max_iters: 500
  batch_size: 256
  reg_norm: 0.01
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 4:
  F1_score: 0.72938087080485
  gamma: 0.0001
  max_iters: 1000
  batch_size: 512
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 5:
  F1_score: 0.7293283091039728
  gamma: 0.0001
  max_iters: 1000
  batch_size: 512
  reg_norm: 0.1
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 6:
  F1_score: 0.72

In [28]:
# Grid search using the augmented dictionary (features_dict_big)
# NOTE(review): `features` is rebound to the list returned by X_preprocessing,
# so whenever this call is repeated the already-processed list is fed back in
# together with the raw x_train — confirm that X_preprocessing tolerates this.
X_train, features = X_preprocessing(x_train, features)
X_train_balanced, Y_train_balanced = balance_dataset(X_train, Y_train)
X_train_transformed = transform_features_column_median(X_train_balanced, features_dict_big, features)

# Candidate values per hyperparameter (single-element lists are held fixed)
gamma_values = [0.01, 0.001, 0.0005, 0.0001]  # Learning rate
max_iters_values = [250, 500, 1000, 1500]  # Number of iterations
l2_reg_values = [0.2, 0.1, 0.01]  # L2 regularization parameter
batch_size_values = [256, 512, 2048]  # Batch sizes for SGD
prob_threshold_values = [0.5]
beta1_values = [0.9]
beta2_values = [0.98]
decay_rates = [0.96]
decay_step_values = [100]

K = 5  # passed as K to K_fold_cross_validation_f1_score
grid_search_results = []  # one dict per evaluated combination

# Key under which each element of a combination is recorded; the order
# mirrors the argument order of itertools.product below.
param_names = (
    'gamma', 'max_iters', 'batch_size', 'reg_norm', 'prob_threshold',
    'beta1', 'beta2', 'decay_rate', 'decay_steps',
)
search_space = itertools.product(
    gamma_values,
    max_iters_values,
    batch_size_values,
    l2_reg_values,
    prob_threshold_values,
    beta1_values,
    beta2_values,
    decay_rates,
    decay_step_values,
)

# Evaluate every combination and keep its parameters next to its score
for combination in search_space:
    params = dict(zip(param_names, combination))

    F1_score = K_fold_cross_validation_f1_score(
        X_train_transformed,
        Y_train_balanced,
        K,
        seed=5,
        max_iters=params['max_iters'],
        gamma=params['gamma'],
        reg_norm=['l2', params['reg_norm']],
        prob_threshold=params['prob_threshold'],
        batch_size=params['batch_size'],
        beta1=params['beta1'],
        beta2=params['beta2'],
        decay_rate=params['decay_rate'],
        decay_steps=params['decay_steps'],
    )

    grid_search_results.append({**params, 'F1_score': F1_score})


  average_column = np.nanmean(selected_columns, axis=1)
100%|██████████| 5/5 [00:32<00:00,  6.49s/it]
100%|██████████| 5/5 [00:33<00:00,  6.71s/it]
100%|██████████| 5/5 [00:33<00:00,  6.70s/it]
100%|██████████| 5/5 [00:29<00:00,  5.94s/it]
100%|██████████| 5/5 [00:29<00:00,  5.92s/it]
100%|██████████| 5/5 [00:29<00:00,  5.88s/it]
100%|██████████| 5/5 [00:26<00:00,  5.22s/it]
100%|██████████| 5/5 [00:26<00:00,  5.28s/it]
100%|██████████| 5/5 [00:26<00:00,  5.29s/it]
100%|██████████| 5/5 [01:07<00:00, 13.58s/it]
100%|██████████| 5/5 [01:07<00:00, 13.58s/it]
100%|██████████| 5/5 [01:08<00:00, 13.62s/it]
100%|██████████| 5/5 [01:00<00:00, 12.01s/it]
100%|██████████| 5/5 [00:59<00:00, 11.93s/it]
100%|██████████| 5/5 [00:59<00:00, 11.86s/it]
100%|██████████| 5/5 [00:52<00:00, 10.45s/it]
100%|██████████| 5/5 [00:52<00:00, 10.45s/it]
100%|██████████| 5/5 [00:53<00:00, 10.75s/it]
100%|██████████| 5/5 [02:17<00:00, 27.42s/it]
100%|██████████| 5/5 [02:17<00:00, 27.55s/it]
100%|██████████| 5/5 [02

In [29]:
# Rank the evaluated combinations from best to worst F1 score
sorted_results = sorted(
    grid_search_results,
    key=lambda entry: entry['F1_score'],
    reverse=True,
)

# Fields printed for every combination, in display order
report_fields = (
    'F1_score', 'gamma', 'max_iters', 'batch_size', 'reg_norm',
    'prob_threshold', 'beta1', 'beta2', 'decay_rate', 'decay_steps',
)

# Print one block per combination, followed by a blank separator
for rank, entry in enumerate(sorted_results, start=1):
    print(f"Rank {rank}:")
    for field in report_fields:
        print(f"  {field}: {entry[field]}")
    print("\n")

Rank 1:
  F1_score: 0.7310476815563055
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 2:
  F1_score: 0.7306840135353248
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.1
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 3:
  F1_score: 0.7298749236114407
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.01
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 4:
  F1_score: 0.7279765031782957
  gamma: 0.01
  max_iters: 250
  batch_size: 256
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 5:
  F1_score: 0.7276688432559046
  gamma: 0.01
  max_iters: 250
  batch_size: 256
  reg_norm: 0.1
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 6:
  F1_score: 0.727068352

In [30]:
# Refinement of the augmented-dictionary search (features_dict_big) around
# gamma=0.01, max_iters=1500, batch_size=256, reg_norm=0.2: tries one longer
# run, one smaller batch size and one stronger L2 penalty.
# NOTE(review): `features` is rebound to the list returned by X_preprocessing,
# so whenever this call is repeated the already-processed list is fed back in
# together with the raw x_train — confirm that X_preprocessing tolerates this.
X_train, features = X_preprocessing(x_train, features)
X_train_balanced, Y_train_balanced = balance_dataset(X_train, Y_train)
X_train_transformed = transform_features_column_median(X_train_balanced, features_dict_big, features)

# Candidate values per hyperparameter (single-element lists are held fixed)
gamma_values = [0.01]  # Learning rate
max_iters_values = [1500, 2000]  # Number of iterations
l2_reg_values = [0.2, 0.3]  # L2 regularization parameter
batch_size_values = [128, 256]  # Batch sizes for SGD
prob_threshold_values = [0.5]
beta1_values = [0.9]
beta2_values = [0.98]
decay_rates = [0.96]
decay_step_values = [100]

K = 5  # passed as K to K_fold_cross_validation_f1_score
grid_search_results = []  # one dict per evaluated combination

# Key under which each element of a combination is recorded; the order
# mirrors the argument order of itertools.product below.
param_names = (
    'gamma', 'max_iters', 'batch_size', 'reg_norm', 'prob_threshold',
    'beta1', 'beta2', 'decay_rate', 'decay_steps',
)
search_space = itertools.product(
    gamma_values,
    max_iters_values,
    batch_size_values,
    l2_reg_values,
    prob_threshold_values,
    beta1_values,
    beta2_values,
    decay_rates,
    decay_step_values,
)

# Evaluate every combination and keep its parameters next to its score
for combination in search_space:
    params = dict(zip(param_names, combination))

    F1_score = K_fold_cross_validation_f1_score(
        X_train_transformed,
        Y_train_balanced,
        K,
        seed=5,
        max_iters=params['max_iters'],
        gamma=params['gamma'],
        reg_norm=['l2', params['reg_norm']],
        prob_threshold=params['prob_threshold'],
        batch_size=params['batch_size'],
        beta1=params['beta1'],
        beta2=params['beta2'],
        decay_rate=params['decay_rate'],
        decay_steps=params['decay_steps'],
    )

    grid_search_results.append({**params, 'F1_score': F1_score})


  average_column = np.nanmean(selected_columns, axis=1)
100%|██████████| 5/5 [04:26<00:00, 53.28s/it]
100%|██████████| 5/5 [04:10<00:00, 50.18s/it]
100%|██████████| 5/5 [04:05<00:00, 49.18s/it]
100%|██████████| 5/5 [04:14<00:00, 50.97s/it]
100%|██████████| 5/5 [06:58<00:00, 83.65s/it]
100%|██████████| 5/5 [07:01<00:00, 84.24s/it]
100%|██████████| 5/5 [05:38<00:00, 67.67s/it]
100%|██████████| 5/5 [05:42<00:00, 68.55s/it]


In [31]:
# Rank the evaluated combinations from best to worst F1 score
sorted_results = sorted(
    grid_search_results,
    key=lambda entry: entry['F1_score'],
    reverse=True,
)

# Fields printed for every combination, in display order
report_fields = (
    'F1_score', 'gamma', 'max_iters', 'batch_size', 'reg_norm',
    'prob_threshold', 'beta1', 'beta2', 'decay_rate', 'decay_steps',
)

# Print one block per combination, followed by a blank separator
for rank, entry in enumerate(sorted_results, start=1):
    print(f"Rank {rank}:")
    for field in report_fields:
        print(f"  {field}: {entry[field]}")
    print("\n")

Rank 1:
  F1_score: 0.7311078916069256
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.3
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 2:
  F1_score: 0.7310476815563055
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 3:
  F1_score: 0.7211102721170545
  gamma: 0.01
  max_iters: 2000
  batch_size: 128
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 4:
  F1_score: 0.7208300146363207
  gamma: 0.01
  max_iters: 2000
  batch_size: 128
  reg_norm: 0.3
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 5:
  F1_score: 0.7144461458001349
  gamma: 0.01
  max_iters: 2000
  batch_size: 256
  reg_norm: 0.2
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 6:
  F1_score: 0.71419843

In [32]:
# Second refinement of the augmented-dictionary search (features_dict_big):
# max_iters is fixed at 1500 while a larger learning rate (0.02), a larger
# batch size (512) and stronger L2 penalties (0.4, 0.5) are tried.
# NOTE(review): `features` is rebound to the list returned by X_preprocessing,
# so whenever this call is repeated the already-processed list is fed back in
# together with the raw x_train — confirm that X_preprocessing tolerates this.
X_train, features = X_preprocessing(x_train, features)
X_train_balanced, Y_train_balanced = balance_dataset(X_train, Y_train)
X_train_transformed = transform_features_column_median(X_train_balanced, features_dict_big, features)

# Candidate values per hyperparameter (single-element lists are held fixed)
gamma_values = [0.01, 0.02]  # Learning rate
max_iters_values = [1500]  # Number of iterations
l2_reg_values = [0.5, 0.4]  # L2 regularization parameter
batch_size_values = [256, 512]  # Batch sizes for SGD
prob_threshold_values = [0.5]
beta1_values = [0.9]
beta2_values = [0.98]
decay_rates = [0.96]
decay_step_values = [100]

K = 5  # passed as K to K_fold_cross_validation_f1_score
grid_search_results = []  # one dict per evaluated combination

# Key under which each element of a combination is recorded; the order
# mirrors the argument order of itertools.product below.
param_names = (
    'gamma', 'max_iters', 'batch_size', 'reg_norm', 'prob_threshold',
    'beta1', 'beta2', 'decay_rate', 'decay_steps',
)
search_space = itertools.product(
    gamma_values,
    max_iters_values,
    batch_size_values,
    l2_reg_values,
    prob_threshold_values,
    beta1_values,
    beta2_values,
    decay_rates,
    decay_step_values,
)

# Evaluate every combination and keep its parameters next to its score
for combination in search_space:
    params = dict(zip(param_names, combination))

    F1_score = K_fold_cross_validation_f1_score(
        X_train_transformed,
        Y_train_balanced,
        K,
        seed=5,
        max_iters=params['max_iters'],
        gamma=params['gamma'],
        reg_norm=['l2', params['reg_norm']],
        prob_threshold=params['prob_threshold'],
        batch_size=params['batch_size'],
        beta1=params['beta1'],
        beta2=params['beta2'],
        decay_rate=params['decay_rate'],
        decay_steps=params['decay_steps'],
    )

    grid_search_results.append({**params, 'F1_score': F1_score})

  average_column = np.nanmean(selected_columns, axis=1)
100%|██████████| 5/5 [04:19<00:00, 51.92s/it]
100%|██████████| 5/5 [04:14<00:00, 50.85s/it]
100%|██████████| 5/5 [03:39<00:00, 43.94s/it]
100%|██████████| 5/5 [03:39<00:00, 43.99s/it]
100%|██████████| 5/5 [04:12<00:00, 50.41s/it]
100%|██████████| 5/5 [03:52<00:00, 46.45s/it]
100%|██████████| 5/5 [03:15<00:00, 39.07s/it]
100%|██████████| 5/5 [03:52<00:00, 46.49s/it]


In [33]:
# Rank the evaluated combinations from best to worst F1 score
sorted_results = sorted(
    grid_search_results,
    key=lambda entry: entry['F1_score'],
    reverse=True,
)

# Fields printed for every combination, in display order
report_fields = (
    'F1_score', 'gamma', 'max_iters', 'batch_size', 'reg_norm',
    'prob_threshold', 'beta1', 'beta2', 'decay_rate', 'decay_steps',
)

# Print one block per combination, followed by a blank separator
for rank, entry in enumerate(sorted_results, start=1):
    print(f"Rank {rank}:")
    for field in report_fields:
        print(f"  {field}: {entry[field]}")
    print("\n")

Rank 1:
  F1_score: 0.7306400959844421
  gamma: 0.01
  max_iters: 1500
  batch_size: 512
  reg_norm: 0.5
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 2:
  F1_score: 0.7305334126003304
  gamma: 0.01
  max_iters: 1500
  batch_size: 512
  reg_norm: 0.4
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 3:
  F1_score: 0.7300047780160678
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.4
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 4:
  F1_score: 0.7299970513722146
  gamma: 0.01
  max_iters: 1500
  batch_size: 256
  reg_norm: 0.5
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 5:
  F1_score: 0.7253710446004609
  gamma: 0.02
  max_iters: 1500
  batch_size: 512
  reg_norm: 0.4
  prob_threshold: 0.5
  beta1: 0.9
  beta2: 0.98
  decay_rate: 0.96
  decay_steps: 100


Rank 6:
  F1_score: 0.72532049