In [1]:
import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load data
df = pd.read_csv('Churn_Modelling.csv')

# Prepossing The Data

In [3]:
# Drop unnecessary columns
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

In [4]:
# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])
    
# One-hot encode Geography
geo_encoder = OneHotEncoder(sparse_output=False)
geo_encoded = geo_encoder.fit_transform(df['Geography'].values.reshape(-1, 1))
geo_df = pd.DataFrame(geo_encoded, columns=['France', 'Germany', 'Spain'])
df = pd.concat([df.drop('Geography', axis=1), geo_df], axis=1)

In [5]:
# Save The Encoded Data In Pickle File :
with open('lable_Encoding_gender.pkl','wb') as file:
    pickle.dump(le_gender,file)
    
with open('One_Hot_Encoding_Geography.pkl','wb') as file:
    pickle.dump(geo_encoder,file)    

In [6]:
# Split The Data Into Train & Test :
X=df.drop(['Exited'],axis=1)
y=df['Exited']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=50)
X_train.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,France,Germany,Spain
5189,826,1,41,5,146466.46,2,0,0,180934.67,0.0,0.0,1.0
7969,637,1,49,2,108204.52,1,1,0,169037.84,0.0,1.0,0.0
9039,545,0,44,1,0.0,2,1,1,82614.89,0.0,0.0,1.0
5208,779,0,38,7,0.0,2,1,1,138542.87,0.0,0.0,1.0
506,691,1,30,7,116927.89,1,1,0,21198.39,0.0,1.0,0.0


In [14]:
# Standardization (or z-score normalization) of the features data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save The StandardScaler Object In Pickle File :
with open('Feature_scaling.pkl','wb') as file:
    pickle.dump(scaler,file)

# Build Model Using Pytorch

In [11]:
# Model definition
class SalaryPredictor(nn.Module):
    def __init__(self, input_dim, nodes, layers, dropout_rate):
        super().__init__()
        
        # Input layer
        layers_list = [
            nn.Linear(input_dim, nodes),
            nn.BatchNorm1d(nodes),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        ]
        
        # Hidden layers
        current_nodes = nodes
        for _ in range(layers):
            next_nodes = max(current_nodes // 2, 64)
            layers_list.extend([
                nn.Linear(current_nodes, next_nodes),
                nn.BatchNorm1d(next_nodes),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            ])
            current_nodes = next_nodes
        
        # Output layer
        layers_list.append(nn.Linear(current_nodes, 1))
        
        self.model = nn.Sequential(*layers_list)
    
    def forward(self, x):
        return self.model(x)

# Create Optuna Object Function

In [17]:
# Optuna objective function
def objective(trial):
    # Hyperparameters to optimize
    layers = trial.suggest_int('layers', 1, 10)
    nodes = trial.suggest_categorical('nodes', [128, 256, 512])
    epochs = trial.suggest_int("epochs", 10, 50, step=10)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    optimizer_name = trial.suggest_categorical("optimizer", ['Adam', 'SGD', 'RMSprop'])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

    # Prepare data
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train.values)
    
    # Create DataLoader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    # Model, loss
    model = SalaryPredictor(X_train.shape[1], nodes, layers, dropout_rate)
    criterion = nn.BCEWithLogitsLoss()
    
    # Optimizer Selection
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training Loop
    best_val_loss = float('inf')
    for _ in range(epochs):
        model.train()
        total_loss = 0
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        avg_loss = total_loss / len(train_loader)
        if avg_loss < best_val_loss:
            best_val_loss = avg_loss
        
        if avg_loss < 1e-4:
            break
    
    # Validation
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.FloatTensor(X_test)
        y_val_tensor = torch.FloatTensor(y_test.values)
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor.unsqueeze(1))
    
    return val_loss.item()

In [21]:
# Create Optuna study object and optimize it
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, show_progress_bar=True) 
# Increased trials for better optimization

[I 2025-01-25 15:15:08,607] A new study created in memory with name: no-name-44706f64-7a8d-411d-b9bd-07bcdf9b457a
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
Best trial: 0. Best value: 0.42309:   2%|▏         | 1/50 [00:15<12:48, 15.68s/it]

[I 2025-01-25 15:15:24,289] Trial 0 finished with value: 0.42308950424194336 and parameters: {'layers': 4, 'nodes': 256, 'epochs': 10, 'learning_rate': 0.00028877658944409876, 'dropout_rate': 0.4655588859112405, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 8.93246828369214e-05}. Best is trial 0 with value: 0.42308950424194336.


Best trial: 0. Best value: 0.42309:   4%|▍         | 2/50 [01:02<27:10, 33.96s/it]

[I 2025-01-25 15:16:11,044] Trial 1 finished with value: 0.5178495049476624 and parameters: {'layers': 5, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0001497447529996826, 'dropout_rate': 0.1624125345662708, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.0008536127865776314}. Best is trial 0 with value: 0.42308950424194336.


Best trial: 0. Best value: 0.42309:   6%|▌         | 3/50 [01:18<20:10, 25.75s/it]

[I 2025-01-25 15:16:27,030] Trial 2 finished with value: 0.5147718787193298 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.00022609900538026032, 'dropout_rate': 0.29200143917431043, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 1.1049092109372135e-05}. Best is trial 0 with value: 0.42308950424194336.


Best trial: 3. Best value: 0.368117:   8%|▊         | 4/50 [01:54<22:53, 29.85s/it]

[I 2025-01-25 15:17:03,165] Trial 3 finished with value: 0.36811691522598267 and parameters: {'layers': 9, 'nodes': 512, 'epochs': 40, 'learning_rate': 0.00047424528503082206, 'dropout_rate': 0.32142026328922385, 'batch_size': 128, 'optimizer': 'RMSprop', 'weight_decay': 2.6376291221194234e-05}. Best is trial 3 with value: 0.36811691522598267.


Best trial: 4. Best value: 0.359369:  10%|█         | 5/50 [02:46<28:19, 37.76s/it]

[I 2025-01-25 15:17:54,959] Trial 4 finished with value: 0.3593685030937195 and parameters: {'layers': 2, 'nodes': 512, 'epochs': 30, 'learning_rate': 0.002402583734958304, 'dropout_rate': 0.3096540149786298, 'batch_size': 16, 'optimizer': 'SGD', 'weight_decay': 2.5018586687581946e-05}. Best is trial 4 with value: 0.3593685030937195.


Best trial: 4. Best value: 0.359369:  12%|█▏        | 6/50 [04:04<37:53, 51.66s/it]

[I 2025-01-25 15:19:13,600] Trial 5 finished with value: 0.5306689143180847 and parameters: {'layers': 9, 'nodes': 256, 'epochs': 40, 'learning_rate': 0.0016692792479402756, 'dropout_rate': 0.3111615503430979, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.0003859303461412665}. Best is trial 4 with value: 0.3593685030937195.


Best trial: 4. Best value: 0.359369:  14%|█▍        | 7/50 [04:27<30:18, 42.29s/it]

[I 2025-01-25 15:19:36,532] Trial 6 finished with value: 0.5489075779914856 and parameters: {'layers': 3, 'nodes': 256, 'epochs': 40, 'learning_rate': 0.00027380572716488116, 'dropout_rate': 0.34918456969884965, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 0.00014282770927326536}. Best is trial 4 with value: 0.3593685030937195.


Best trial: 7. Best value: 0.354706:  16%|█▌        | 8/50 [04:53<25:57, 37.07s/it]

[I 2025-01-25 15:20:02,490] Trial 7 finished with value: 0.3547055721282959 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0003217517557740042, 'dropout_rate': 0.3198804994705876, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 5.930170991861681e-05}. Best is trial 7 with value: 0.3547055721282959.


Best trial: 7. Best value: 0.354706:  18%|█▊        | 9/50 [05:26<24:18, 35.58s/it]

[I 2025-01-25 15:20:34,780] Trial 8 finished with value: 0.4832644760608673 and parameters: {'layers': 5, 'nodes': 512, 'epochs': 40, 'learning_rate': 0.00030008067884275885, 'dropout_rate': 0.21731711148408703, 'batch_size': 64, 'optimizer': 'SGD', 'weight_decay': 0.00016183336046588783}. Best is trial 7 with value: 0.3547055721282959.


Best trial: 7. Best value: 0.354706:  20%|██        | 10/50 [05:45<20:22, 30.56s/it]

[I 2025-01-25 15:20:54,099] Trial 9 finished with value: 0.49348771572113037 and parameters: {'layers': 4, 'nodes': 256, 'epochs': 20, 'learning_rate': 0.0002080595589995756, 'dropout_rate': 0.1301440994276146, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 3.4336800065531964e-05}. Best is trial 7 with value: 0.3547055721282959.


Best trial: 7. Best value: 0.354706:  22%|██▏       | 11/50 [06:25<21:41, 33.38s/it]

[I 2025-01-25 15:21:33,880] Trial 10 finished with value: 0.39271390438079834 and parameters: {'layers': 7, 'nodes': 128, 'epochs': 10, 'learning_rate': 0.009544165448583487, 'dropout_rate': 0.4357741096392532, 'batch_size': 16, 'optimizer': 'Adam', 'weight_decay': 5.9580069053600954e-05}. Best is trial 7 with value: 0.3547055721282959.


Best trial: 11. Best value: 0.351259:  24%|██▍       | 12/50 [07:22<25:41, 40.57s/it]

[I 2025-01-25 15:22:30,911] Trial 11 finished with value: 0.35125932097435 and parameters: {'layers': 1, 'nodes': 512, 'epochs': 30, 'learning_rate': 0.0018028199794007215, 'dropout_rate': 0.3940540914069327, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 1.5664648647615327e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  26%|██▌       | 13/50 [08:00<24:29, 39.73s/it]

[I 2025-01-25 15:23:08,698] Trial 12 finished with value: 0.35252201557159424 and parameters: {'layers': 1, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.0008406470007902552, 'dropout_rate': 0.4056492661017387, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 1.3540487207678175e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  28%|██▊       | 14/50 [08:37<23:26, 39.08s/it]

[I 2025-01-25 15:23:46,273] Trial 13 finished with value: 0.35475191473960876 and parameters: {'layers': 1, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.0008674529712123155, 'dropout_rate': 0.4013173758279871, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 1.0968883014158486e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  30%|███       | 15/50 [10:05<31:19, 53.71s/it]

[I 2025-01-25 15:25:13,888] Trial 14 finished with value: 0.37333521246910095 and parameters: {'layers': 7, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.003699291947748628, 'dropout_rate': 0.4883984123942169, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 1.7779036884414848e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  32%|███▏      | 16/50 [10:45<28:08, 49.67s/it]

[I 2025-01-25 15:25:54,173] Trial 15 finished with value: 0.35830289125442505 and parameters: {'layers': 1, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.0008742858058097226, 'dropout_rate': 0.3912145798299863, 'batch_size': 16, 'optimizer': 'Adam', 'weight_decay': 1.6638274588770533e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  34%|███▍      | 17/50 [10:53<20:28, 37.22s/it]

[I 2025-01-25 15:26:02,441] Trial 16 finished with value: 0.35481128096580505 and parameters: {'layers': 3, 'nodes': 512, 'epochs': 10, 'learning_rate': 0.0015747250770791008, 'dropout_rate': 0.39193825411167066, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 4.8957870992879185e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  36%|███▌      | 18/50 [17:20<1:15:54, 142.33s/it]

[I 2025-01-25 15:32:29,474] Trial 17 finished with value: 0.3680262863636017 and parameters: {'layers': 7, 'nodes': 512, 'epochs': 50, 'learning_rate': 0.0063746120361620395, 'dropout_rate': 0.43498630289309403, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 1.042837859811319e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  38%|███▊      | 19/50 [18:05<58:27, 113.13s/it]  

[I 2025-01-25 15:33:14,575] Trial 18 finished with value: 0.35862231254577637 and parameters: {'layers': 2, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.0005495916993870312, 'dropout_rate': 0.2521346500140405, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.0002763670227937086}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  40%|████      | 20/50 [19:34<52:56, 105.89s/it]

[I 2025-01-25 15:34:43,588] Trial 19 finished with value: 0.35547131299972534 and parameters: {'layers': 3, 'nodes': 512, 'epochs': 30, 'learning_rate': 0.0034613035163007185, 'dropout_rate': 0.3654845366078157, 'batch_size': 16, 'optimizer': 'Adam', 'weight_decay': 1.8383976138403432e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  42%|████▏     | 21/50 [20:04<40:02, 82.85s/it] 

[I 2025-01-25 15:35:12,713] Trial 20 finished with value: 0.37574511766433716 and parameters: {'layers': 10, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.0014865449697012982, 'dropout_rate': 0.44250968019030923, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 2.996103779600204e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  44%|████▍     | 22/50 [20:23<29:50, 63.95s/it]

[I 2025-01-25 15:35:32,602] Trial 21 finished with value: 0.3576503098011017 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.00010519266606613006, 'dropout_rate': 0.2531207982361994, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 5.296232369141741e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  46%|████▌     | 23/50 [20:43<22:49, 50.73s/it]

[I 2025-01-25 15:35:52,490] Trial 22 finished with value: 0.35250183939933777 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0005921699363442344, 'dropout_rate': 0.3526459002713075, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 8.607373749654639e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  48%|████▊     | 24/50 [21:29<21:19, 49.22s/it]

[I 2025-01-25 15:36:38,201] Trial 23 finished with value: 0.3513427674770355 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0005907076439804809, 'dropout_rate': 0.36579318977776454, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 9.467267770915101e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  50%|█████     | 25/50 [22:30<22:00, 52.84s/it]

[I 2025-01-25 15:37:39,470] Trial 24 finished with value: 0.3513423204421997 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0005466576107159655, 'dropout_rate': 0.356429798260895, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 9.688993333608144e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  52%|█████▏    | 26/50 [23:57<25:08, 62.85s/it]

[I 2025-01-25 15:39:05,670] Trial 25 finished with value: 0.3539917767047882 and parameters: {'layers': 4, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0004346139108821628, 'dropout_rate': 0.36968001284345714, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.00021503087850160057}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  54%|█████▍    | 27/50 [25:25<27:03, 70.60s/it]

[I 2025-01-25 15:40:34,371] Trial 26 finished with value: 0.354696124792099 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.0011344261214742194, 'dropout_rate': 0.27316559698416687, 'batch_size': 16, 'optimizer': 'Adam', 'weight_decay': 0.00048800979968027444}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  56%|█████▌    | 28/50 [26:49<27:23, 74.68s/it]

[I 2025-01-25 15:41:58,570] Trial 27 finished with value: 0.3558090031147003 and parameters: {'layers': 3, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.002619685019415885, 'dropout_rate': 0.342725524895104, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.00012421427510292602}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  58%|█████▊    | 29/50 [28:47<30:37, 87.48s/it]

[I 2025-01-25 15:43:55,918] Trial 28 finished with value: 0.3746684491634369 and parameters: {'layers': 6, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0006579791371798826, 'dropout_rate': 0.4945645382599362, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 4.0971215010941795e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  60%|██████    | 30/50 [29:58<27:31, 82.56s/it]

[I 2025-01-25 15:45:06,991] Trial 29 finished with value: 0.3560299873352051 and parameters: {'layers': 4, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0011469345102268123, 'dropout_rate': 0.4600648412282565, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 9.330586763119045e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  62%|██████▏   | 31/50 [30:22<20:37, 65.11s/it]

[I 2025-01-25 15:45:31,380] Trial 30 finished with value: 0.3522026240825653 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.00037947393716052124, 'dropout_rate': 0.4217836552630669, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 7.62744960655205e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  64%|██████▍   | 32/50 [30:47<15:52, 52.89s/it]

[I 2025-01-25 15:45:55,771] Trial 31 finished with value: 0.3534396290779114 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.00039208587723470336, 'dropout_rate': 0.41762837663441665, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 7.729181356085634e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  66%|██████▌   | 33/50 [31:15<12:53, 45.49s/it]

[I 2025-01-25 15:46:24,007] Trial 32 finished with value: 0.35465264320373535 and parameters: {'layers': 3, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.0006652585656314849, 'dropout_rate': 0.38731433991345077, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 0.00011352697865695076}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  68%|██████▊   | 34/50 [31:29<09:38, 36.13s/it]

[I 2025-01-25 15:46:38,293] Trial 33 finished with value: 0.380054771900177 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.00018518558488923447, 'dropout_rate': 0.4695506864424743, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 0.0001765508260264492}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  70%|███████   | 35/50 [31:41<07:13, 28.87s/it]

[I 2025-01-25 15:46:50,220] Trial 34 finished with value: 0.3526572287082672 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0003786498318166897, 'dropout_rate': 0.33707508854134643, 'batch_size': 128, 'optimizer': 'RMSprop', 'weight_decay': 7.405803851253978e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  72%|███████▏  | 36/50 [32:18<07:18, 31.36s/it]

[I 2025-01-25 15:47:27,380] Trial 35 finished with value: 0.37203946709632874 and parameters: {'layers': 5, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.0001418901334660834, 'dropout_rate': 0.37479161340304534, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 0.0007380617025856451}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  74%|███████▍  | 37/50 [32:34<05:47, 26.77s/it]

[I 2025-01-25 15:47:43,439] Trial 36 finished with value: 0.3528905510902405 and parameters: {'layers': 4, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0023164575043853404, 'dropout_rate': 0.41997247258195103, 'batch_size': 128, 'optimizer': 'RMSprop', 'weight_decay': 0.00022337972338439815}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  76%|███████▌  | 38/50 [34:14<09:44, 48.69s/it]

[I 2025-01-25 15:49:23,273] Trial 37 finished with value: 0.35332605242729187 and parameters: {'layers': 3, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.0005274722425320992, 'dropout_rate': 0.1880588211554321, 'batch_size': 16, 'optimizer': 'Adam', 'weight_decay': 2.1948881402802524e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  78%|███████▊  | 39/50 [34:35<07:22, 40.27s/it]

[I 2025-01-25 15:49:43,888] Trial 38 finished with value: 0.3565104603767395 and parameters: {'layers': 2, 'nodes': 256, 'epochs': 40, 'learning_rate': 0.0012120050751795153, 'dropout_rate': 0.2998538441291358, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 4.0944112443121864e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  80%|████████  | 40/50 [34:41<05:00, 30.04s/it]

[I 2025-01-25 15:49:50,061] Trial 39 finished with value: 0.5729897618293762 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.00025508454557791383, 'dropout_rate': 0.329242634398772, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 0.0003098422390575305}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  82%|████████▏ | 41/50 [35:36<05:38, 37.60s/it]

[I 2025-01-25 15:50:45,318] Trial 40 finished with value: 0.3595108687877655 and parameters: {'layers': 3, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0007099532961938904, 'dropout_rate': 0.2806710387165047, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 6.939611702775225e-05}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  84%|████████▍ | 42/50 [35:54<04:14, 31.81s/it]

[I 2025-01-25 15:51:03,601] Trial 41 finished with value: 0.35127824544906616 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0005046122779961765, 'dropout_rate': 0.3547376397374731, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00010030097960713236}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  86%|████████▌ | 43/50 [36:13<03:14, 27.80s/it]

[I 2025-01-25 15:51:22,055] Trial 42 finished with value: 0.35532915592193604 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0003498330162204571, 'dropout_rate': 0.3560955574816244, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.0001268727800952201}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  88%|████████▊ | 44/50 [36:43<02:51, 28.55s/it]

[I 2025-01-25 15:51:52,360] Trial 43 finished with value: 0.3520059287548065 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0004814039673200893, 'dropout_rate': 0.37736213212125935, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.0001077767491220491}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  90%|█████████ | 45/50 [37:10<02:19, 27.91s/it]

[I 2025-01-25 15:52:18,751] Trial 44 finished with value: 0.4001922309398651 and parameters: {'layers': 2, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.002016218979151261, 'dropout_rate': 0.32243400271648087, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.00010328801396902974}. Best is trial 11 with value: 0.35125932097435.


Best trial: 11. Best value: 0.351259:  92%|█████████▏| 46/50 [37:31<01:43, 25.86s/it]

[I 2025-01-25 15:52:39,851] Trial 45 finished with value: 0.3533632755279541 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.00047210095412159346, 'dropout_rate': 0.36872579984423987, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00015703201827853728}. Best is trial 11 with value: 0.35125932097435.


Best trial: 46. Best value: 0.348726:  94%|█████████▍| 47/50 [37:58<01:19, 26.41s/it]

[I 2025-01-25 15:53:07,528] Trial 46 finished with value: 0.3487260043621063 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0009715619067182132, 'dropout_rate': 0.38247415095870707, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00018542078372695}. Best is trial 46 with value: 0.3487260043621063.


Best trial: 46. Best value: 0.348726:  96%|█████████▌| 48/50 [38:14<00:46, 23.25s/it]

[I 2025-01-25 15:53:23,423] Trial 47 finished with value: 0.4379606544971466 and parameters: {'layers': 1, 'nodes': 128, 'epochs': 30, 'learning_rate': 0.0007978633535403383, 'dropout_rate': 0.31638567333101364, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.00021673587066476353}. Best is trial 46 with value: 0.3487260043621063.


Best trial: 46. Best value: 0.348726:  98%|█████████▊| 49/50 [38:48<00:26, 26.36s/it]

[I 2025-01-25 15:53:57,041] Trial 48 finished with value: 0.3507152497768402 and parameters: {'layers': 1, 'nodes': 256, 'epochs': 40, 'learning_rate': 0.0009484234359620656, 'dropout_rate': 0.40005432261502183, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.0006108099013293767}. Best is trial 46 with value: 0.3487260043621063.


Best trial: 46. Best value: 0.348726: 100%|██████████| 50/50 [39:22<00:00, 47.26s/it]

[I 2025-01-25 15:54:31,501] Trial 49 finished with value: 0.35163548588752747 and parameters: {'layers': 1, 'nodes': 256, 'epochs': 40, 'learning_rate': 0.0010360889572811538, 'dropout_rate': 0.40703317734843525, 'batch_size': 32, 'optimizer': 'Adam', 'weight_decay': 0.0005945091105117168}. Best is trial 46 with value: 0.3487260043621063.





In [22]:
# Best model training and saving
best_params = study.best_params
best_model = SalaryPredictor(
    X_train.shape[1], 
    best_params['nodes'], 
    best_params['layers'], 
    best_params['dropout_rate']
)

In [23]:
print("Best hyperparameters:", best_params)
print("Best validation loss:", study.best_value)

Best hyperparameters: {'layers': 1, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.0009715619067182132, 'dropout_rate': 0.38247415095870707, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00018542078372695}
Best validation loss: 0.3487260043621063


In [24]:
# Save the best model state
torch.save(best_model.state_dict(), 'Best_ANN_classification_model.h5')
# It allow to Preserve the model's learned parameters for future use 
# without having to retrain the model.

In [25]:
# Save the best hyperparameters in a pickle file
with open('Best_Hyperparameters.pkl', 'wb') as f:
    pickle.dump(best_params, f)

In [26]:
# Save the study object
with open('study.pkl', 'wb') as f:
    pickle.dump(study, f)

In [28]:
# Save the best model in onnx format
# dummy_input = torch.randn(1, X_train.shape[1])
# torch.onnx.export(best_model, dummy_input, 'Best_ANN_classification_model.onnx', verbose=True)

# Save the best model in torchscript format
# traced_model = torch.jit.trace(best_model, dummy_input)
# traced_model.save('Best_ANN_classification_model.pt')