In [3]:
import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import optuna

In [4]:
# Load data
df = pd.read_csv('Churn_Modelling.csv')

# Prepossing The Data

In [5]:
# Drop unnecessary columns
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

In [6]:
# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])
    
# One-hot encode Geography
geo_encoder = OneHotEncoder(sparse_output=False)
geo_encoded = geo_encoder.fit_transform(df['Geography'].values.reshape(-1, 1))
geo_df = pd.DataFrame(geo_encoded, columns=['France', 'Germany', 'Spain'])
df = pd.concat([df.drop('Geography', axis=1), geo_df], axis=1)

In [7]:
# Save The Encoded Data In Pickle File :
with open('lable_Encoding_gender.pkl','wb') as file:
    pickle.dump(le_gender,file)
    
with open('One_Hot_Encoding_Geography.pkl','wb') as file:
    pickle.dump(geo_encoder,file)    

In [8]:
# Split The Data Into Train & Test :
X=df.drop(['EstimatedSalary'],axis=1)
y=df['EstimatedSalary']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=50)
X_train.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited,France,Germany,Spain
5189,826,1,41,5,146466.46,2,0,0,0,0.0,0.0,1.0
7969,637,1,49,2,108204.52,1,1,0,1,0.0,1.0,0.0
9039,545,0,44,1,0.0,2,1,1,0,0.0,0.0,1.0
5208,779,0,38,7,0.0,2,1,1,0,0.0,0.0,1.0
506,691,1,30,7,116927.89,1,1,0,0,0.0,1.0,0.0


In [9]:
# Standardization (or z-score normalization) of the features data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save the StandardScaler object in pickle file
os.makedirs('artifacts', exist_ok=True)  # Create directory if it doesn't exist
with open('artifacts/scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Build Model Using Pytorch

In [11]:
# Model definition
class SalaryPredictor(nn.Module):
    def __init__(self, input_dim, nodes, layers, dropout_rate):
        super().__init__()
        
        # Input layer
        layers_list = [
            nn.Linear(input_dim, nodes),
            nn.BatchNorm1d(nodes),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        ]
        
        # Hidden layers
        current_nodes = nodes
        for _ in range(layers):
            next_nodes = max(current_nodes // 2, 64)
            layers_list.extend([
                nn.Linear(current_nodes, next_nodes),
                nn.BatchNorm1d(next_nodes),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            ])
            current_nodes = next_nodes
        
        # Output layer
        layers_list.append(nn.Linear(current_nodes, 1))
        
        self.model = nn.Sequential(*layers_list)
    
    def forward(self, x):
        return self.model(x)

# Create Optuna Object Function

In [12]:
# Optuna objective function
def objective(trial):
    # Hyperparameters to optimize
    layers = trial.suggest_int('layers', 1, 10)
    nodes = trial.suggest_categorical('nodes', [128, 256, 512])
    epochs = trial.suggest_int("epochs", 10, 50, step=10)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    optimizer_name = trial.suggest_categorical("optimizer", ['Adam', 'SGD', 'RMSprop'])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

    
    # Prepare data
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1)
    
    # Create DataLoader
    X_train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(X_train_dataset, batch_size=batch_size, shuffle=True)
    
    # Model, loss
    model = SalaryPredictor(X_train.shape[1], nodes, layers, dropout_rate)  # Fixed parameter order
    criterion = nn.MSELoss()
    
    # Optimizer Selection
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training Loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        if total_loss / len(train_loader) < 1e-4:
            break
    
    # Validation
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.FloatTensor(X_test)
        y_val_tensor = torch.FloatTensor(y_test.values).reshape(-1, 1)
        val_predictions = model(X_val_tensor)
        val_loss = nn.functional.mse_loss(val_predictions, y_val_tensor)
    
    return val_loss.item()

In [14]:
# Create Optuna study object and optimize it
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20, show_progress_bar=True) 
# Increased trials for better optimization

[I 2025-01-24 22:26:02,937] A new study created in memory with name: no-name-d9e35582-faee-4e64-bfc5-2854392e2a47
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
Best trial: 0. Best value: 1.32682e+10:   5%|▌         | 1/20 [00:15<04:49, 15.23s/it]

[I 2025-01-24 22:26:18,161] Trial 0 finished with value: 13268209664.0 and parameters: {'layers': 4, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.00111719501851434, 'dropout_rate': 0.2935822604542264, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 1.0995743047712087e-05}. Best is trial 0 with value: 13268209664.0.


Best trial: 1. Best value: 3.33933e+09:  10%|█         | 2/20 [00:43<06:52, 22.92s/it]

[I 2025-01-24 22:26:46,462] Trial 1 finished with value: 3339330816.0 and parameters: {'layers': 1, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.008973549805486144, 'dropout_rate': 0.24011218896979006, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00015342300377368197}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  15%|█▌        | 3/20 [04:50<35:26, 125.07s/it]

[I 2025-01-24 22:30:53,097] Trial 2 finished with value: 12714425344.0 and parameters: {'layers': 10, 'nodes': 128, 'epochs': 40, 'learning_rate': 0.00034171418740720425, 'dropout_rate': 0.46468075456310165, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.00022812762762608346}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  20%|██        | 4/20 [05:16<22:58, 86.14s/it] 

[W 2025-01-24 22:31:19,550] Trial 3 failed with parameters: {'layers': 7, 'nodes': 512, 'epochs': 30, 'learning_rate': 0.0001837787278768557, 'dropout_rate': 0.44484694421779514, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 2.5457475388042588e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:31:19,555] Trial 3 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  25%|██▌       | 5/20 [05:28<14:53, 59.54s/it]

[I 2025-01-24 22:31:31,926] Trial 4 finished with value: 13326066688.0 and parameters: {'layers': 6, 'nodes': 128, 'epochs': 10, 'learning_rate': 0.00021807130019130713, 'dropout_rate': 0.42745896062708133, 'batch_size': 64, 'optimizer': 'Adam', 'weight_decay': 2.5057255682277814e-05}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  30%|███       | 6/20 [06:00<11:41, 50.09s/it]

[I 2025-01-24 22:32:03,663] Trial 5 finished with value: 13265260544.0 and parameters: {'layers': 8, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.0008194594374104536, 'dropout_rate': 0.49964729833528343, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 1.421948392855858e-05}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  35%|███▌      | 7/20 [06:11<08:05, 37.34s/it]

[I 2025-01-24 22:32:14,753] Trial 6 finished with value: 11210125312.0 and parameters: {'layers': 4, 'nodes': 512, 'epochs': 10, 'learning_rate': 0.008326790937436022, 'dropout_rate': 0.1674788927341025, 'batch_size': 64, 'optimizer': 'RMSprop', 'weight_decay': 1.9392719865247568e-05}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  40%|████      | 8/20 [08:30<13:56, 69.72s/it]

[I 2025-01-24 22:34:33,796] Trial 7 finished with value: 13202585600.0 and parameters: {'layers': 3, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.00012749336186290973, 'dropout_rate': 0.22798781644871588, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 6.61012452361262e-05}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  45%|████▌     | 9/20 [08:52<10:01, 54.65s/it]

[W 2025-01-24 22:34:55,302] Trial 8 failed with parameters: {'layers': 4, 'nodes': 512, 'epochs': 30, 'learning_rate': 0.0004396108078038496, 'dropout_rate': 0.2492002809107571, 'batch_size': 64, 'optimizer': 'SGD', 'weight_decay': 4.467987891758455e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:34:55,308] Trial 8 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  50%|█████     | 10/20 [10:46<12:09, 72.90s/it]

[I 2025-01-24 22:36:49,080] Trial 9 finished with value: 4223362816.0 and parameters: {'layers': 9, 'nodes': 512, 'epochs': 30, 'learning_rate': 0.004608742601028917, 'dropout_rate': 0.38293447146604476, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00015914962239967927}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  55%|█████▌    | 11/20 [11:06<08:32, 56.96s/it]

[W 2025-01-24 22:37:09,896] Trial 10 failed with parameters: {'layers': 4, 'nodes': 256, 'epochs': 20, 'learning_rate': 0.007651393143002524, 'dropout_rate': 0.21550613833749332, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.0003434964475429778} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:37:09,900] Trial 10 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  60%|██████    | 12/20 [12:48<09:25, 70.65s/it]

[I 2025-01-24 22:38:51,864] Trial 11 finished with value: 13296114688.0 and parameters: {'layers': 8, 'nodes': 128, 'epochs': 50, 'learning_rate': 0.00011669501687259122, 'dropout_rate': 0.2671207319070979, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.0006851340341955975}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  65%|██████▌   | 13/20 [12:53<05:54, 50.69s/it]

[W 2025-01-24 22:38:56,617] Trial 12 failed with parameters: {'layers': 8, 'nodes': 256, 'epochs': 10, 'learning_rate': 0.00043135397691161116, 'dropout_rate': 0.38667789519722506, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 3.735686979238252e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:38:56,620] Trial 12 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  70%|███████   | 14/20 [13:02<03:49, 38.18s/it]

[I 2025-01-24 22:39:05,890] Trial 13 finished with value: 13314283520.0 and parameters: {'layers': 3, 'nodes': 256, 'epochs': 20, 'learning_rate': 0.0005563245699748222, 'dropout_rate': 0.4109444590190595, 'batch_size': 128, 'optimizer': 'RMSprop', 'weight_decay': 0.0002648646079107434}. Best is trial 1 with value: 3339330816.0.


Best trial: 1. Best value: 3.33933e+09:  75%|███████▌  | 15/20 [13:22<02:42, 32.56s/it]

[W 2025-01-24 22:39:25,409] Trial 14 failed with parameters: {'layers': 1, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0025734566729838718, 'dropout_rate': 0.11316564284974656, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 5.6939397894507956e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:39:25,413] Trial 14 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  80%|████████  | 16/20 [13:42<01:54, 28.67s/it]

[W 2025-01-24 22:39:45,038] Trial 15 failed with parameters: {'layers': 1, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.002903560378269382, 'dropout_rate': 0.11443463665591375, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 5.946123400814716e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:39:45,045] Trial 15 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  85%|████████▌ | 17/20 [14:00<01:16, 25.47s/it]

[W 2025-01-24 22:40:03,094] Trial 16 failed with parameters: {'layers': 1, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0023314228237976344, 'dropout_rate': 0.10013639926351786, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 6.427408404553757e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:40:03,097] Trial 16 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  90%|█████████ | 18/20 [14:19<00:47, 23.71s/it]

[W 2025-01-24 22:40:22,711] Trial 17 failed with parameters: {'layers': 1, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0019515600563334212, 'dropout_rate': 0.10360150316820588, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 6.249189240570181e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:40:22,716] Trial 17 failed with value nan.


Best trial: 1. Best value: 3.33933e+09:  95%|█████████▌| 19/20 [14:41<00:22, 22.99s/it]

[W 2025-01-24 22:40:44,000] Trial 18 failed with parameters: {'layers': 1, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0020042535104264707, 'dropout_rate': 0.10319424795273621, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 5.59542837590734e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:40:44,006] Trial 18 failed with value nan.


Best trial: 1. Best value: 3.33933e+09: 100%|██████████| 20/20 [14:59<00:00, 44.98s/it]

[W 2025-01-24 22:41:02,592] Trial 19 failed with parameters: {'layers': 1, 'nodes': 256, 'epochs': 30, 'learning_rate': 0.0026805257494689734, 'dropout_rate': 0.1066447448934231, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 6.171048270313787e-05} because of the following error: The value nan is not acceptable.
[W 2025-01-24 22:41:02,595] Trial 19 failed with value nan.





In [15]:
# Best model training and saving
best_params = study.best_params
best_model = SalaryPredictor(
    X_train.shape[1], 
    best_params['nodes'], 
    best_params['layers'], 
    best_params['dropout_rate']
)

In [16]:
print("Best hyperparameters:", best_params)
print("Best validation loss:", study.best_value)

Best hyperparameters: {'layers': 1, 'nodes': 512, 'epochs': 20, 'learning_rate': 0.008973549805486144, 'dropout_rate': 0.24011218896979006, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 0.00015342300377368197}
Best validation loss: 3339330816.0


In [18]:
# Save the best model state
torch.save(best_model.state_dict(), 'Best_ANN_regression_model.h5')
# It allow to Preserve the model's learned parameters for future use 
# without having to retrain the model.

In [28]:
# Save the best hyperparameters in a pickle file
with open('Best_Hyperparameters.pkl', 'wb') as f:
    pickle.dump(best_params, f)

In [20]:
# Save the study object
with open('study.pkl', 'wb') as f:
    pickle.dump(study, f)

In [None]:
# Save the best model in onnx format
# dummy_input = torch.randn(1, X_train.shape[1])
# torch.onnx.export(best_model, dummy_input, 'Best_ANN_regression_model.onnx', verbose=True)

# Save the best model in torchscript format
# traced_model = torch.jit.trace(best_model, dummy_input)
# traced_model.save('Best_ANN_regression_model.pt')