In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

In [2]:
data = pd.read_csv('Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
data.drop(['RowNumber', 'CustomerId','Surname'], axis = 1, inplace = True)

In [4]:
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

In [5]:
onehot_encoder_geo = OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoder


array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]], shape=(10000, 3))

In [6]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [7]:
geo_encoded_df = pd.DataFrame(geo_encoder, columns = onehot_encoder_geo.get_feature_names_out(['Geography']), index = data.index)  
geo_encoded_df    

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [8]:
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [9]:
data = pd.concat([data.drop(['Geography'], axis = 1), geo_encoded_df], axis = 1)
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [10]:
X = data.drop('Exited', axis = 1)
y = data['Exited']

In [11]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [12]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((8000, 12), (2000, 12), (8000,), (2000,))

In [13]:
## Scale these features
scaler=StandardScaler()
X_train=scaler.fit_transform(x_train)
X_test=scaler.transform(x_test)

In [14]:
import torch 
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, Dataset


In [15]:
class ChurnDataset(Dataset):
    def __init__(self,features,labels):
        self.features = torch.tensor(features,dtype=torch.float32)
        self.labels = torch.tensor(labels.values,dtype=torch.long)

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self,idx):
        return self.features[idx], self.labels[idx]

In [16]:
train_dataset = ChurnDataset(X_train, y_train)
test_dataset = ChurnDataset(X_test, y_test)

In [17]:
class Model(nn.Module):
    def __init__(self , num_features):
        super(Model, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    def forward(self, features):
        return self.network(features)

In [18]:
import torch.optim as optim

In [19]:
class MyNN(nn.Module):
    def __init__(self, input_dim, hidden_sizes, output_dim):
      super().__init__()

      layers = []
      prev_dim = input_dim

      for h in hidden_sizes:
          layers.append(nn.Linear(prev_dim, h))
          layers.append(nn.BatchNorm1d(h))
          layers.append(nn.ReLU())
          prev_dim = h

      layers.append(nn.Linear(prev_dim, output_dim))
      layers.append(nn.Sigmoid())

      self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [28]:
# objective function
def objective(trial):

  # next hyperparameter values from the search space

  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
  neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
  epochs = trial.suggest_int("epochs", 10, 50, step=10)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
  batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
  optimizer_name = trial.suggest_categorical("optimizer", ['Adam', 'SGD', 'RMSprop'])

  hidden_sizes = []
  for i in range(num_hidden_layers):
      size = trial.suggest_int(f"layer_{i}_size", 8, 256, step=8)
      hidden_sizes.append(size)

  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  # model init
  input_dim = 12
  output_dim = 1

  model = MyNN(input_dim, hidden_sizes, output_dim) 

  # optimizer selection
  criterion = nn.BCELoss()

  if optimizer_name == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
  elif optimizer_name == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
  else:
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

  # training loop

  for epoch in range(epochs):

    for batch_features, batch_labels in train_loader:

      # forward pass
      outputs = model(batch_features)

      # calculate loss
      # loss = criterion(outputs, batch_labels.float().unsqueeze(1))
      loss = criterion(outputs, batch_labels.float().view(-1, 1))

      # back pass
      optimizer.zero_grad()
      loss.backward()

      # update grads
      optimizer.step()


  # evaluation
  model.eval()
  # evaluation on test data
  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

        outputs = model(batch_features)
        predicted = (outputs > 0.5).float()
        total += batch_features.shape[0]
        correct += (predicted.squeeze() == batch_labels.float()).sum().item()
    accuracy = (correct/total )*100

  return accuracy

In [29]:

%pip install optuna

Note: you may need to restart the kernel to use updated packages.


In [30]:

import optuna
study = optuna.create_study(direction='maximize')

[I 2025-11-26 15:20:56,346] A new study created in memory with name: no-name-bb80b3b4-0a2f-4e1d-833e-6c8d611142f2


In [31]:
study.optimize(objective, n_trials=20)

[I 2025-11-26 15:21:07,223] Trial 0 finished with value: 86.05000000000001 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 112, 'epochs': 40, 'learning_rate': 0.0004578152307800804, 'batch_size': 64, 'optimizer': 'Adam', 'layer_0_size': 152}. Best is trial 0 with value: 86.05000000000001.
[I 2025-11-26 15:21:13,281] Trial 1 finished with value: 86.2 and parameters: {'num_hidden_layers': 3, 'neurons_per_layer': 16, 'epochs': 20, 'learning_rate': 0.004813582209417723, 'batch_size': 64, 'optimizer': 'SGD', 'layer_0_size': 232, 'layer_1_size': 128, 'layer_2_size': 192}. Best is trial 1 with value: 86.2.
[I 2025-11-26 15:22:25,912] Trial 2 finished with value: 86.4 and parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 8, 'epochs': 40, 'learning_rate': 0.0002923273871756659, 'batch_size': 16, 'optimizer': 'Adam', 'layer_0_size': 120, 'layer_1_size': 256, 'layer_2_size': 192, 'layer_3_size': 112, 'layer_4_size': 256}. Best is trial 2 with value: 86.4.
[I 2025-11-26 15:22

In [32]:
study.best_params

{'num_hidden_layers': 4,
 'neurons_per_layer': 96,
 'epochs': 20,
 'learning_rate': 0.02395650637602851,
 'batch_size': 32,
 'optimizer': 'Adam',
 'layer_0_size': 216,
 'layer_1_size': 200,
 'layer_2_size': 104,
 'layer_3_size': 104}

In [33]:
study.best_value

86.85000000000001