### Determining the optimal number of hidden layers and neurons for an Artificial Neural Network (ANN) 
This can be challenging and often requires experimentation. However, there are some guidelines and methods that can help you in making an informed decision:

- Start Simple: Begin with a simple architecture and gradually increase complexity if needed.
- Grid Search/Random Search: Use grid search or random search to try different architectures.
- Cross-Validation: Use cross-validation to evaluate the performance of different architectures.
- Heuristics and Rules of Thumb: Some heuristics and empirical rules can provide starting points, such as:
  -    The number of neurons in the hidden layer should be between the size of the input layer and the size of the output layer.
  -  A common practice is to start with 1–2 hidden layers.

In [1]:
# !pip uninstall -y scikit-learn
# !pip install scikit-learn

# !pip uninstall -y scikit-learn
!pip install scikit-learn==1.5.1

Collecting scikit-learn==1.5.1
  Using cached scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (12 kB)
Using cached scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl (11.0 MB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.1
    Uninstalling scikit-learn-1.6.1:
      Successfully uninstalled scikit-learn-1.6.1
Successfully installed scikit-learn-1.5.1


In [2]:
import os
import pickle

import pandas as pd
import sklearn

print(sklearn.__version__)

sklearn.set_config(display = "text")  #This line will suppress the HTML output.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

1.5.1


In [3]:
from keras import Sequential

In [4]:
# Define the project's root directory (replace with your actual path)
project_root = "/Users/sunnythesage/PythonProjects/Data-Science-BootCamp/03-Deep-Learning-BootCamp/7 - End to End Deep Learning Project Using ANN/advanced-customer-churn-analysis-using-ann"

# Change the current working directory to the project's root
os.chdir(project_root)

# Define the artifacts directory path
artifacts_dir = os.path.join(os.getcwd(), 'artifacts')

# Create the artifacts directory if it doesn't exist
os.makedirs(artifacts_dir, exist_ok = True)

In [5]:
data = pd.read_csv('data/raw/churn-modelling-dataset.csv')

data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis = 1)

label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

onehot_encoder_geo = OneHotEncoder(handle_unknown = 'ignore')
geo_encoded = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoded_df = pd.DataFrame(geo_encoded, columns = onehot_encoder_geo.get_feature_names_out(['Geography']))

data = pd.concat([data.drop('Geography', axis = 1), geo_encoded_df], axis = 1)

X = data.drop('Exited', axis = 1)
y = data['Exited']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Save encoders and scaler for later use

# Save encoders and scaler to the artifacts folder
with open(os.path.join(artifacts_dir, 'label_encoder_gender.pkl'), 'wb') as file:
    pickle.dump(label_encoder_gender, file)

with open(os.path.join(artifacts_dir, 'onehot_encoder_geo.pkl'), 'wb') as file:
    pickle.dump(onehot_encoder_geo, file)

with open(os.path.join(artifacts_dir, 'scaler.pkl'), 'wb') as file:
    pickle.dump(scaler, file)

In [7]:
from keras.src import layers


def create_model(neurons = 32, num_layers = 1, input_dim = None):
    model = Sequential()
    model.add(layers.Input(shape = (input_dim,)))
    model.add(layers.Dense(neurons, activation = 'relu'))

    for _ in range(num_layers - 1):
        model.add(layers.Dense(neurons, activation = 'relu'))

    model.add(layers.Dense(1, activation = 'sigmoid'))
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

    return model

In [8]:
from scikeras.wrappers import KerasClassifier

In [9]:
# Patch KerasClassifier to implement __sklearn_tags__
def patched_sklearn_tags(self):
    return {'estimator_type': 'classifier'}


KerasClassifier.__sklearn_tags__ = patched_sklearn_tags

In [10]:
# Create KerasClassifier with explicit parameter mapping
model = KerasClassifier(
    model = create_model,
    model__neurons = 32,
    model__num_layers = 1,
    model__input_dim = X_train.shape[1],
    epochs = 50,
    batch_size = 32,
    verbose = 1
)

In [11]:
# Check available parameters
print(model.get_params().keys())

# Test with a single fit first
test_model = KerasClassifier(
    model = create_model,
    model__neurons = 32,
    model__num_layers = 1,
    model__input_dim = X_train.shape[1],
    epochs = 5,
    batch_size = 32
)

sklearn.set_config(display = "text")
test_model.fit(X_train[:100], y_train[:100])

dict_keys(['model', 'build_fn', 'warm_start', 'random_state', 'optimizer', 'loss', 'metrics', 'batch_size', 'validation_batch_size', 'verbose', 'callbacks', 'validation_split', 'shuffle', 'run_eagerly', 'epochs', 'model__neurons', 'model__num_layers', 'model__input_dim', 'class_weight'])
Epoch 1/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4298 - loss: 0.7898  
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4484 - loss: 0.7846 
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4649 - loss: 0.7457 
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5028 - loss: 0.7418
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5547 - loss: 0.6936


KerasClassifier(
	model=<function create_model at 0x15f921a80>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=32
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=5
	model__neurons=32
	model__num_layers=1
	model__input_dim=12
	class_weight=None
)

In [12]:
# Define parameter grid with correct prefixes
param_grid = {
    'model__neurons': [16, 32, 64],
    'model__num_layers': [1, 2],
    'model__input_dim': [X_train.shape[1]],  # input_dim is fixed, so it's a list with one element.
    'epochs': [50, 100],
    'batch_size': [32, 64]
}

In [None]:
# Create and run GridSearchCV
grid = GridSearchCV(
    estimator = model,
    param_grid = param_grid,
    cv = 3,
    verbose = 1,
    n_jobs = 1  # Keras models often don't work well with parallel processing
)

grid_result = grid.fit(X_train, y_train)

# Print best parameters
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Fitting 3 folds for each of 24 candidates, totalling 72 fits
Epoch 1/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 531us/step - accuracy: 0.5123 - loss: 0.7410
Epoch 2/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468us/step - accuracy: 0.7988 - loss: 0.4724
Epoch 3/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459us/step - accuracy: 0.8122 - loss: 0.4349
Epoch 4/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468us/step - accuracy: 0.8153 - loss: 0.4274
Epoch 5/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459us/step - accuracy: 0.8148 - loss: 0.4273
Epoch 6/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 450us/step - accuracy: 0.8324 - loss: 0.4036
Epoch 7/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459us/step - accuracy: 0.8268 - loss: 0.4144
Epoch 8/50
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468us/st