In [1]:
## Libraries for data manipulation, preprocessing, and serialization
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [2]:
# Loading and inspecting the dataset
# The CSV is read via a relative path, i.e. from the notebook's working
# directory; head() previews the first rows so the raw columns can be checked
# before any preprocessing.
data = pd.read_csv('Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Dropping irrelevant features
# RowNumber, CustomerId and Surname are removed; `columns=` names the axis
# explicitly instead of relying on axis=1.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

Encoding Categorical Variables
The dataset contains two categorical variables, Geography and Gender. These need to be encoded into numerical values before they can be used as model inputs.

In [4]:
## Label encoding for Gender
# Fit the encoder on the raw Gender labels, then overwrite the column with the
# integer codes. The fitted encoder object is kept so it can be pickled later.
# Note: because data['Gender'] is overwritten, re-running this cell on its own
# would refit the encoder on the integer codes instead of the original labels.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])

In [5]:
# Preview the first rows to check that Gender now holds the label-encoded integers.
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [6]:
## One-hot encoding for Geography
from sklearn.preprocessing import OneHotEncoder

# Double brackets keep Geography as a one-column DataFrame (2-D input).
geography_column = data[['Geography']]
onehot_encoder_geo = OneHotEncoder().fit(geography_column)
geo_encoded = onehot_encoder_geo.transform(geography_column)

In [7]:
# Inspect the encoder output; with the default settings it is a sparse matrix.
geo_encoded

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [8]:
# Convert the sparse result to a dense array to see the actual one-hot rows.
geo_encoded.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]], shape=(10000, 3))

In [9]:
# `data` has not been modified by the encoder: Geography is still the text column.
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [10]:
# Column names the fitted encoder generates, one per Geography category.
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

The one-hot encoder creates a separate column for each Geography category, assigning 1 where the category is present and 0 otherwise.

In [11]:
# Wrap the dense one-hot array in a DataFrame with readable column names.
# Reuse `data`'s index so that the later column-wise pd.concat lines the rows
# up by label even if `data` does not carry the default 0..n-1 index (for
# example after rows have been filtered out); with a default index the result
# is identical to building the frame without `index=`.
geo_encoded_df = pd.DataFrame(
    geo_encoded.toarray(),
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
    index=data.index,
)

In [12]:
# Sanity check on the encoded matrix dimensions (rows, one-hot columns).
print(geo_encoded.shape)

(10000, 3)


In [13]:
# Display the one-hot DataFrame that will be merged into `data`.
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [14]:
## Combining encoded features with the original data
# The original text Geography column is dropped first; the one-hot columns are
# then appended side by side (column-wise concatenation aligns rows on index).
data_without_geo = data.drop(columns=['Geography'])
data = pd.concat([data_without_geo, geo_encoded_df], axis='columns')

In [15]:
# Confirm that Geography has been replaced by the one-hot columns.
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [16]:
# Saving encoders for deployment
# Each fitted encoder is pickled to its own file so the exact same
# transformations can be re-applied outside this notebook.
fitted_encoders = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'onehot_encoder_geo.pkl': onehot_encoder_geo,
}
for pickle_path, encoder in fitted_encoders.items():
    with open(pickle_path, 'wb') as file:
        pickle.dump(encoder, file)

Splitting Data into Features and Target
The dataset is divided into independent (X) and dependent (y) features. The 'Exited' column is the target variable.

In [17]:
# Target is the Exited column; the features are every remaining column.
y = data['Exited']
X = data.drop(columns=['Exited'])

In [18]:
# Train-test split
# Hold out 20% of the rows for testing; random_state=42 fixes the shuffle so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [19]:
# Feature scaling
# The scaler learns its statistics from the training rows only; that same
# fitted transformation is then applied to both splits, so nothing from the
# test set influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ..., -0.99850112,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.99850112,
         1.72572313, -0.57638802]], shape=(8000, 12))

In [20]:
# Saving the scaler
# Persist the fitted StandardScaler so new inputs can be scaled identically.
with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

The dataset has been cleaned and transformed using feature engineering techniques such as label encoding, one-hot encoding, and standard scaling. The encoders and scaler have been saved for future use. The data is now ready for training an artificial neural network, which will be covered in the next session.

Key parameters when building the model include:

Initializing a sequential model.
Adding dense layers with a specified number of neurons.
Applying activation functions such as ReLU for hidden layers and sigmoid or softmax for output layers.
Specifying the input shape for the first hidden layer.
Choosing an optimizer for training.
Selecting an appropriate loss function.
Defining metrics to evaluate model performance.

In [21]:
## Building the ANN model with TensorFlow Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [22]:
# Input shape tuple for the network: one entry per column of the scaled X_train.
(X_train.shape[1],)

(12,)

In [23]:
## Constructing the Sequential Model
# Architecture:
#   - Input: one value per column of the scaled training matrix.
#   - Hidden layer 1: 64 neurons, ReLU activation.
#   - Hidden layer 2: 32 neurons, ReLU activation.
#   - Output layer: 1 neuron, sigmoid activation for binary classification.

# Seed Python, NumPy and TensorFlow so that weight initialisation is
# repeatable across "Restart Kernel -> Run All" (42 matches the random_state
# used for the train/test split).
tf.keras.utils.set_random_seed(42)

# Declare the input with an explicit Input object rather than passing
# `input_shape=` to the first Dense layer; Keras emits a UserWarning for the
# latter when the layer is used inside a Sequential model.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),    # First hidden layer
    Dense(32, activation='relu'),    # Second hidden layer
    Dense(1, activation='sigmoid'),  # Output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# Layer-by-layer overview of the network built above.
model.summary() # view the total number of trainable parameters,
# which includes weights and biases across all layers.

In [25]:
## Compiling the model
# Compilation fixes three choices:
#   - optimizer: Adam, which updates the weights during backpropagation; it is
#     instantiated explicitly so a custom learning rate can be set.
#   - loss: binary cross-entropy, for binary classification.
#   - metrics: accuracy, reported during training.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.01)
model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [26]:
## Setting up the TensorBoard callback
# TensorBoard visualises training logs and metrics. Every run writes to its
# own timestamped sub-directory of logs/fit/ so runs do not overwrite each other.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [27]:
## Early stopping
# Watch the validation loss; stop once it has gone 5 epochs (the patience)
# without improving, and roll the model back to the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [28]:
## Training the model
# Fit on the training split for up to 100 epochs, evaluating on
# (X_test, y_test) after every epoch; the TensorBoard callback logs the run and
# EarlyStopping may end training before epoch 100.
# NOTE(review): the test split doubles as the validation set here, so it also
# drives early stopping and best-weight selection. A separate validation split
# would keep the test metrics unbiased.
training_callbacks = [tensorboard_callback, early_stopping_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=training_callbacks,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8309 - loss: 0.4023 - val_accuracy: 0.8475 - val_loss: 0.3688
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8531 - loss: 0.3550 - val_accuracy: 0.8545 - val_loss: 0.3510
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8564 - loss: 0.3488 - val_accuracy: 0.8525 - val_loss: 0.3480
Epoch 4/100
[1m236/250[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - accuracy: 0.8550 - loss: 0.3421

KeyboardInterrupt: 

In [None]:
## Save the trained model
# Writes the model to 'model.h5' in the working directory.
# NOTE(review): the .h5 extension selects the HDF5 format, which recent Keras
# versions describe as legacy in favour of '.keras'. Confirm whether any
# downstream code loads 'model.h5' before renaming the file.
model.save('model.h5')

In [None]:
## Launching TensorBorad
'''
To visualize training logs, load the TensorBoard extension and 
launch a session pointing to the log directory.'''
%load_ext tensorboard
%tensorboard --logdir logs/fit/20250921-215933

In [None]:
# Record the TensorFlow version this notebook was executed with.
import tensorflow as tf
print(tf.__version__)


2.20.0
