In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split

In [2]:
# Read the churn dataset from the working directory and preview its first rows.
csv_path='Churn_Modelling.csv'
df=pd.read_csv(csv_path)
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Remove the identifier columns (row number, customer id, surname); they are not
# used as model features.
# Reassigning instead of using inplace=True, together with errors='ignore', keeps
# this cell safe to re-run: a second execution is a no-op rather than a KeyError.
# The redundant axis=1 is dropped because `columns=` already selects the axis.
df=df.drop(columns=['CustomerId','RowNumber','Surname'],errors='ignore')

In [4]:
# Count missing values per column (isna is the pandas alias of isnull).
df.isna().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [5]:
# Show each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Geography        10000 non-null  object 
 2   Gender           10000 non-null  object 
 3   Age              10000 non-null  int64  
 4   Tenure           10000 non-null  int64  
 5   Balance          10000 non-null  float64
 6   NumOfProducts    10000 non-null  int64  
 7   HasCrCard        10000 non-null  int64  
 8   IsActiveMember   10000 non-null  int64  
 9   EstimatedSalary  10000 non-null  float64
 10  Exited           10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


In [6]:
# All columns are clean: there are no missing values.
# Next, convert the categorical columns to numerical form.

In [7]:
# List the distinct Gender values to see which categories need encoding.
df['Gender'].unique()

array(['Female', 'Male'], dtype=object)

In [8]:
# Fit a one-hot encoder on the Gender column and keep the dense 0/1 matrix.
# The fitted encoder object is kept so it can be pickled later.
one_hot_encoder_Gender=OneHotEncoder()
gender_sparse=one_hot_encoder_Gender.fit_transform(df[['Gender']])
gender_enncoder_matrix=gender_sparse.toarray()
gender_enncoder_matrix

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [0., 1.],
       [1., 0.]])

In [9]:
# List the distinct Geography values to see which categories need encoding.
df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [10]:
# Fit a one-hot encoder on the Geography column and keep the dense 0/1 matrix.
# The fitted encoder object is kept so it can be pickled later.
one_hot_encoder_Geography=OneHotEncoder()
geography_sparse=one_hot_encoder_Geography.fit_transform(df[['Geography']])
geography_enncoder_matrix=geography_sparse.toarray()
geography_enncoder_matrix

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [11]:
# Preview the frame while it still holds the raw Geography and Gender columns.
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [12]:
# Replace the raw categorical columns with their one-hot encoded counterparts.
# pd.concat(axis=1) aligns rows on the index, so the encoded frames reuse df's
# index explicitly; with a default RangeIndex they would silently misalign
# (and introduce NaN rows) if df ever carried a non-default index.
gender_encoded_df=pd.DataFrame(
	gender_enncoder_matrix,
	columns=one_hot_encoder_Gender.get_feature_names_out(),
	index=df.index,
)
geography_encoded_df=pd.DataFrame(
	geography_enncoder_matrix,
	columns=one_hot_encoder_Geography.get_feature_names_out(),
	index=df.index,
)
df=pd.concat(
	[df.drop(columns=["Geography","Gender"]),gender_encoded_df,geography_encoded_df],
	axis=1,
)

df.head()


Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Gender_Female,Gender_Male,Geography_France,Geography_Germany,Geography_Spain
0,619,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,1.0,0.0,0.0
1,608,41,1,83807.86,1,0,1,112542.58,0,1.0,0.0,0.0,0.0,1.0
2,502,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,1.0,0.0,0.0
3,699,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,1.0,0.0,0.0
4,850,43,2,125510.82,1,1,1,79084.1,0,1.0,0.0,0.0,0.0,1.0


In [13]:
# Persist both fitted one-hot encoders to disk, one pickle file per encoder.

import pickle
for pkl_name,fitted_encoder in (
	('one_hot_encoder_Gender.pkl',one_hot_encoder_Gender),
	('one_hot_encoder_Geography.pkl',one_hot_encoder_Geography),
):
	with open(pkl_name,"wb") as file:
		pickle.dump(fitted_encoder,file)

In [14]:
# Separate the target (Exited) from the features, then hold out 28% of the rows
# as a test set. The fixed random_state makes the split repeatable.
Y=df['Exited']
X=df.drop(columns=['Exited'])

X_train,X_test,Y_train,Y_test=train_test_split(
	X,
	Y,
	test_size=.28,
	random_state=41,
)

In [15]:
# Standardization: learn the scaling statistics from the training split only,
# then apply that same fitted scaler to both the training and the test split.

scalar=StandardScaler().fit(X_train)
X_train,X_test=scalar.transform(X_train),scalar.transform(X_test)

In [16]:
# Save the fitted standard scaler to disk as a pickle file.

scaler_pickle_path='scalar.pkl'
with open(scaler_pickle_path,"wb") as file:
	pickle.dump(scalar,file)

#### ANN training start

In [17]:
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [18]:
# Feed-forward binary classifier: two ReLU hidden layers and a sigmoid output.
# The input width is declared with an explicit Input object rather than
# `input_shape=` on the first Dense layer, which Keras 3 warns against when used
# inside a Sequential model.
model=Sequential([

	tensorflow.keras.Input(shape=(X_train.shape[1],)), # one input per feature column
	Dense(64,activation='relu'), # first hidden layer
	Dense(32,activation='relu'), # second hidden layer
	Dense(1,activation='sigmoid'), # output layer

])

model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Sequential name=sequential, built=True>

In [19]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

In [20]:
# Binary cross-entropy matches the single sigmoid output unit; Adam is used with
# an explicit learning rate of 0.02.
loss=tensorflow.keras.losses.BinaryCrossentropy()
optimiser=tensorflow.keras.optimizers.Adam(learning_rate=0.02)

In [21]:
# Attach the optimizer, the loss and the accuracy metric to the model.
model.compile(
	loss=loss,
	optimizer=optimiser,
	metrics=['accuracy'],
)

In [22]:
# Create a timestamped log directory for TensorBoard.
# The run directory lives under logs/fit so that it matches the directory
# TensorBoard is launched against (`--logdir logs/fit`); the previous "log/fit/"
# prefix wrote event files where the dashboard never looked.

run_stamp=datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_path="logs/fit/"+run_stamp
tensorflow_callback=TensorBoard(log_dir=log_path,histogram_freq=1)

In [23]:
# Stop training once val_loss has not improved for 10 consecutive epochs, and
# roll the model back to the weights from its best epoch.
early_stopping_callback=EarlyStopping(
	monitor='val_loss',
	patience=10,
	restore_best_weights=True,
)

In [24]:
# Train for up to 100 epochs with TensorBoard logging and early stopping.
# NOTE(review): the test split doubles as the validation set here, so the
# val_* metrics also drive early stopping and are not an untouched hold-out score.
training_callbacks=[tensorflow_callback,early_stopping_callback]
model.fit(
	x=X_train,
	y=Y_train,
	validation_data=(X_test,Y_test),
	epochs=100,
	callbacks=training_callbacks,
)

Epoch 1/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8013 - loss: 0.4555 - val_accuracy: 0.8368 - val_loss: 0.3713
Epoch 2/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8279 - loss: 0.3822 - val_accuracy: 0.8565 - val_loss: 0.3463
Epoch 3/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8539 - loss: 0.3482 - val_accuracy: 0.8526 - val_loss: 0.3605
Epoch 4/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8472 - loss: 0.3460 - val_accuracy: 0.8468 - val_loss: 0.3606
Epoch 5/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8542 - loss: 0.3484 - val_accuracy: 0.8497 - val_loss: 0.3574
Epoch 6/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8553 - loss: 0.3378 - val_accuracy: 0.8501 - val_loss: 0.3508
Epoch 7/100
[1m225/22

<keras.src.callbacks.history.History at 0x198f323e950>

In [25]:
# Write the trained model to disk in HDF5 format (selected by the .h5 suffix).
model_path='model.h5'
model.save(model_path)



In [26]:
# Register the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [28]:
# NOTE(review): 3192 is the PID of the TensorBoard process from the session that
# produced this notebook; it will differ (or not exist) on any other run, so this
# cell is not reproducible. It was also executed out of order (In [28] sits before
# In [27]); update or remove it before a Restart & Run All.
!kill 3192

kill: 3192: No such process


In [27]:
# Launch the TensorBoard UI inline, reading event files from the logs/fit directory.
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 3192), started 0:11:39 ago. (Use '!kill 3192' to kill it.)