In [48]:
import numpy as np
import pandas as pd
import tensorflow as tf

#### *Loading Dataset*

In [49]:
# Read the churn CSV from the local datasets folder and preview the first rows.
CHURN_CSV = "datasets/Churn_Modelling.csv"
df = pd.read_csv(CHURN_CSV)
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### *Data Preprocessing*

In [50]:
# Remove the RowNumber, CustomerId and Surname columns so they never reach the model inputs.
identifier_columns = ["RowNumber", "CustomerId", "Surname"]
df = df.drop(columns=identifier_columns)

In [51]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [52]:
# Integer-encode the Gender column. The fitted encoder is kept in `le_gender`
# so the same mapping can be applied to new records later.
le_gender = LabelEncoder()
le_gender.fit(df["Gender"])
df["Gender"] = le_gender.transform(df["Gender"])

In [53]:
# One-hot encode the Geography column and convert the encoder's output to a
# dense NumPy array.
ohe_geography = OneHotEncoder()
geography_encoded = ohe_geography.fit_transform(df[["Geography"]])
geography = geography_encoded.toarray()

In [54]:
# Inspect the dense one-hot array produced from the Geography column.
geography

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [55]:
# Wrap the one-hot array in a DataFrame, labelling the columns with the
# feature names generated by the fitted encoder.
geography_columns = ohe_geography.get_feature_names_out()
geography_df = pd.DataFrame(geography, columns=geography_columns)

In [56]:
# Preview the one-hot encoded Geography columns.
geography_df.head()

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0


In [57]:
# Swap the raw Geography column for its one-hot encoded columns.
# `columns=` already identifies the axis, so no separate `axis` argument is passed.
df_without_geography = df.drop(columns=["Geography"])
df = pd.concat([df_without_geography, geography_df], axis=1)

In [59]:
# Preview the frame after Gender has been label-encoded and Geography one-hot encoded.
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


#### *Feature and Target Separation*

In [62]:
# Separate the target column `Exited` from the remaining columns, which
# together form the feature matrix.
Y = df["Exited"]
X = df.drop(columns=["Exited"])

#### *Train-Test Split*

In [63]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test split; the fixed random_state makes the
# split repeatable across runs.
split_parts = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, x_test, Y_train, y_test = split_parts

#### *Standardization*

In [65]:
from sklearn.preprocessing import StandardScaler
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
x_test = scaler.transform(x_test)

In [67]:
# Check the (rows, features) shape of the scaled training matrix.
X_train.shape

(8000, 12)

#### *Model training*

In [69]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [73]:
# Feed-forward classifier: two ReLU hidden layers (64 and 32 units) followed
# by a single sigmoid output unit.
# The input width (number of feature columns in X_train) is declared with an
# explicit Input object instead of an `input_shape=` argument on the first
# Dense layer, which recent Keras versions warn against for Sequential models.
model=Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32,activation="relu"),
    Dense(1,activation="sigmoid")
])

In [80]:
# Configure training: an Adam optimizer built with its default arguments,
# binary cross-entropy as the loss, and accuracy as the reported metric.
adam_optimizer = tf.keras.optimizers.Adam()
model.compile(
    optimizer=adam_optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

#### *Callbacks*

In [81]:
import datetime
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
# Each run writes its TensorBoard logs to its own timestamped folder under logs/fit/.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_timestamp
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Early stopping watches val_loss with a patience of 10 epochs and restores
# the best weights seen when it triggers.
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)


In [82]:
# Train for at most 100 epochs; the early-stopping callback may end the run sooner.
# NOTE(review): the held-out test split is also used as validation data here,
# so early stopping is driven by the same rows that would serve for a final
# evaluation. Consider carving a separate validation split out of the training data.
training_callbacks = [tensorboard_callback, early_stopping]
history1 = model.fit(
    X_train,
    Y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    callbacks=training_callbacks,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7913 - loss: 26.9399 - val_accuracy: 0.8045 - val_loss: 1.4058
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7817 - loss: 0.6176 - val_accuracy: 0.8135 - val_loss: 0.4342
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8192 - loss: 0.4414 - val_accuracy: 0.8000 - val_loss: 0.4501
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8214 - loss: 0.4310 - val_accuracy: 0.8225 - val_loss: 0.4158
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8232 - loss: 0.4188 - val_accuracy: 0.8295 - val_loss: 0.4048
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8291 - loss: 0.4128 - val_accuracy: 0.8315 - val_loss: 0.4120
Epoch 7/100
[1m250/2

In [83]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [86]:
# Open TensorBoard inline, pointed at the parent log directory that the training run writes into.
%tensorboard --logdir=logs/fit

#### *Saving our model and preprocessing tools*

In [85]:
import pickle

In [87]:
# Persist the trained network to disk. `save()` returns None, so its result
# must not be assigned back to `model`: doing so would replace the in-memory
# model with None and break any later cell that uses it before reloading.
model.save("model.h5")



In [89]:
# Persist the fitted Gender label encoder so the same mapping can be reused at inference time.
with open("le.pkl", "wb") as encoder_file:
    pickle.dump(le_gender, encoder_file)

In [91]:
# Persist the fitted Geography one-hot encoder for reuse at inference time.
with open("ohe.pkl", "wb") as encoder_file:
    pickle.dump(ohe_geography, encoder_file)

In [92]:
# Persist the fitted StandardScaler so new records can be scaled exactly like the training data.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

#### *Predicting New data*

In [126]:
# A single raw customer record, using the same column names as the training
# data before encoding. Key order is preserved by dict().
new_data = dict([
    ('CreditScore', 502),
    ('Geography', 'France'),
    ('Gender', 'Female'),
    ('Age', 42),
    ('Tenure', 8),
    ('Balance', 159660.80),
    ('NumOfProducts', 3),
    ('HasCrCard', 1),
    ('IsActiveMember', 0),
    ('EstimatedSalary', 113931.57),
])

In [127]:
# Turn the single record into a one-row DataFrame so the fitted encoders and
# scaler can be applied to it.
records = [new_data]
new_data_df = pd.DataFrame(records)

#### *Loading our saved model and preprocessing tools*

In [128]:
# Reload the pickled Gender label encoder.
# Only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("le.pkl", "rb") as encoder_file:
    le = pickle.load(encoder_file)

In [129]:
# Reload the pickled Geography one-hot encoder.
# Only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("ohe.pkl", "rb") as encoder_file:
    ohe = pickle.load(encoder_file)

In [136]:
# Reload the pickled StandardScaler.
# Only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

In [138]:
# Reload the trained network from the file it was saved to.
saved_model_path = "model.h5"
model = tf.keras.models.load_model(saved_model_path)



In [130]:
# Encode Gender with the reloaded label encoder so it uses the training-time mapping.
encoded_gender = le.transform(new_data_df["Gender"])
new_data_df["Gender"] = encoded_gender

In [131]:
# One-hot encode Geography with the reloaded encoder and convert the result to a dense array.
geo_encoded = ohe.transform(new_data_df[["Geography"]])
new_data_geo = geo_encoded.toarray()

In [132]:
# Label the one-hot columns with the encoder's generated feature names.
geo_columns = ohe.get_feature_names_out()
new_data_geo_df = pd.DataFrame(new_data_geo, columns=geo_columns)

In [134]:
# Drop the raw Geography column. `columns=` already identifies the axis, so
# no separate `axis` argument is passed.
new_data_df = new_data_df.drop(columns=["Geography"])

In [135]:
# Append the one-hot Geography columns to the right of the other features.
frames_to_join = [new_data_df, new_data_geo_df]
new_data_df = pd.concat(frames_to_join, axis=1)

In [137]:
# Scale the one-row feature frame with the reloaded scaler.
# NOTE(review): this relies on new_data_df having its columns in the same
# order as the frame the scaler was fitted on — confirm if new_data changes.
scaler_data = scaler.transform(new_data_df)

In [142]:
# Run the model on the scaled row and pull out the single scalar at
# position [0][0] of the result.
prediction_batch = model.predict(scaler_data)
predict = prediction_batch[0][0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step


In [146]:
# Values below 0.5 are reported as "Not churn"; anything else as "Churn".
churn_label = "Not churn" if predict < 0.5 else "Churn"
print(churn_label)

Churn
