IMPORTING LIBRARIES

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline

READING THE FILE "Churn_Modelling.csv" INTO A DATAFRAME

In [2]:
# Load the raw dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv("Churn_Modelling.csv")
# Preview five randomly chosen rows (no random_state is passed, so the rows
# shown differ from run to run).
df.sample(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
2701,2702,15797010,Shen,649,France,Female,31,2,0.0,2,1,0,15200.61,0
8934,8935,15630641,Shao,846,France,Female,37,6,127103.97,1,1,1,41516.44,0
5250,5251,15720820,Sabbatini,462,Germany,Female,24,9,69881.09,2,0,1,64421.02,0
6445,6446,15719170,Sagese,679,France,Female,30,1,112543.42,1,1,1,179435.21,0
9250,9251,15607330,Vasilyev,713,Spain,Male,42,0,109121.71,1,0,1,167873.49,0


DROPPING 'RowNumber', 'CustomerId', 'Surname' AS THESE ARE OF NO USE

In [10]:
# Remove the row/customer identifier columns, which this notebook treats as
# having no use for the analysis.
# `df` is rebound instead of mutated with inplace=True, and errors='ignore'
# skips columns that are already gone, so re-running this cell is a no-op
# rather than a KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [11]:
# Preview five random rows of the current frame (unseeded, so the sample
# changes on every run).
df.sample(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
5692,573,France,Female,30,6,0.0,2,1,0,66190.21,0
1132,628,France,Male,37,0,0.0,2,1,1,171707.93,0
4601,642,Germany,Male,30,8,134497.27,1,0,0,43250.54,0
8673,765,France,Female,56,1,0.0,1,1,0,13228.93,1
562,607,Spain,Male,36,8,158261.68,1,1,1,76744.72,0


In [12]:
# Per-column dtypes; columns reported as 'object' are non-numeric and still
# need to be encoded.
df.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [13]:
# (number of rows, number of columns) of the current frame.
df.shape

(10000, 11)

PRINTING ALL THE UNIQUE COLUMN VALUES WHICH ARE OF TYPE 'object'

In [14]:
def print_unique_col_values(df):
    """Print the distinct values of every text-like column of ``df``.

    A column counts as text-like when its dtype is ``object`` or one of the
    pandas string dtypes. Comparing the dtype with ``'object'`` alone skips
    columns stored with a dedicated string dtype, so both cases are checked.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected. It is not modified.

    Returns
    -------
    None
        One line per matching column is written to stdout in the form
        ``<column>: <array of unique values>``, in column order.
    """
    for column in df:
        values = df[column]
        is_text_like = (
            pd.api.types.is_object_dtype(values)
            or pd.api.types.is_string_dtype(values)
        )
        if not is_text_like:
            continue
        # drop_duplicates keeps first-seen order like unique(); converting to
        # an object ndarray keeps the printed format the same for every
        # string dtype.
        distinct = values.drop_duplicates().to_numpy(dtype=object)
        print(f'{column}: {distinct}')

In [16]:
# Print the distinct values of each 'object' column of df.
print_unique_col_values(df)

Geography: ['France' 'Spain' 'Germany']
Gender: ['Female' 'Male']


CONVERTING 'Gender' FROM 'object' TO 'int64' TYPE

In [17]:
# Encode 'Gender' as an integer flag: Female -> 1, Male -> 0.
# The result is assigned back to the column. Calling
# replace(..., inplace=True) on df['Gender'] operates on an intermediate
# Series; pandas warns about that chained in-place pattern, and with
# Copy-on-Write enabled it leaves `df` unchanged.
# astype('int64') guarantees an integer column whatever dtype replace()
# returns, and raises if an unmapped label is still present.
df['Gender'] = df['Gender'].replace({'Female': 1, 'Male': 0}).astype('int64')

In [18]:
# Distinct values now stored in the 'Gender' column.
df['Gender'].unique()

array([1, 0], dtype=int64)

MAKING DUMMY (ONE-HOT) COLUMNS FOR 'Geography': THIS REPLACES THE 'object' COLUMN WITH ONE NUMERIC 0/1 COLUMN PER COUNTRY

In [21]:
# One-hot encode 'Geography' into one indicator column per distinct value.
# dtype='int64' is passed explicitly: recent pandas versions default the
# indicator columns to bool, which would not match the 0/1 integers this
# notebook expects and would make the frame a mix of bool and float columns
# (converted to an object array when handed to the model).
df2 = pd.get_dummies(data=df, columns=['Geography'], dtype='int64')
# Preview five random rows of the encoded frame (unseeded).
df2.sample(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
8906,613,1,32,0,0.0,2,0,1,126675.62,0,0,0,1
356,850,1,57,8,126776.3,2,1,1,132298.49,0,0,0,1
8432,479,0,47,6,121797.09,1,0,1,5811.9,1,1,0,0
9676,585,1,34,1,0.0,1,1,1,75503.6,0,1,0,0
8525,712,1,29,8,140170.61,1,1,1,38170.04,0,1,0,0


CHECKING FOR 'NaN' VALUES

In [22]:
# True if at least one cell anywhere in df is missing.
df.isna().to_numpy().any()

False

SCALING ALL THE COLUMNS IN 'cols_to_scale' SO THAT ALL THE VALUES IN THESE COLUMNS WILL BE BETWEEN 0 AND 1

In [23]:
from sklearn.preprocessing import MinMaxScaler

# Columns rescaled to the [0, 1] range; every column not listed here is left
# as it is.
cols_to_scale = [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'EstimatedSalary',
]

# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split, so the min/max of the future test rows influence the
# training features. Consider fitting on the training rows only.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df2[cols_to_scale])
df2[cols_to_scale] = scaled_values

In [24]:
# y is the 'Exited' label column; X holds every other column of df2.
y = df2['Exited']
X = df2.drop(columns='Exited')

SPLITTING TESTING AND TRAINING DATA

In [25]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [26]:
# (rows, feature columns) of the training split.
X_train.shape

(8000, 12)

In [27]:
# First ten rows of the training features.
X_train.head(10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
7751,0.8,1,0.283784,0.6,0.0,0.333333,0,0,0.096273,0,0,1
4154,0.752,0,0.216216,0.3,0.0,0.333333,1,0,0.981478,1,0,0
3881,0.476,1,0.621622,0.3,0.0,0.0,1,1,0.948551,1,0,0
9238,0.846,1,0.432432,0.4,0.0,0.333333,1,0,0.646869,1,0,0
5210,0.402,0,0.22973,0.7,0.517012,0.333333,0,0,0.43467,1,0,0
7487,0.602,1,0.513514,0.4,0.0,0.0,0,0,0.421898,1,0,0
7542,0.314,1,0.216216,0.4,0.0,0.333333,1,1,0.303413,0,0,1
7524,0.62,1,0.297297,0.8,0.66633,0.0,1,1,0.925815,1,0,0
9412,0.75,0,0.108108,0.6,0.393324,0.0,0,0,0.668609,1,0,0
6377,0.684,0,0.202703,0.9,0.0,0.0,1,0,0.567526,1,0,0


IMPORTING 'keras' FROM 'tensorflow', BUILDING A MODEL WITH 3 DENSE LAYERS AND TRAINING IT ON THE TRAINING DATA

In [36]:
import tensorflow as tf
from tensorflow import keras

# Seed TensorFlow's global random generator so that weight initialisation and
# batch shuffling start from the same state on every run; without it each
# execution trains a different model. (Some GPU kernels can still introduce
# small run-to-run differences.)
tf.random.set_seed(5)

# Fully connected binary classifier: two ReLU hidden layers (14 and 5 units)
# followed by one sigmoid unit whose output lies in (0, 1).
# The input width is taken from the training frame rather than hard-coded, so
# the model keeps matching the features if the column set changes.
n_features = X_train.shape[1]
model = keras.Sequential([
    keras.layers.Dense(14, input_shape=(n_features,), activation='relu'),
    keras.layers.Dense(5, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Keep the returned History object (per-epoch loss/accuracy) instead of
# letting its repr become the cell output.
history = model.fit(X_train, y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1fc8358e640>

ACCURACY TEST ON TEST DATA

In [37]:
# Score the trained model on the held-out split. The result is a list
# [loss, accuracy], matching the loss and metric passed to model.compile.
model.evaluate(X_test, y_test)



[0.3671687841415405, 0.8504999876022339]

In [38]:
# Raw outputs of the final sigmoid unit for each test row: one float in
# (0, 1) per row, not yet thresholded into 0/1 class labels.
yp = model.predict(X_test)
# Show the first five predictions.
yp[:5]

array([[0.06412637],
       [0.07539144],
       [0.07964632],
       [0.10522121],
       [0.10220948]], dtype=float32)