In [1]:
import pandas as pd

In [2]:
df = pd.read_csv(r"Churn_Modelling.csv")

In [3]:
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
df.Exited.value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [5]:
X = df.iloc[:,3:-1]
y = df.iloc[:,-1:]

In [6]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder

In [7]:
import numpy as np

In [8]:
import pickle

In [9]:
class CategoricalProcessing:
    
    def __init__(self):
        self.lbl_enc = LabelEncoder()
        self.one_h_enc = OneHotEncoder(drop='first')
    
    def fit_transform(self,df,col_name,multi_lbl):
        if multi_lbl:
            fit_lbl = self.lbl_enc.fit(df[col_name])
            lbl_enc_t = fit_lbl.transform(df[col_name])
            lbl_enc_t = lbl_enc_t.reshape(-1,1)
            fit_one = self.one_h_enc.fit(lbl_enc_t)
            one_h_enc_t = fit_one.transform(lbl_enc_t).toarray()
            var = [fit_lbl,fit_one]
            for i in range(0,len(var)):
                pickle.dump(var[i], open(file=str(col_name)+'_step_'+str(i)+'.pkl',mode = 'wb'))
        else:
            fit_one = self.one_h_enc.fit(np.array(df[col_name]).reshape(-1,1))
            one_h_enc_t = fit_one.transform(np.array(df[col_name]).reshape(-1,1)).toarray()
            pickle.dump(fit_one, open(file=str(col_name)+'_one_h_enc_'+'.pkl',mode = 'wb'))
 
        return one_h_enc_t

In [10]:
a = CategoricalProcessing()

In [11]:
geography = pd.DataFrame(a.fit_transform(X,'Geography',True),columns=['Country_1', 'Country_2'])

In [12]:
gender = pd.DataFrame(a.fit_transform(X,'Gender',False),columns=['Gender'])

In [13]:
X = X.iloc[:,[0,3,4,5,6,7,8,9]]

In [14]:
X = pd.concat([X,geography, gender],axis = 1)

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [16]:
skl = StandardScaler()

In [17]:
from sklearn.pipeline import Pipeline

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1429)

In [19]:
from imblearn.over_sampling import SMOTE

In [20]:
smt = SMOTE(sampling_strategy=1.0, n_jobs=4)

In [21]:
X_train_smt, y_train_smt = smt.fit_resample(X_train, y_train)

In [22]:
pipe = Pipeline(steps=[('standard_scaling',skl)])

In [23]:
pipe.fit(X_train_smt)

Pipeline(steps=[('standard_scaling', StandardScaler())])

In [24]:
X_train_smt = pipe.transform(X_train_smt)

In [25]:
X_test = pipe.transform(X_test)

In [26]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

In [27]:
import wandb
from wandb.keras import WandbCallback
# Step1: Initialize W&B run
wandb.init(project='Chunr_Model')
config = wandb.config
config.learning_rate = 0.01


wandb: Currently logged in as: b2bmle (use `wandb login --relogin` to force relogin)


In [28]:
weights_assigned={0:1,1:4}

In [29]:
from kerastuner import RandomSearch

In [30]:
def build_model(hp):
    model = Sequential()
    for i in range(hp.Int('num_layers',2,20)):
        model.add(Dense(units = hp.Int('units_'+str(i), min_value = 32, max_value = 512, step = 32), activation = 'relu',
                        kernel_initializer = hp.Choice('Weights',['he_normal','glorot_uniform']),input_dim = 11))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())
    model.add(Dense(1, activation = 'sigmoid'))
    model.compile(optimizer=Adam(hp.Choice('learning_rate',[1e-2, 1e-3, 1e-4])), loss='binary_crossentropy', metrics = ['accuracy'])
    return model

In [31]:
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='project1',
    project_name='Churn')

INFO:tensorflow:Reloading Oracle from existing project project1\Churn\oracle.json
INFO:tensorflow:Reloading Tuner from project1\Churn\tuner0.json


In [32]:
tuner.search_space_summary()

Search space summary
Default search space size: 22
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
Weights (Choice)
{'default': 'he_normal', 'conditions': [], 'values': ['he_normal', 'glorot_uniform'], 'ordered': False}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_3 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_4 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_5 (Int)
{'default': Non

In [33]:
tuner.search(X_train, y_train,
             epochs=5,
             validation_data=(X_test, y_test),callbacks = [EarlyStopping(monitor='val_accuracy', min_delta = 0.5, patience=1), WandbCallback()], class_weight=weights_assigned)

INFO:tensorflow:Oracle triggered exit


In [34]:
model = tuner.get_best_models(1)[0]

In [35]:
tuner.results_summary()

Results summary
Results in project1\Churn
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
num_layers: 4
units_0: 192
Weights: glorot_uniform
units_1: 160
learning_rate: 0.0001
units_2: 32
units_3: 32
Score: 0.6754545470078787
Trial summary
Hyperparameters:
num_layers: 13
units_0: 320
Weights: he_normal
units_1: 320
learning_rate: 0.0001
units_2: 96
units_3: 192
units_4: 128
units_5: 128
units_6: 96
units_7: 32
units_8: 512
units_9: 480
units_10: 352
units_11: 64
units_12: 160
units_13: 96
units_14: 224
units_15: 416
units_16: 352
units_17: 96
units_18: 128
Score: 0.6311111251513163
Trial summary
Hyperparameters:
num_layers: 17
units_0: 320
Weights: he_normal
units_1: 384
learning_rate: 0.001
units_2: 192
units_3: 384
units_4: 96
units_5: 480
units_6: 128
units_7: 160
units_8: 384
units_9: 128
units_10: 288
units_11: 416
units_12: 96
units_13: 320
units_14: 352
units_15: 416
units_16: 96
units_17: 480
units_18: 384
Score: 0.585353533

In [36]:
from tensorflow.keras.models import save_model

In [37]:
model.evaluate(X_test, y_test)



[0.5862358212471008, 0.7845454812049866]

In [38]:
save_model(model, 'churn.h5')

In [39]:
from sklearn.metrics import classification_report, confusion_matrix

In [40]:
print(confusion_matrix(y_test, model.predict_classes(X_test)))

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
[[2562   52]
 [ 659   27]]


In [41]:
print(classification_report(y_test, model.predict_classes(X_test)))

              precision    recall  f1-score   support

           0       0.80      0.98      0.88      2614
           1       0.34      0.04      0.07       686

    accuracy                           0.78      3300
   macro avg       0.57      0.51      0.47      3300
weighted avg       0.70      0.78      0.71      3300



In [42]:
pickle.dump(pipe,open('scaling.pkl','wb'))

In [43]:
scl = pickle.load(open('scaling.pkl','rb'))
geo_lbl = pickle.load(open('Geography_step_0.pkl','rb'))
geo_ohe = pickle.load(open('Geography_step_1.pkl','rb'))
gender_ohe = pickle.load(open('Gender_one_h_enc_.pkl','rb'))

In [44]:
data_in = df.iloc[-1,:].values
data_in

array([10000, 15628319, 'Walker', 792, 'France', 'Female', 28, 4,
       130142.79, 1, 1, 0, 38190.78, 0], dtype=object)

In [45]:
geo_in = geo_ohe.transform(geo_lbl.transform([data_in[4]]).reshape(-1,1)).toarray()

In [46]:
gender_in = gender_ohe.transform(np.array(data_in[5]).reshape(-1,1)).toarray()

In [47]:
len(df.columns)

14

In [48]:
inp = []
for i in [3,6,7,8,9,10,11,12]:
    inp.append(data_in[i])

In [49]:
from numpy import hstack

In [50]:
inp = hstack((np.array(inp).reshape(-1,8), geo_in[0].reshape(1,-1), gender_in[0].reshape(1,-1)))

In [51]:
model.predict_classes(inp)

array([[1]])

In [52]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [53]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [54]:

import pickle
import numpy as np
from tensorflow.keras.models import load_model
from numpy import hstack
model = load_model('churn.h5')
scl = pickle.load(open('scaling.pkl','rb'))
geo_lbl = pickle.load(open('Geography_step_0.pkl','rb'))
geo_ohe = pickle.load(open('Geography_step_1.pkl','rb'))
gender_ohe = pickle.load(open('Gender_one_h_enc_.pkl','rb'))
CreditScore = 608
Geography = 'Spain'
Gender = 'Female'
Age = 41
Tenure = 1
Balance = 83807.86
NumOfProducts = 1
HasCrCard = 0
IsActiveMember = 1
EstimatedSalary = 112542.58



In [55]:
geo_in = geo_ohe.transform(geo_lbl.transform([Geography]).reshape(-1,1)).toarray()

gender_in = gender_ohe.transform(np.array(Gender).reshape(-1,1)).toarray()

input_data = []
for i in [int(CreditScore), int(Age), int(Tenure), float(Balance), int(NumOfProducts), int(HasCrCard), int(IsActiveMember),float(EstimatedSalary)]:
    input_data.append(i)

input_data = hstack((np.array(input_data).reshape(-1,8), geo_in[0].reshape(1,-1), gender_in[0].reshape(1,-1)))

In [56]:
scl.transform(input_data)

array([[-0.42989714,  0.03277908, -1.44207632,  0.01673876, -0.7319796 ,
        -1.27360073,  1.29998743,  0.21385586, -0.70920136,  1.97364605,
        -1.05459489]])

In [57]:
model.predict_classes(input_data)

array([[1]])

In [58]:
model.predict_proba(X_test[4].reshape(-1,11))

Instructions for updating:
Please use `model.predict()` instead.


array([[0.39487955]], dtype=float32)