## **Data for Dashboard deployment**

In [55]:
# Importing project dependencies 

# Data handling
import pandas as pd
import numpy as np

# Feature Engineering and Modelling 
from sklearn.preprocessing import RobustScaler

# Model export for deployment 
import pickle

# Suppressing warning messages
# NOTE(review): "ignore" with no category silences every warning for the whole
# session, including deprecation and pandas chained-assignment warnings.
from warnings import filterwarnings
filterwarnings("ignore")

In [56]:
# Reading the raw churn data once; `data` is the frame that gets transformed for scoring
data = pd.read_csv('../Data/Churn_Modelling.csv')

# `data1` keeps the original (untransformed) columns minus the target.
# drop() returns a new DataFrame, so later changes to `data` do not touch `data1`.
data1 = data.drop(columns=['Exited'])

In [57]:
# (SQL standard) Normalising column headers: lower-case first, then spaces become underscores
data.columns = data.columns.str.lower().str.replace(' ', '_')

In [58]:
# Dropping fields that won't benefit the deployment (row/customer identifiers, surname, target).
# Reassigning instead of inplace=True keeps the step explicit about which frame it produces.
unused_fields = ['rownumber', 'customerid', 'surname', 'exited']
data = data.drop(columns=unused_fields)

In [59]:
# One-hot encoding the remaining categorical fields, keeping every level (drop_first=False).
# dtype=int pins the 0/1 encoding: pandas 2.0+ returns boolean dummy columns by default.
data = pd.get_dummies(data, drop_first=False, dtype=int)

# Viewing first 5 rows of data
data.head()

Unnamed: 0,creditscore,age,tenure,balance,numofproducts,hascrcard,isactivemember,estimatedsalary,geography_France,geography_Germany,geography_Spain,gender_Female,gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1,1,0


In [60]:
# Numeric fields that get rescaled before scoring
scale_vars = [
    'creditscore',
    'age',
    'tenure',
    'balance',
    'numofproducts',
    'estimatedsalary',
]

# Robust scaler to address outliers: fit on the selected fields and transform them in one call.
# NOTE(review): the scaler is fitted on the data being scored; if the model was trained with a
# scaler fitted on a different sample, that fitted scaler should be loaded here instead —
# confirm against the training notebook.
transformer = RobustScaler()
data[scale_vars] = transformer.fit_transform(data[scale_vars])

In [61]:
# Loading the pickled model; the context manager closes the file handle once loading is done.
# NOTE(review): pickle.load can execute arbitrary code from the file — only load trusted artefacts.
with open(".././rf_churn_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

In [62]:
# Scoring the prepared (encoded and scaled) data with the loaded model:
# class labels first, then the per-class probabilities
feature_matrix = data.values
pred_rf = model.predict(feature_matrix)
pred_prob_rf = model.predict_proba(feature_matrix)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [63]:
# Viewing the predicted probability of each target value (one row per scored record)
pred_prob_rf

array([[0.34, 0.66],
       [0.9 , 0.1 ],
       [0.02, 0.98],
       ...,
       [0.29, 0.71],
       [0.36, 0.64],
       [0.91, 0.09]])

In [64]:
# Helper that extracts a single column from a row-oriented matrix.
# The index is zero-based, so i=1 selects the second value of every row.
def column(matrix, i):
    """Return the element at position ``i`` of every row in ``matrix`` as a list."""
    picked = []
    for row in matrix:
        picked.append(row[i])
    return picked

# Displaying the values at column index 1 (the second column) of pred_prob_rf
column(pred_prob_rf, 1)

[0.66,
 0.1,
 0.98,
 0.02,
 0.03,
 0.09,
 0.03,
 0.77,
 0.09,
 0.01,
 0.03,
 0.0,
 0.0,
 0.02,
 0.02,
 0.05,
 0.92,
 0.07,
 0.28,
 0.0,
 0.01,
 0.01,
 0.75,
 0.03,
 0.12,
 0.0,
 0.23,
 0.02,
 0.15,
 0.1,
 0.85,
 0.01,
 0.08,
 0.06,
 0.01,
 0.87,
 0.02,
 0.02,
 0.05,
 0.02,
 0.02,
 0.87,
 0.15,
 0.88,
 0.15,
 0.1,
 0.71,
 0.81,
 0.13,
 0.13,
 0.11,
 0.09,
 0.0,
 0.84,
 0.87,
 0.09,
 0.23,
 0.06,
 0.85,
 0.15,
 0.32,
 0.03,
 0.01,
 0.09,
 0.02,
 0.03,
 0.18,
 0.03,
 0.03,
 0.05,
 0.95,
 0.18,
 0.04,
 0.03,
 0.0,
 0.03,
 0.0,
 0.09,
 0.0,
 0.13,
 0.0,
 0.72,
 0.06,
 0.05,
 0.08,
 0.07,
 0.65,
 0.0,
 0.93,
 0.0,
 0.88,
 0.05,
 0.0,
 0.0,
 0.03,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.16,
 0.03,
 0.04,
 0.0,
 0.66,
 0.77,
 0.0,
 0.15,
 0.03,
 0.71,
 0.0,
 0.06,
 0.03,
 0.02,
 0.63,
 0.0,
 0.05,
 0.01,
 0.61,
 0.0,
 0.03,
 0.04,
 0.01,
 0.53,
 0.11,
 0.76,
 0.55,
 0.94,
 0.1,
 0.04,
 0.0,
 0.22,
 0.0,
 0.0,
 0.02,
 0.19,
 0.09,
 0.05,
 0.81,
 0.67,
 0.25,
 0.0,
 0.1,
 0.95,
 0.76,
 0.02,
 0.04,
 0.

In [65]:
# Adding prediction columns for PowerBI deployment
data_deploy = data1.copy()
data_deploy['Predictions - Churn or Not'] = pred_rf
data_deploy['Predictions - Probability to Churn'] = column(pred_prob_rf, 1)

# Human-readable label for the predicted class; any value other than 0/1 keeps 'Empty'.
# .loc assigns on data_deploy itself; the previous chained form (df[col][mask] = ...)
# can write to a temporary copy and is silently ignored under pandas Copy-on-Write.
data_deploy['Predictions - Churn or Not Desc'] = 'Empty'
predicted_class = data_deploy['Predictions - Churn or Not']
data_deploy.loc[predicted_class == 0, 'Predictions - Churn or Not Desc'] = 'Retention'
data_deploy.loc[predicted_class == 1, 'Predictions - Churn or Not Desc'] = 'Churn'
data_deploy.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Predictions - Churn or Not,Predictions - Probability to Churn,Predictions - Churn or Not Desc
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0.66,Churn
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0.1,Retention
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0.98,Churn
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0.02,Retention
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0.03,Retention


In [66]:
# Exporting the scored dataset to CSV for the dashboard.
# No `index` argument is passed, so the DataFrame index is written as well.
deploy_csv_path = '../Data/data_deploy.csv'
data_deploy.to_csv(deploy_csv_path)