In [None]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# **Customer Churn - Use Case Implementation**

# **Disclaimer**

All software and hardware used or referenced in this guide belong to their respective vendor. We developed this guide based on our development infrastructure and this guide may or may not work on others systems and technical infrastructure. We are not liable for any direct or indirect problems caused by users using this guide.

# **Executive Summary**

The purpose of this document is to provide adequate information to users to implement Logistic Regression or XGBoost (supervised machine learning) model in Google Colab. In order to achieve this, we are using one of the most common problem which is Customer Churn that occurs at most of the industries.

# **Problem Statement**

Our Client experiences high customer churn rate which indicates that the company is losing customers at an alarming rate and it can be due to various reasons. Our objective is to predict the customers accurately who are more likely to churn using Machine Learning Algorithm. This will help the company to know in advance, the customers who are more likely to leave company's service and compe up with retention strategies/policies.

# **Challenges**

To succeed in today’s complex business scenario, companies need to build and deploy an effective customer churn analysis model in order to monitor churn rate and maximize customer retention. Acquiring new customers always costs heavily and this makes the predictive churn model appealing for businesses that aim at retaining customers and maximizing profits.

# **STEP 0: INI FILE CONFIGURATION**

In [None]:
import os

vAR_Config = configparser.ConfigParser(allow_no_value=True)

vAR_INI_FILE_PATH = os.getenv('CUS_CHURN_FT_XG')

vAR_INI_FILE_PATH

vAR_Config.read(vAR_INI_FILE_PATH)

vAR_Data = vAR_Config.sections()

vAR_Config.sections()

vAR_Train_Data = vAR_Config['FILE PATH']['TRAINING_DATA']

vAR_Training_Data_Excel_Worsheet = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET']
print(vAR_Training_Data_Excel_Worsheet)

vAR_Train_Data_CUS = vAR_Config['FILE PATH']['TRAINING_DATA(CUS)']

vAR_Training_Data_Excel_Worsheet_CUS  = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET(CUS)']
print(vAR_Training_Data_Excel_Worsheet_CUS)

vAR_Training_Data_CBP = vAR_Config['FILE PATH']['TRAINING_DATA(CBP)']

vAR_Training_Data_Excel_Worsheet_CBP = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET(CBP)']
print(vAR_Training_Data_Excel_Worsheet_CBP)

vAR_Training_Data_CPP = vAR_Config['FILE PATH']['TRAINING_DATA(CPP)']

vAR_Training_Data_Excel_Worsheet_CPP = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET(CPP)']
print(vAR_Training_Data_Excel_Worsheet_CPP)

vAR_Training_Data_CSP = vAR_Config['FILE PATH']['TRAINING_DATA(CSP)']

vAR_Training_Data_Excel_Worsheet_CSP = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET(CSP)']
print(vAR_Training_Data_Excel_Worsheet_CSP)

vAR_Training_Data_CSQ = vAR_Config['FILE PATH']['TRAINING_DATA(CSQ)']

vAR_Training_Data_Excel_Worsheet_CSQ = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET(CSQ)']
print(vAR_Training_Data_Excel_Worsheet_CSQ)

vAR_Training_Data_CS = vAR_Config['FILE PATH']['TRAINING_DATA(CS)']

vAR_Training_Data_Excel_Worsheet_CS = vAR_Config['FILE PATH']['TRAINING_DATA_EXCEL_WORKSHEET(CS)']
print(vAR_Training_Data_Excel_Worsheet_CS)

vAR_Test_Data = vAR_Config['FILE PATH']['TEST_DATA']
print(vAR_Test_Data)

vAR_Test_Data_Excel_Worsheet = vAR_Config['FILE PATH']['TEST_DATA_EXCEL_WORKSHEET']
print(vAR_Test_Data_Excel_Worsheet)

vAR_Model_Outcome_LG = vAR_Config['FILE PATH']['MODEL_OUTCOME_LG']
print(vAR_Model_Outcome_LG)
vAR_Model_Outcome_XG = vAR_Config['FILE PATH']['MODEL_OUTCOME_XG']
print(vAR_Model_Outcome_XG)

# **STEP 1: IMPORT THE LIBRARIES**

In [1]:
import pandas as vAR_pd

import xgboost as vAR_xgb 

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from xgboost import XGBClassifier

!pip install featuretools
import featuretools as ft

Collecting featuretools
[?25l  Downloading https://files.pythonhosted.org/packages/e2/13/d28a29bb0438d69496a97a4c6818ee3c0b3a0f9ade169bd252c05370e495/featuretools-0.23.2-py3-none-any.whl (296kB)
[K     |█                               | 10kB 22.5MB/s eta 0:00:01[K     |██▏                             | 20kB 28.6MB/s eta 0:00:01[K     |███▎                            | 30kB 20.2MB/s eta 0:00:01[K     |████▍                           | 40kB 16.9MB/s eta 0:00:01[K     |█████▌                          | 51kB 14.7MB/s eta 0:00:01[K     |██████▋                         | 61kB 14.3MB/s eta 0:00:01[K     |███████▊                        | 71kB 13.2MB/s eta 0:00:01[K     |████████▉                       | 81kB 14.4MB/s eta 0:00:01[K     |██████████                      | 92kB 13.8MB/s eta 0:00:01[K     |███████████                     | 102kB 14.8MB/s eta 0:00:01[K     |████████████▏                   | 112kB 14.8MB/s eta 0:00:01[K     |█████████████▎                  |

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **STEP 2: IMPORT THE TRAINING DATA**

In [5]:
import warnings
warnings.filterwarnings('ignore')
Customer_Label = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/Customer_label.xlsx")
Customer = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/Customer.xlsx")
Customer_Buying_Pattern = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/customer_buying_pattern.xlsx")
Customer_Product_Price = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/Customer_Product_Price.xlsx")
Customer_Spending_Power = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/Customer_Spending_Power.xlsx")
Customer_Service_Quality = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/Customer_Service_Quality.xlsx")
Customer_Satisfaction = vAR_pd.read_excel("/content/drive/MyDrive/Customer_Churn_Datasets/Customer_Satisfaction.xlsx")

In [6]:
#Checking for null values
print(Customer_Label.isnull().sum())
print(Customer.isnull().sum())
print(Customer_Buying_Pattern.isnull().sum())
print(Customer_Product_Price.isnull().sum())
print(Customer_Spending_Power.isnull().sum())
print(Customer_Service_Quality.isnull().sum())
print(Customer_Satisfaction.isnull().sum())

CustomerID          0
Customer Code       0
CustomerRegion      0
CustomerLocation    0
CustomerChurn       1
dtype: int64
CustomerID          0
Customer Code       0
CustomerRegion      0
CustomerLocation    0
dtype: int64
CustomerID                       0
Product                          0
Quantity(in lots)                0
CBPID                            0
customer lifetime in years       0
Average yearly purchase          0
average Monthly wise purchase    0
Last year purchase               0
dtype: int64
CustomerID                     0
Product                        0
CPPID                          0
Price Amount                   0
product average unit price     0
last year unit price           0
dtype: int64
CustomerID                              0
Product                                 0
CSPID                                   0
 Amount spent in lifetime(in millon)    0
Average amount spend per year           0
last year amount spend                  0
dtype: int64
Custome

In [7]:
#Replacing missing value with Mode value 
Customer_Label['CustomerChurn'].fillna(Customer_Label['CustomerChurn'].mode()[0], inplace=True)
print(Customer_Label.isnull().sum())

CustomerID          0
Customer Code       0
CustomerRegion      0
CustomerLocation    0
CustomerChurn       0
dtype: int64


# **STEP 3(b): FEATURE SELECTION(USING FEATURE TOOLS)**

In [8]:
#DEFINING THE ENTITIES
es = ft.EntitySet(id="CUSTOMER_CHURN")
es1 = es.entity_from_dataframe(entity_id = 'Customer', dataframe = Customer, index='CustomerID')
es2 = es.entity_from_dataframe(entity_id = 'CustomerBuyingPattern', dataframe = Customer_Buying_Pattern, index='CBPID')
es3 = es.entity_from_dataframe(entity_id = 'CustomerProductPurchase', dataframe = Customer_Product_Price, index='CPPID')
es4 = es.entity_from_dataframe(entity_id = 'CustomerSpendingPower', dataframe = Customer_Spending_Power, index = 'CSPID')
es5 = es.entity_from_dataframe(entity_id = 'CustomerServiceQuality', dataframe = Customer_Service_Quality, index = 'CSQID')
es6 = es.entity_from_dataframe(entity_id = 'CustomerSatisfaction', dataframe = Customer_Satisfaction, index = 'CSID')
es


Entityset: CUSTOMER_CHURN
  Entities:
    Customer [Rows: 2104, Columns: 4]
    CustomerBuyingPattern [Rows: 2104, Columns: 8]
    CustomerProductPurchase [Rows: 2104, Columns: 6]
    CustomerSpendingPower [Rows: 2104, Columns: 6]
    CustomerServiceQuality [Rows: 2104, Columns: 6]
    CustomerSatisfaction [Rows: 2104, Columns: 6]
  Relationships:
    No relationships

In [9]:
#DEFINING THE RELATIONSHIP
es.add_relationship(ft.Relationship(es['Customer']['CustomerID'],es['CustomerBuyingPattern']['CustomerID']))
es.add_relationship(ft.Relationship(es['Customer']['CustomerID'],es['CustomerProductPurchase']['CustomerID']))
es.add_relationship(ft.Relationship(es['Customer']['CustomerID'],es['CustomerSpendingPower']['CustomerID']))
es.add_relationship(ft.Relationship(es['Customer']['CustomerID'],es['CustomerServiceQuality']['CustomerID']))
es.add_relationship(ft.Relationship(es['Customer']['CustomerID'],es['CustomerSatisfaction']['CustomerID']))

Entityset: CUSTOMER_CHURN
  Entities:
    Customer [Rows: 2104, Columns: 4]
    CustomerBuyingPattern [Rows: 2104, Columns: 8]
    CustomerProductPurchase [Rows: 2104, Columns: 6]
    CustomerSpendingPower [Rows: 2104, Columns: 6]
    CustomerServiceQuality [Rows: 2104, Columns: 6]
    CustomerSatisfaction [Rows: 2104, Columns: 6]
  Relationships:
    CustomerBuyingPattern.CustomerID -> Customer.CustomerID
    CustomerProductPurchase.CustomerID -> Customer.CustomerID
    CustomerSpendingPower.CustomerID -> Customer.CustomerID
    CustomerServiceQuality.CustomerID -> Customer.CustomerID
    CustomerSatisfaction.CustomerID -> Customer.CustomerID

In [10]:
feature_matrix_Customer, feature_defs = ft.dfs(entityset=es, target_entity="Customer")
feature_matrix_Customer

Unnamed: 0_level_0,Customer Code,CustomerRegion,CustomerLocation,COUNT(CustomerBuyingPattern),MAX(CustomerBuyingPattern.Average yearly purchase),MAX(CustomerBuyingPattern.Last year purchase),MAX(CustomerBuyingPattern.Quantity(in lots)),MAX(CustomerBuyingPattern.average Monthly wise purchase),MAX(CustomerBuyingPattern.customer lifetime in years),MEAN(CustomerBuyingPattern.Average yearly purchase),MEAN(CustomerBuyingPattern.Last year purchase),MEAN(CustomerBuyingPattern.Quantity(in lots)),MEAN(CustomerBuyingPattern.average Monthly wise purchase),MEAN(CustomerBuyingPattern.customer lifetime in years),MIN(CustomerBuyingPattern.Average yearly purchase),MIN(CustomerBuyingPattern.Last year purchase),MIN(CustomerBuyingPattern.Quantity(in lots)),MIN(CustomerBuyingPattern.average Monthly wise purchase),MIN(CustomerBuyingPattern.customer lifetime in years),MODE(CustomerBuyingPattern.Product),NUM_UNIQUE(CustomerBuyingPattern.Product),SKEW(CustomerBuyingPattern.Average yearly purchase),SKEW(CustomerBuyingPattern.Last year purchase),SKEW(CustomerBuyingPattern.Quantity(in lots)),SKEW(CustomerBuyingPattern.average Monthly wise purchase),SKEW(CustomerBuyingPattern.customer lifetime in years),STD(CustomerBuyingPattern.Average yearly purchase),STD(CustomerBuyingPattern.Last year purchase),STD(CustomerBuyingPattern.Quantity(in lots)),STD(CustomerBuyingPattern.average Monthly wise purchase),STD(CustomerBuyingPattern.customer lifetime in years),SUM(CustomerBuyingPattern.Average yearly purchase),SUM(CustomerBuyingPattern.Last year purchase),SUM(CustomerBuyingPattern.Quantity(in lots)),SUM(CustomerBuyingPattern.average Monthly wise purchase),SUM(CustomerBuyingPattern.customer lifetime in years),COUNT(CustomerProductPurchase),MAX(CustomerProductPurchase.Price Amount),MAX(CustomerProductPurchase.last year unit price),MAX(CustomerProductPurchase.product average unit price ),...,MAX(CustomerServiceQuality.Service Failure Rate%pa),MAX(CustomerServiceQuality.last year failure rate%),MEAN(CustomerServiceQuality.Service Call),MEAN(CustomerServiceQuality.Service Failure Rate%pa),MEAN(CustomerServiceQuality.last year failure rate%),MIN(CustomerServiceQuality.Service Call),MIN(CustomerServiceQuality.Service Failure Rate%pa),MIN(CustomerServiceQuality.last year failure rate%),MODE(CustomerServiceQuality.Product),NUM_UNIQUE(CustomerServiceQuality.Product),SKEW(CustomerServiceQuality.Service Call),SKEW(CustomerServiceQuality.Service Failure Rate%pa),SKEW(CustomerServiceQuality.last year failure rate%),STD(CustomerServiceQuality.Service Call),STD(CustomerServiceQuality.Service Failure Rate%pa),STD(CustomerServiceQuality.last year failure rate%),SUM(CustomerServiceQuality.Service Call),SUM(CustomerServiceQuality.Service Failure Rate%pa),SUM(CustomerServiceQuality.last year failure rate%),COUNT(CustomerSatisfaction),MAX(CustomerSatisfaction.Average call in year),MAX(CustomerSatisfaction.Service Call),MAX(CustomerSatisfaction.last year calls),MEAN(CustomerSatisfaction.Average call in year),MEAN(CustomerSatisfaction.Service Call),MEAN(CustomerSatisfaction.last year calls),MIN(CustomerSatisfaction.Average call in year),MIN(CustomerSatisfaction.Service Call),MIN(CustomerSatisfaction.last year calls),MODE(CustomerSatisfaction.Product),NUM_UNIQUE(CustomerSatisfaction.Product),SKEW(CustomerSatisfaction.Average call in year),SKEW(CustomerSatisfaction.Service Call),SKEW(CustomerSatisfaction.last year calls),STD(CustomerSatisfaction.Average call in year),STD(CustomerSatisfaction.Service Call),STD(CustomerSatisfaction.last year calls),SUM(CustomerSatisfaction.Average call in year),SUM(CustomerSatisfaction.Service Call),SUM(CustomerSatisfaction.last year calls)
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
18001,CUST1,AP,UAE,1.0,370.0,18.0,21.0,33.0,5.0,370.0,18.0,21.0,33.0,5.0,370.0,18.0,21.0,33.0,5.0,Floor Mopper,1.0,,,,,,,,,,,370.0,18.0,21.0,33.0,5.0,1.0,1877.0,65.0,59.0,...,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,0.0,0.0,0.0
18002,CUST2,Europe,United Kingdom,1.0,353.0,1.0,12.0,29.0,6.0,353.0,1.0,12.0,29.0,6.0,353.0,1.0,12.0,29.0,6.0,Floor Mopper,1.0,,,,,,,,,,,353.0,1.0,12.0,29.0,6.0,0.0,,,,...,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,11.0,44.0,58.0,11.000000,44.000000,58.0,11.0,44.0,58.0,Floor Mat,1.0,,,,,,,11.0,44.0,58.0
18003,CUST3,Asia,India,2.0,468.0,31.0,83.0,24.0,4.0,446.0,26.0,71.0,22.0,4.0,424.0,21.0,59.0,20.0,4.0,Floor Mat,1.0,,,,,,31.112698,7.071068,16.970563,2.828427,0.000000,892.0,52.0,142.0,44.0,8.0,1.0,1440.0,59.0,60.0,...,42.0,42.0,49.000000,42.000000,42.000000,49.0,42.0,42.0,Floor Mat,1.0,,,,,,,49.0,42.0,42.0,1.0,20.0,51.0,67.0,20.000000,51.000000,67.0,20.0,51.0,67.0,Towel,1.0,,,,,,,20.0,51.0,67.0
18004,CUST4,Australia,Australia,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,1.0,711.0,69.0,58.0,...,66.0,0.0,79.000000,66.000000,0.000000,79.0,66.0,0.0,Floor Mopper,1.0,,,,,,,79.0,66.0,0.0,2.0,5.0,83.0,74.0,4.000000,74.500000,53.0,3.0,66.0,32.0,Floor Mat,2.0,,,,1.414214,12.020815,29.698485,8.0,149.0,106.0
18005,CUST5,Europe,Germany,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,2.0,1315.0,69.0,56.0,...,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,2.0,76.0,2.0,2.000000,76.000000,2.0,2.0,76.0,2.0,Towel,1.0,,,,,,,2.0,76.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20100,CUST2100,North America,JAPAN,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,3.0,1712.0,67.0,58.0,...,56.0,76.0,73.000000,56.000000,76.000000,73.0,56.0,76.0,Floor Mopper,1.0,,,,,,,73.0,56.0,76.0,0.0,,,,,,,,,,,,,,,,,,0.0,0.0,0.0
20101,CUST2101,Asia,UAE,1.0,301.0,30.0,18.0,30.0,6.0,301.0,30.0,18.0,30.0,6.0,301.0,30.0,18.0,30.0,6.0,Towel,1.0,,,,,,,,,,,301.0,30.0,18.0,30.0,6.0,1.0,628.0,57.0,57.0,...,44.0,12.0,83.000000,44.000000,12.000000,83.0,44.0,12.0,Floor Mopper,1.0,,,,,,,83.0,44.0,12.0,2.0,16.0,77.0,72.0,15.000000,67.000000,51.5,14.0,57.0,31.0,Floor Mat,1.0,,,,1.414214,14.142136,28.991378,30.0,134.0,103.0
20102,CUST2102,North America,DUBAI,1.0,404.0,29.0,16.0,34.0,7.0,404.0,29.0,16.0,34.0,7.0,404.0,29.0,16.0,34.0,7.0,Floor Mopper,1.0,,,,,,,,,,,404.0,29.0,16.0,34.0,7.0,3.0,1775.0,68.0,59.0,...,58.0,58.0,59.666667,37.666667,34.666667,41.0,1.0,8.0,Floor Mat,3.0,-1.293343,-1.701304,-0.585583,16.653328,31.817186,25.166115,179.0,113.0,104.0,1.0,3.0,75.0,42.0,3.000000,75.000000,42.0,3.0,75.0,42.0,Floor Mopper,1.0,,,,,,,3.0,75.0,42.0
20103,CUST2103,Asia,FRANCE,1.0,356.0,1.0,100.0,30.0,6.0,356.0,1.0,100.0,30.0,6.0,356.0,1.0,100.0,30.0,6.0,Floor Mopper,1.0,,,,,,,,,,,356.0,1.0,100.0,30.0,6.0,0.0,,,,...,,,,,,,,,,,,,,,,,0.0,0.0,0.0,3.0,13.0,83.0,67.0,11.666667,71.666667,44.0,9.0,54.0,20.0,Floor Mat,2.0,-1.732051,-1.531567,-0.191013,2.309401,15.502688,23.515952,35.0,215.0,132.0


In [11]:
feature_matrix_Customer, feature_defs = ft.dfs(entityset=es, target_entity="Customer", agg_primitives=["SUM"], max_depth=2) 
feature_matrix_Customer

Unnamed: 0_level_0,Customer Code,CustomerRegion,CustomerLocation,SUM(CustomerBuyingPattern.Average yearly purchase),SUM(CustomerBuyingPattern.Last year purchase),SUM(CustomerBuyingPattern.Quantity(in lots)),SUM(CustomerBuyingPattern.average Monthly wise purchase),SUM(CustomerBuyingPattern.customer lifetime in years),SUM(CustomerProductPurchase.Price Amount),SUM(CustomerProductPurchase.last year unit price),SUM(CustomerProductPurchase.product average unit price ),SUM(CustomerSpendingPower. Amount spent in lifetime(in millon)),SUM(CustomerSpendingPower.Average amount spend per year),SUM(CustomerServiceQuality.Service Call),SUM(CustomerServiceQuality.Service Failure Rate%pa),SUM(CustomerServiceQuality.last year failure rate%),SUM(CustomerSatisfaction.Average call in year),SUM(CustomerSatisfaction.Service Call),SUM(CustomerSatisfaction.last year calls)
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
18001,CUST1,AP,UAE,370.0,18.0,21.0,33.0,5.0,1877.0,65.0,59.0,1647.0,99.0,0.0,0.0,0.0,0.0,0.0,0.0
18002,CUST2,Europe,United Kingdom,353.0,1.0,12.0,29.0,6.0,0.0,0.0,0.0,3887.0,457.0,0.0,0.0,0.0,11.0,44.0,58.0
18003,CUST3,Asia,India,892.0,52.0,142.0,44.0,8.0,1440.0,59.0,60.0,1013.0,110.0,49.0,42.0,42.0,20.0,51.0,67.0
18004,CUST4,Australia,Australia,0.0,0.0,0.0,0.0,0.0,711.0,69.0,58.0,2538.0,322.5,79.0,66.0,0.0,8.0,149.0,106.0
18005,CUST5,Europe,Germany,0.0,0.0,0.0,0.0,0.0,2431.0,126.0,110.0,0.0,0.0,0.0,0.0,0.0,2.0,76.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20100,CUST2100,North America,JAPAN,0.0,0.0,0.0,0.0,0.0,4667.0,192.0,169.0,0.0,0.0,73.0,56.0,76.0,0.0,0.0,0.0
20101,CUST2101,Asia,UAE,301.0,30.0,18.0,30.0,6.0,628.0,57.0,57.0,1623.0,144.0,83.0,44.0,12.0,30.0,134.0,103.0
20102,CUST2102,North America,DUBAI,404.0,29.0,16.0,34.0,7.0,3956.0,191.0,166.0,1336.0,189.0,179.0,113.0,104.0,3.0,75.0,42.0
20103,CUST2103,Asia,FRANCE,356.0,1.0,100.0,30.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,215.0,132.0


In [12]:
vAR_Featuresft = feature_matrix_Customer.iloc[:,3:]
vAR_Featuresft.head()

Unnamed: 0_level_0,SUM(CustomerBuyingPattern.Average yearly purchase),SUM(CustomerBuyingPattern.Last year purchase),SUM(CustomerBuyingPattern.Quantity(in lots)),SUM(CustomerBuyingPattern.average Monthly wise purchase),SUM(CustomerBuyingPattern.customer lifetime in years),SUM(CustomerProductPurchase.Price Amount),SUM(CustomerProductPurchase.last year unit price),SUM(CustomerProductPurchase.product average unit price ),SUM(CustomerSpendingPower. Amount spent in lifetime(in millon)),SUM(CustomerSpendingPower.Average amount spend per year),SUM(CustomerServiceQuality.Service Call),SUM(CustomerServiceQuality.Service Failure Rate%pa),SUM(CustomerServiceQuality.last year failure rate%),SUM(CustomerSatisfaction.Average call in year),SUM(CustomerSatisfaction.Service Call),SUM(CustomerSatisfaction.last year calls)
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
18001,370.0,18.0,21.0,33.0,5.0,1877.0,65.0,59.0,1647.0,99.0,0.0,0.0,0.0,0.0,0.0,0.0
18002,353.0,1.0,12.0,29.0,6.0,0.0,0.0,0.0,3887.0,457.0,0.0,0.0,0.0,11.0,44.0,58.0
18003,892.0,52.0,142.0,44.0,8.0,1440.0,59.0,60.0,1013.0,110.0,49.0,42.0,42.0,20.0,51.0,67.0
18004,0.0,0.0,0.0,0.0,0.0,711.0,69.0,58.0,2538.0,322.5,79.0,66.0,0.0,8.0,149.0,106.0
18005,0.0,0.0,0.0,0.0,0.0,2431.0,126.0,110.0,0.0,0.0,0.0,0.0,0.0,2.0,76.0,2.0


# **DEFINING THE LABELS**

In [13]:
vAR_label = Customer_Label.iloc[:,4:]
vAR_label

Unnamed: 0,CustomerChurn
0,1.0
1,1.0
2,0.0
3,0.0
4,1.0
...,...
2099,0.0
2100,1.0
2101,0.0
2102,1.0


In [14]:
vAR_Featuresft=vAR_Featuresft.dropna()

In [15]:
vAR_Featuresft=vAR_Featuresft.astype(int)

# **STEP 4: IMPORT THE MODEL**

In [16]:
vAR_X_TRAIN, vAR_X_TEST, vAR_Y_TRAIN, vAR_Y_TEST = train_test_split(vAR_Featuresft, vAR_label, test_size=0.20, random_state=0)

#Logistic Regression
vAR_Model1 = LogisticRegression()

#XGBoost
vAR_Model2 = XGBClassifier(eta=0.01,gamma=10)

In [None]:
#Fitting the logistic regression model
vAR_Model1.fit(vAR_X_TRAIN,vAR_Y_TRAIN)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
#Fitting the XGBoost model
vAR_Model2.fit(vAR_X_TRAIN,vAR_Y_TRAIN)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eta=0.01, gamma=10,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [None]:
#Prediction using Logistic regression
vAR_Labels_predLG = vAR_Model1.predict(vAR_X_TEST)

In [None]:
#Prediction using XGBoost
vAR_Labels_predXG = vAR_Model2.predict(vAR_X_TEST)

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(vAR_Y_TEST, vAR_Labels_predLG)

array([[224,  26],
       [154,  17]])

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(vAR_Y_TEST, vAR_Labels_predXG)

array([[250,   0],
       [171,   0]])

In [None]:
#Checking accuracy for XGBoost
from sklearn.metrics import accuracy_score
accuracy_score(vAR_Y_TEST, vAR_Labels_predXG)

0.5938242280285035

In [None]:
#Checking accuracy for Logistic Regression
from sklearn.metrics import accuracy_score
accuracy_score(vAR_Y_TEST, vAR_Labels_predLG)

0.5724465558194775

In [None]:
#from sklearn.model_selection import GridSearchCV
#from sklearn import linear_model
#import numpy as np
#logistic = linear_model.LogisticRegression()
#penalty = ['l1', 'l2']
#C = np.logspace(6,8,10)
#hyperparameters = dict(C=C, penalty=penalty)
#clf = GridSearchCV(logistic, hyperparameters, scoring='f1', cv=5, verbose=0)
#y_fit=clf.fit(vAR_X_TRAIN,vAR_Y_TRAIN)
#y_pred = clf.predict(vAR_X_TEST)

In [None]:
#from sklearn.metrics import confusion_matrix
#confusion_matrix(vAR_Y_TEST, y_pred)

array([[225,  25],
       [154,  17]])

In [None]:
#from sklearn.ensemble import RandomForestClassifier
#modelrandom3 = RandomForestClassifier(n_estimators= 100, min_samples_split= 5, min_samples_leaf=4, max_features='auto', max_depth=15, bootstrap=True)
#y_fit = modelrandom3.fit(vAR_X_TRAIN,vAR_Y_TRAIN)
#y_pred = y_fit.predict(vAR_X_TEST)
#from sklearn.metrics import confusion_matrix
#confusion_matrix(vAR_Y_TEST, y_pred)

array([[176,  74],
       [131,  40]])

In [None]:
#from sklearn.metrics import accuracy_score
#accuracy_score(vAR_Y_TEST, y_pred)

0.5130641330166271

In [None]:
#from sklearn.ensemble import RandomForestClassifier
#from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
#from sklearn.metrics import roc_curve, precision_recall_curve, auc, make_scorer, recall_score, accuracy_score, precision_score, confusion_matrix
#from sklearn.metrics import roc_auc_score

#from tqdm import tqdm
#random = RandomForestClassifier(n_jobs=-1)
#param_grid = {
#    'min_samples_split': [3, 5, 10], 
#    'n_estimators' : [100, 300],
#    'max_depth': [3, 5, 15, 25]
#}
#
#scorers = {
#    'precision_score': make_scorer(precision_score),
#    'recall_score': make_scorer(recall_score),
#    'accuracy_score': make_scorer(accuracy_score),
#    'roc_auc_score': make_scorer(roc_auc_score)
#}
#skf = StratifiedKFold(n_splits=5)
#clf = GridSearchCV(random, param_grid, cv=skf, scoring='f1', return_train_score=True, n_jobs=-1)
#clf = clf.fit(vAR_X_TRAIN,vAR_Y_TRAIN)

#y_pred = clf.predict(vAR_X_TEST)
#from sklearn.metrics import confusion_matrix
#confusion_matrix(vAR_Y_TEST, y_pred)

array([[163,  87],
       [124,  47]])

In [None]:
#from sklearn.metrics import accuracy_score
#accuracy_score(vAR_Y_TEST, y_pred)

0.498812351543943

In [None]:
#Dont run this - copying files from colab to google drive
#!cp Customer.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/
#!cp Customer_Product_Price.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/
#!cp Customer_Satisfaction.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/
#!cp Customer_Service_Quality.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/
#!cp Customer_Spending_Power.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/
#!cp Customer_label.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/
#!cp customer_buying_pattern.xlsx /content/drive/MyDrive/Customer_Churn_Datasets/