In [2]:
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from evidently import ColumnMapping
from evidently.metric_preset import DataDriftPreset, DataQualityPreset, ClassificationPreset
from evidently.report import Report
from evidently.test_preset import DataStabilityTestPreset
from evidently.test_suite import TestSuite
# NOTE(review): this installs a blanket "ignore" filter, so every warning raised
# after this point (including library deprecation and input-validation warnings)
# is hidden for the rest of the session.
warnings.simplefilter('ignore')

In [3]:
# Directory holding the processed CSVs. Set the CHURN_DATA_DIR environment
# variable to run this notebook on another machine; the fallback is the original
# hard-coded location, so existing behaviour is unchanged when it is not set.
DATA_DIR = Path(
    os.environ.get(
        "CHURN_DATA_DIR",
        "/Users/tarakram/Documents/Customer_Churn_Classification/data/processed",
    )
)

# `ref_data` is read from pre_processed_data.csv and `cur_data` from test_data.csv;
# both are expected to contain a `Churn` column (used as the target below).
ref_data = pd.read_csv(DATA_DIR / "pre_processed_data.csv")
cur_data = pd.read_csv(DATA_DIR / "test_data.csv")

In [4]:
# Class balance of the `Churn` target in the reference data.
ref_data['Churn'].value_counts()

Churn
0    5156
1    1853
Name: count, dtype: int64

In [5]:
# Separate the feature columns from the `Churn` target for both datasets.
ref_X, ref_y = ref_data.drop(columns=["Churn"]), ref_data["Churn"]
cur_X, cur_y = cur_data.drop(columns=["Churn"]), cur_data["Churn"]

In [6]:
# Hold out 20% of the reference rows; the fixed seed keeps the split repeatable.
ref_X_train, ref_X_test, ref_y_train, ref_y_test = train_test_split(
    ref_X,
    ref_y,
    test_size=0.2,
    random_state=42,
)

In [7]:

# Peek at the held-out reference labels; at this point the Series still carries
# the row labels it had before the split (the index is reset in a later cell).
ref_y_test

5133    1
6722    0
245     0
1880    0
5926    0
       ..
4379    0
2574    0
3128    1
2687    0
5259    0
Name: Churn, Length: 1402, dtype: int64

In [8]:
# Display the feature frame of the current dataset (`Churn` already dropped).
cur_X

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,InternetService,OnlineSecurity,Contract,PaymentMethod,MonthlyCharges,TotalCharges
0,0,0,1,1,17,1,1,1,0,2,104.20,1743
1,0,0,1,1,56,1,0,1,1,0,68.75,3815
2,1,1,0,0,4,1,1,1,0,2,75.35,273
3,1,0,1,1,67,1,0,1,2,0,60.40,3953
4,0,0,0,0,37,1,1,0,0,1,100.30,3541
...,...,...,...,...,...,...,...,...,...,...,...,...
1397,1,0,1,1,70,1,0,1,2,1,68.95,4858
1398,0,0,0,1,12,1,1,0,0,2,105.30,1275
1399,1,0,0,0,2,0,0,0,0,0,29.85,75
1400,1,0,1,0,52,1,0,1,1,1,81.40,4354


In [9]:
# Class counts of the reference target; `ref_y` is the `Churn` column of
# `ref_data`, so this repeats the earlier balance check on the full reference set.
ref_y.value_counts()

Churn
0    5156
1    1853
Name: count, dtype: int64

In [10]:
# Seed the forest so the fitted model, and every report built from its
# predictions, is reproducible across Restart & Run All.
rf = RandomForestClassifier(random_state=42)
# Fit on the DataFrame/Series instead of `.values` so the estimator records the
# feature names. Later cells call `predict` / `predict_proba` with DataFrames;
# fitting on bare arrays makes scikit-learn warn that the model was fitted
# without feature names, and skips its column-name consistency check.
rf.fit(ref_X_train, ref_y_train)

In [11]:
# Hard class predictions for both datasets, each wrapped in a one-column frame
# named "Prediction" (default positional index).
ref_pred = pd.DataFrame({"Prediction": rf.predict(ref_X_test)})
cur_pred = pd.DataFrame({"Prediction": rf.predict(cur_X)})

In [12]:
# Replace the post-split row labels with a 0..n-1 index so that features, target
# and predictions align by position, then join all three column-wise.
ref_X_test.reset_index(drop=True, inplace=True)
ref_y_test.reset_index(drop=True, inplace=True)
ref_merged = pd.concat([ref_X_test, ref_y_test, ref_pred], axis=1)
ref_merged

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,InternetService,OnlineSecurity,Contract,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Prediction
0,0,0,1,1,17,1,1,1,0,2,104.20,1743,1,0
1,0,0,1,1,56,1,0,1,1,0,68.75,3815,0,0
2,1,1,0,0,4,1,1,1,0,2,75.35,273,0,0
3,1,0,1,1,67,1,0,1,2,0,60.40,3953,0,0
4,0,0,0,0,37,1,1,0,0,1,100.30,3541,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397,1,0,1,1,70,1,0,1,2,1,68.95,4858,0,0
1398,0,0,0,1,12,1,1,0,0,2,105.30,1275,0,1
1399,1,0,0,0,2,0,0,0,0,0,29.85,75,1,1
1400,1,0,1,0,52,1,0,1,1,1,81.40,4354,0,0


In [13]:
# Same assembly for the current dataset: reset both indexes to 0..n-1 and join
# features, target and predictions column-wise.
cur_X.reset_index(drop=True, inplace=True)
cur_y.reset_index(drop=True, inplace=True)
cur_merged = pd.concat([cur_X, cur_y, cur_pred], axis=1)
cur_merged

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,InternetService,OnlineSecurity,Contract,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Prediction
0,0,0,1,1,17,1,1,1,0,2,104.20,1743,1,0
1,0,0,1,1,56,1,0,1,1,0,68.75,3815,0,0
2,1,1,0,0,4,1,1,1,0,2,75.35,273,0,0
3,1,0,1,1,67,1,0,1,2,0,60.40,3953,0,0
4,0,0,0,0,37,1,1,0,0,1,100.30,3541,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397,1,0,1,1,70,1,0,1,2,1,68.95,4858,0,0
1398,0,0,0,1,12,1,1,0,0,2,105.30,1275,0,1
1399,1,0,0,0,2,0,0,0,0,0,29.85,75,1,1
1400,1,0,1,0,52,1,0,1,1,1,81.40,4354,0,0


In [17]:
# Column mapping for the label-based report: integer target in `Churn`, hard
# prediction in `Prediction`, with display names for the two classes.
cm = ColumnMapping(
    target="Churn",
    prediction="Prediction",
    target_names=["Non-Churned", "Churned"],
)


In [18]:
# Classification report on hard labels: reference hold-out vs. current dataset.
classification_performance_report = Report(metrics=[ClassificationPreset()])
classification_performance_report.run(
    reference_data=ref_merged,
    current_data=cur_merged,
    column_mapping=cm,
)
classification_performance_report.save_html("../reports/classification_performance_report.html")

In [19]:
# Second pass: use predicted class probabilities instead of hard labels.
# One column per class, named after the class labels used in the report.

ref_probas = pd.DataFrame(rf.predict_proba(ref_X_test), columns=['Non-Churned', 'Churned'])
cur_probas = pd.DataFrame(rf.predict_proba(cur_X), columns=['Non-Churned', 'Churned'])

In [20]:
# Display the per-row class probabilities predicted for the reference hold-out.
ref_probas

Unnamed: 0,Non-Churned,Churned
0,0.62,0.38
1,0.99,0.01
2,0.60,0.40
3,0.99,0.01
4,0.54,0.46
...,...,...
1397,0.94,0.06
1398,0.36,0.64
1399,0.33,0.67
1400,0.97,0.03


In [21]:
# Frequency of each distinct (Non-Churned, Churned) probability pair.
ref_probas.value_counts()

Non-Churned  Churned
1.0000       0.0000     156
0.9900       0.0100      90
0.9800       0.0200      52
0.9700       0.0300      44
0.9600       0.0400      40
                       ... 
0.1170       0.8830       1
0.8350       0.1650       1
0.1350       0.8650       1
0.8025       0.1975       1
0.1800       0.8200       1
Name: count, Length: 118, dtype: int64

In [22]:
# Build the frames for the probabilistic report WITHOUT mutating `ref_X_test` /
# `cur_X`. Adding the string `Attrition` column to them in place would make the
# earlier `rf.predict(...)` / `rf.predict_proba(...)` cells fail on re-run, because
# the feature frames would then carry an extra non-numeric column.
#
# `Attrition` is the string form of the target: 0 -> 'Non-Churned', anything
# else -> 'Churned'. `np.where` yields a plain array, so it is attached by
# position, matching the freshly reset 0..n-1 index of the feature frame.
ref_merged = pd.concat(
    [
        ref_X_test.reset_index(drop=True).assign(
            Attrition=np.where(ref_y_test == 0, 'Non-Churned', 'Churned')
        ),
        ref_probas,
    ],
    axis=1,
)

cur_merged = pd.concat(
    [
        cur_X.reset_index(drop=True).assign(
            Attrition=np.where(cur_y == 0, 'Non-Churned', 'Churned')
        ),
        cur_probas,
    ],
    axis=1,
)


In [23]:
# Inspect the reference frame for the probabilistic report: feature columns, the
# string target `Attrition`, and the two class-probability columns.
ref_merged

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,InternetService,OnlineSecurity,Contract,PaymentMethod,MonthlyCharges,TotalCharges,Attrition,Non-Churned,Churned
0,0,0,1,1,17,1,1,1,0,2,104.20,1743,Churned,0.62,0.38
1,0,0,1,1,56,1,0,1,1,0,68.75,3815,Non-Churned,0.99,0.01
2,1,1,0,0,4,1,1,1,0,2,75.35,273,Non-Churned,0.60,0.40
3,1,0,1,1,67,1,0,1,2,0,60.40,3953,Non-Churned,0.99,0.01
4,0,0,0,0,37,1,1,0,0,1,100.30,3541,Non-Churned,0.54,0.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397,1,0,1,1,70,1,0,1,2,1,68.95,4858,Non-Churned,0.94,0.06
1398,0,0,0,1,12,1,1,0,0,2,105.30,1275,Non-Churned,0.36,0.64
1399,1,0,0,0,2,0,0,0,0,0,29.85,75,Churned,0.33,0.67
1400,1,0,1,0,52,1,0,1,1,1,81.40,4354,Non-Churned,0.97,0.03


In [26]:
# Column mapping for the probabilistic report: the target is the string label in
# `Attrition`, and the prediction is given as one probability column per class.
cm = ColumnMapping()
cm.target = 'Attrition'
cm.prediction = ['Non-Churned', 'Churned']
# Churn is the event being predicted, so 'Churned' is the positive class. This
# also keeps the report consistent with the label-based mapping earlier in the
# notebook, which relies on the default positive label (1, i.e. churn).
cm.pos_label = 'Churned'

In [27]:
# Classification report on predicted probabilities: reference vs. current.
classification_performance_report = Report(metrics=[ClassificationPreset()])

classification_performance_report.run(
    reference_data=ref_merged,
    current_data=cur_merged,
    column_mapping=cm,
)
classification_performance_report.save_html("../reports/classification_performance_reports.html")