# Import Libraries And Dataset

In [None]:
# avoiding package version difference inconsistency for the dashboard
# The assignment below exposes werkzeug's DebugTraceback class under the name
# `get_current_traceback` on the werkzeug.debug.tbtools module.
# NOTE(review): presumably the installed Dash still looks up
# `get_current_traceback`, which this werkzeug release no longer provides —
# confirm against the pinned package versions before removing this shim.
import werkzeug
from werkzeug.debug.tbtools import DebugTraceback
werkzeug.debug.tbtools.get_current_traceback = DebugTraceback

In [None]:
# dashboard libraries
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# dataset reading and manipulation library
import pandas as pd

# label encoding library for object type columns of the dataset
from sklearn.preprocessing import LabelEncoder

# normalization library for the dataset
from sklearn.preprocessing import StandardScaler

# dataset split library for train and test datasets 
from sklearn.model_selection import train_test_split

# libraries for machine learning algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# evaluation metric libraries for evaluating machine learning algorithms
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

# plot libraries to get confusion matrix(heatmap) and scatter plot
import plotly.figure_factory as ff
import plotly.graph_objects as go

In [None]:
# Location of the combined customer-churn CSV.
# The default is the original author's local path; set the CHURN_DATA_PATH
# environment variable to load the file from somewhere else without editing
# this cell.
import os

path = os.environ.get(
    "CHURN_DATA_PATH",
    r"C:\Users\hasan\OneDrive\Masaüstü\Data_Science\Data_Science_Projects\Customer_Churn_Dataset\data\all_customer_churn_data.csv",
)
data = pd.read_csv(path)

---

# Dataset Preparation

In [None]:
# label encoding
# Each text column gets an integer-coded companion column. The single encoder
# is re-fitted for every column, so after the loop it holds the mapping of the
# last column only.
label_encoder = LabelEncoder()
for source_column, encoded_column in (
    ('Gender', 'Gender_Numerical'),
    ('Subscription Type', 'Subscription_Type_Numerical'),
    ('Contract Length', 'Contract_Length_Numerical'),
):
    data[encoded_column] = label_encoder.fit_transform(data[source_column])

In [None]:
# splitting the dataset into predictors (X) and target (Y)
# The customer identifier, the target itself and the raw text columns (which
# already have label-encoded *_Numerical counterparts) are left out of X.
non_predictor_columns = ['CustomerID', 'Gender', 'Subscription Type', 'Contract Length', 'Churn']
X = data.drop(columns=non_predictor_columns)
Y = data['Churn'].to_numpy()

In [None]:
# normalizing the dataset
# Learn the per-column scaling statistics from X, then apply them to X.
standard_scaler = StandardScaler()
standard_scaler.fit(X)
normalized_X = standard_scaler.transform(X)

In [None]:
# splitting the dataset into train and test datasets
# The split is made on the raw predictors and the scaler is then fitted on the
# training rows only, so statistics of the test rows never influence the
# features the models are trained on (avoids train/test leakage).
# `standard_scaler` is rebound to this train-fitted instance; `normalized_X`
# from the previous cell was scaled with statistics from all rows and is
# therefore not used for the split.
raw_train_X, raw_test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.2, random_state=42)
standard_scaler = StandardScaler().fit(raw_train_X)
train_X = standard_scaler.transform(raw_train_X)
test_X = standard_scaler.transform(raw_test_X)

---

# Application Of The ML Models

In [None]:
# Logistic Regression: train on the training split, predict the test split
log_reg = LogisticRegression().fit(train_X, train_Y)
yhat_log_reg = log_reg.predict(test_X)

# Logistic Regression Model Evaluation
# For the binary target the matrix is laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class), so flattening it yields
# the four counts in that order.
cm_log_reg = confusion_matrix(test_Y, yhat_log_reg)
TN_log_reg, FP_log_reg, FN_log_reg, TP_log_reg = cm_log_reg.ravel()

# Type-1 error = false positive rate, Type-2 error = false negative rate
type_1_log_reg = FP_log_reg / (FP_log_reg + TN_log_reg)
type_2_log_reg = FN_log_reg / (FN_log_reg + TP_log_reg)
f1_score_log_reg = f1_score(test_Y, yhat_log_reg)
accuracy_score_log_reg = accuracy_score(test_Y, yhat_log_reg)

In [None]:
# K-Nearest Neighbours(KNN): train on the training split, predict the test split
knn_classifier = KNeighborsClassifier().fit(train_X, train_Y)
yhat_knn_classifier = knn_classifier.predict(test_X)

# K-Nearest Neighbours(KNN) Model Evaluation
# For the binary target the matrix is laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class), so flattening it yields
# the four counts in that order.
cm_knn_classifier = confusion_matrix(test_Y, yhat_knn_classifier)
TN_knn_classifier, FP_knn_classifier, FN_knn_classifier, TP_knn_classifier = cm_knn_classifier.ravel()

# Type-1 error = false positive rate, Type-2 error = false negative rate
type_1_knn_classifier = FP_knn_classifier / (FP_knn_classifier + TN_knn_classifier)
type_2_knn_classifier = FN_knn_classifier / (FN_knn_classifier + TP_knn_classifier)
f1_score_knn_classifier = f1_score(test_Y, yhat_knn_classifier)
accuracy_score_knn_classifier = accuracy_score(test_Y, yhat_knn_classifier)

In [None]:
# Naïve Bayes(BernoulliNB): train on the training split, predict the test split
naive_bernoulli = BernoulliNB().fit(train_X, train_Y)
yhat_naive_bernoulli = naive_bernoulli.predict(test_X)

# Naïve Bayes(BernoulliNB) Model Evaluation
# For the binary target the matrix is laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class), so flattening it yields
# the four counts in that order.
cm_naive_bernoulli = confusion_matrix(test_Y, yhat_naive_bernoulli)
TN_naive_bernoulli, FP_naive_bernoulli, FN_naive_bernoulli, TP_naive_bernoulli = cm_naive_bernoulli.ravel()

# Type-1 error = false positive rate, Type-2 error = false negative rate
type_1_naive_bernoulli = FP_naive_bernoulli / (FP_naive_bernoulli + TN_naive_bernoulli)
type_2_naive_bernoulli = FN_naive_bernoulli / (FN_naive_bernoulli + TP_naive_bernoulli)
f1_score_naive_bernoulli = f1_score(test_Y, yhat_naive_bernoulli)
accuracy_score_naive_bernoulli = accuracy_score(test_Y, yhat_naive_bernoulli)

In [None]:
# Decision Tree Classifier
# random_state is fixed (same seed as the train/test split) so the fitted tree,
# and therefore every number shown on the dashboard, is reproducible between
# runs.
tree_classifier = DecisionTreeClassifier(random_state=42)
tree_classifier.fit(train_X, train_Y)
yhat_tree_classifier = tree_classifier.predict(test_X)

# Decision Tree Classifier Model Evaluation
# For the binary target the matrix is laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class).
cm_tree_classifier = confusion_matrix(test_Y, yhat_tree_classifier)

FN_tree_classifier = cm_tree_classifier[1][0]
TP_tree_classifier = cm_tree_classifier[1][1]
TN_tree_classifier = cm_tree_classifier[0][0]
FP_tree_classifier = cm_tree_classifier[0][1]

# Type-1 error = false positive rate, Type-2 error = false negative rate
type_1_tree_classifier = FP_tree_classifier/(TN_tree_classifier+FP_tree_classifier)
type_2_tree_classifier = FN_tree_classifier/(TP_tree_classifier+FN_tree_classifier)
accuracy_score_tree_classifier = accuracy_score(test_Y, yhat_tree_classifier)
f1_score_tree_classifier = f1_score(test_Y, yhat_tree_classifier)

In [None]:
# Random Forest Classifier
# random_state is fixed (same seed as the train/test split) so the fitted
# forest, and therefore every number shown on the dashboard, is reproducible
# between runs.
rand_forest_classifier = RandomForestClassifier(random_state=42)
rand_forest_classifier.fit(train_X, train_Y)
yhat_rand_forest_classifier = rand_forest_classifier.predict(test_X)

# Random Forest Classifier Model Evaluation
# For the binary target the matrix is laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class).
cm_rand_forest_classifier = confusion_matrix(test_Y, yhat_rand_forest_classifier)

FN_rand_forest_classifier = cm_rand_forest_classifier[1][0]
TP_rand_forest_classifier = cm_rand_forest_classifier[1][1]
TN_rand_forest_classifier = cm_rand_forest_classifier[0][0]
FP_rand_forest_classifier = cm_rand_forest_classifier[0][1]

# Type-1 error = false positive rate, Type-2 error = false negative rate
type_1_rand_forest_classifier = FP_rand_forest_classifier/(TN_rand_forest_classifier+FP_rand_forest_classifier)
type_2_rand_forest_classifier = FN_rand_forest_classifier/(TP_rand_forest_classifier+FN_rand_forest_classifier)
accuracy_score_rand_forest_classifier = accuracy_score(test_Y, yhat_rand_forest_classifier)
f1_score_rand_forest_classifier = f1_score(test_Y, yhat_rand_forest_classifier)

---

# Evaluation Metric Results And Best Results

In [None]:
# metrics dictionary
# Column names and their values are paired up below; every value list follows
# the same model order as the 'Model' list.
metric_columns = ('Model', 'Type-1 Error', 'Type-2 Error', 'Accuracy Score', 'F1 Score')
metric_values = (
    ['Logistic Regression',
     'K-Nearest Neighbours(KNN)',
     'Naïve Bayes(BernoulliNB)',
     'Decision Tree Classifier',
     'Random Forest Classifier'],
    [type_1_log_reg,
     type_1_knn_classifier,
     type_1_naive_bernoulli,
     type_1_tree_classifier,
     type_1_rand_forest_classifier],
    [type_2_log_reg,
     type_2_knn_classifier,
     type_2_naive_bernoulli,
     type_2_tree_classifier,
     type_2_rand_forest_classifier],
    [accuracy_score_log_reg,
     accuracy_score_knn_classifier,
     accuracy_score_naive_bernoulli,
     accuracy_score_tree_classifier,
     accuracy_score_rand_forest_classifier],
    [f1_score_log_reg,
     f1_score_knn_classifier,
     f1_score_naive_bernoulli,
     f1_score_tree_classifier,
     f1_score_rand_forest_classifier],
)
metrics = dict(zip(metric_columns, metric_values))


# best results of the metrics: lowest error rates, highest scores
metrics_df = pd.DataFrame(data=metrics)
accuracy_score_best = metrics_df['Accuracy Score'].max()
f1_score_best = metrics_df['F1 Score'].max()
type_1_best = metrics_df['Type-1 Error'].min()
type_2_best = metrics_df['Type-2 Error'].min()

---

# Plot Functions To Be Used In Callback

In [None]:
# plot function for the confusion matrix
def plot_confusion_matrix(y_true, y_pred, model_name):
    """Return an annotated heatmap of the confusion matrix of one model.

    Args:
        y_true: actual class labels.
        y_pred: predicted class labels, aligned with ``y_true``.
        model_name: text placed in front of ' Confusion Matrix' in the title.

    Returns:
        The plotly figure built by ``ff.create_annotated_heatmap``.
    """
    confusion_matrix_data = confusion_matrix(y_true, y_pred)

    # The sklearn matrix is [[TN, FP], [FN, TP]] (row 0 = actual class 0), but
    # the y axis below lists 'Churned(1)' first. Reverse the rows so that the
    # heatmap values, the axis labels and the annotation text all describe the
    # same cell: [[FN, TP], [TN, FP]]. Previously only the text was reordered,
    # so the cell colours and hover values belonged to the other row.
    heatmap_rows = confusion_matrix_data[::-1]
    cm_text = [[str(count) for count in row] for row in heatmap_rows]

    fig = ff.create_annotated_heatmap(heatmap_rows,
                                      x=['Un-Churned(0)', 'Churned(1)'],
                                      y=['Churned(1)', 'Unchurned(0)'],
                                      annotation_text=cm_text,
                                      colorscale='teal')
    fig.update_layout(title_text=model_name + ' Confusion Matrix', title_x=0.5)
    fig.update_xaxes(title_text='Predicted Values')
    fig.update_yaxes(title_text='Actual Values')
    # the annotated heatmap puts the x axis on top by default; move it down
    fig.update_layout(xaxis={'side': 'bottom'})

    return fig

In [None]:
# plot function for the metric results
def plot_information_metrics(metric_type, metrics):
    """Return a line-and-marker chart of one metric across all models.

    ``metrics`` is indexed with 'Model' for the x values and with
    ``metric_type`` for the y values; ``metric_type`` also starts the title.
    """
    metric_trace = go.Scatter(x=metrics['Model'],
                              y=metrics[metric_type],
                              mode="markers+lines")

    fig = go.Figure(data=metric_trace)
    chart_title = metric_type + ' Distribution Graph'
    fig.update_layout(title=chart_title)

    return fig

In [None]:
# plotting best fitted models with respect to metric results
def best_fitted_model(evaluation_metrics):
    """Return a scatter plot of the best value of every evaluation metric.

    Args:
        evaluation_metrics: mapping (or anything ``pd.DataFrame`` accepts) with
            a 'Model' column and the columns 'Type-1 Error', 'Type-2 Error',
            'Accuracy Score' and 'F1 Score'.

    Returns:
        A plotly figure with one marker per metric: x is the model that
        achieved the best value, y is that value. Hovering a marker shows
        which metric it stands for.
    """
    metrics_df = pd.DataFrame(evaluation_metrics)

    # metric column -> True when a lower value is better (error rates),
    # False when a higher value is better (scores)
    lower_is_better = {'Type-1 Error': True,
                       'Type-2 Error': True,
                       'Accuracy Score': False,
                       'F1 Score': False}

    best_results = {'Metrics': [], 'Best Metric Result': [], 'Model': []}
    for metric_name, minimize in lower_is_better.items():
        column = metrics_df[metric_name]
        # idxmin/idxmax return the first row holding the best value; the model
        # is then read by column name instead of relying on 'Model' being the
        # first column of the frame.
        best_row = column.idxmin() if minimize else column.idxmax()
        best_results['Metrics'].append(metric_name)
        best_results['Best Metric Result'].append(column.loc[best_row])
        best_results['Model'].append(metrics_df.loc[best_row, 'Model'])

    fig = go.Figure(data=go.Scatter(x=best_results['Model'],
                                    y=best_results['Best Metric Result'],
                                    # the metric name is shown on hover, so
                                    # markers sharing a model stay identifiable
                                    text=best_results['Metrics'],
                                    mode="markers")
                   )
    fig.update_layout(title='Best Fitted Models Distribution Graph', title_x=0.5)
    fig.update_xaxes(title_text='Best Fitted Models With Respect To Evaluation Metrics')
    fig.update_yaxes(title_text='Best Fitted Model Evaluation Metric Result')

    return fig

---

In [None]:
# dropdown menu contents
# Labels are listed in display order; each option's value is its 1-based
# position in this tuple.
dropdown_labels = (
    'Logistic Regression Confusion Matrix',
    'K-Nearest Neighbours(KNN) Confusion Matrix',
    'Naïve Bayes(BernoulliNB) Confusion Matrix',
    'Decision Tree Classifier Confusion Matrix',
    'Random Forest Classifier Confusion Matrix',
    'Type-1 Error Rate Results',
    'Type-2 Error Rate Results',
    'Accuracy Scores',
    'F1 Scores',
    'Best Fitted Model(s)',
)
dropdown_menu_list = [{'label': option_label, 'value': option_value}
                      for option_value, option_label in enumerate(dropdown_labels, start=1)]

---

# Dashboard Application

In [None]:
# initialize app
# Create the Dash application object; the layout and the callback defined
# later in this file are attached to this `app` instance.
app = dash.Dash(__name__)

In [None]:
# setting app layout
# Dash style dictionaries take camelCased CSS property names, so the keys are
# written as fontWeight / borderRadius / textAlign / fontFamily. (The former
# 'font-wieght' key was misspelled and never made the text bold.)

# grey rounded panel shared by every container division
_GREY_PANEL = {'backgroundColor': '#D7DBDD', 'borderRadius': '9px'}
# bold grey text used by the page title and the card titles
_TITLE_TEXT = {'color': '#85929E', 'fontWeight': 'bold'}


def _metric_block(title, placeholder, value_id):
    """Build one white info card: a bold grey title above a value paragraph.

    ``value_id`` is the component id of the value paragraph; the callback
    writes the metric value into that paragraph's children.
    """
    return html.Div([html.P(title, style=dict(_TITLE_TEXT)),
                     html.P(placeholder, id=value_id)],
                    style={'backgroundColor': '#FFFFFF',
                           'display': 'inline-block',
                           'fontFamily': 'bahnschrift',
                           'margin': '1%',
                           'padding': '1%',
                           'borderRadius': '9px'})


app.layout = html.Div(children=[

    # title division
    html.Div(html.H1('Predictive Customer Churn Dashboard',
                     style={'textAlign': 'center',
                            'fontFamily': 'bahnschrift',
                            **_TITLE_TEXT}),
             style=dict(_GREY_PANEL)),

    # dropdown division
    html.Div(dcc.Dropdown(id='dropdown-menu-option',
                          options=dropdown_menu_list,
                          value=None,
                          placeholder='Select an option...'),
             style={**_GREY_PANEL,
                    'marginLeft': '15%',
                    'marginRight': '15%'}),

    # content division
    html.Div(children=[

        # information blocks
        html.Div(children=[

            # Type-1 Error Rate(Fall-Out),(False Positive Rate-FPR)
            _metric_block("Type-1 Error Rate", "-----------------", 'type-1-block-id'),

            # Type-2 Error Rate(Miss Rate),(False Negative Rate-FNR)
            _metric_block("Type-2 Error Rate", "-----------------", 'type-2-block-id'),

            # accuracy score
            _metric_block("Accuracy Score", "-------------", 'accuracy-score-block-id'),

            # F1 score
            _metric_block("F1 Score", "-------", 'F1-score-block-id'),

        ], style={**_GREY_PANEL,
                  'textAlign': 'center'}),

        # graph
        html.Div(dcc.Graph(id='plots-id'),
                 style={'backgroundColor': '#FFFFFF',
                        'marginLeft': '15%',
                        'marginRight': '15%',
                        'padding': '1%',
                        'borderRadius': '9px'})

    ], style={**_GREY_PANEL,
              'margin': '2%'})

], style={**_GREY_PANEL,
          'marginTop': '2%',
          'marginBottom': '2%',
          'marginLeft': '5%',
          'marginRight': '5%',
          'paddingTop': '1%',
          'paddingBottom': '1%'})

In [None]:
@app.callback(
    [Output('plots-id', 'figure'),
     Output('type-1-block-id', 'children'),
     Output('type-2-block-id', 'children'),
     Output('accuracy-score-block-id', 'children'),
     Output('F1-score-block-id', 'children')],
    Input('dropdown-menu-option', 'value')
)
def evaluation_results(dropdown_menu_option):
    """Build the figure and the four info-block values for a dropdown choice.

    Args:
        dropdown_menu_option: value of the selected dropdown option (1-10), or
            None when nothing is selected.

    Returns:
        A 5-tuple ``(figure, type-1 error, type-2 error, accuracy, f1)`` in the
        order of the callback outputs.
        * 1-5: confusion matrix and metric values of the chosen model.
        * 6-9: chart of one metric across all models; the info blocks show a
          dashed placeholder.
        * 10: best-fitted-models chart with the best value of every metric.
        * None or any other value: the Random Forest view titled as the best
          fitted model (previously an unexpected value raised
          UnboundLocalError).
    """
    placeholder = '----------'

    # option value -> (figure title, test-set predictions,
    #                  type-1 error, type-2 error, accuracy, f1)
    model_views = {
        # this entry used to be titled 'Linear Regression' although the model
        # is a logistic regression
        1: ('Logistic Regression', yhat_log_reg,
            type_1_log_reg, type_2_log_reg,
            accuracy_score_log_reg, f1_score_log_reg),
        2: ('K-Nearest Neighbours(KNN)', yhat_knn_classifier,
            type_1_knn_classifier, type_2_knn_classifier,
            accuracy_score_knn_classifier, f1_score_knn_classifier),
        3: ('Naïve Bayes(BernoulliNB)', yhat_naive_bernoulli,
            type_1_naive_bernoulli, type_2_naive_bernoulli,
            accuracy_score_naive_bernoulli, f1_score_naive_bernoulli),
        4: ('Decision Tree Classifier', yhat_tree_classifier,
            type_1_tree_classifier, type_2_tree_classifier,
            accuracy_score_tree_classifier, f1_score_tree_classifier),
        5: ('Random Forest Classifier', yhat_rand_forest_classifier,
            type_1_rand_forest_classifier, type_2_rand_forest_classifier,
            accuracy_score_rand_forest_classifier, f1_score_rand_forest_classifier),
    }

    # option value -> metric column plotted across all models
    metric_views = {6: 'Type-1 Error',
                    7: 'Type-2 Error',
                    8: 'Accuracy Score',
                    9: 'F1 Score'}

    if dropdown_menu_option in model_views:
        title, predictions, *scores = model_views[dropdown_menu_option]
        fig = plot_confusion_matrix(test_Y, predictions, title)
        return (fig, *scores)

    if dropdown_menu_option in metric_views:
        fig = plot_information_metrics(metric_views[dropdown_menu_option], metrics)
        return fig, placeholder, placeholder, placeholder, placeholder

    if dropdown_menu_option == 10:
        fig = best_fitted_model(metrics)
        return fig, type_1_best, type_2_best, accuracy_score_best, f1_score_best

    # nothing selected (None) or an unknown value: show the default view
    fig = plot_confusion_matrix(test_Y, yhat_rand_forest_classifier,
                                ' Best Fitted Model: Random Forest Classifier')
    return (fig,
            type_1_rand_forest_classifier,
            type_2_rand_forest_classifier,
            accuracy_score_rand_forest_classifier,
            f1_score_rand_forest_classifier)

In [None]:
# run the dashboard
# Start the Dash server on port 1234, but only when this file is executed as
# the main program (not when it is imported).
if __name__ == "__main__":
    app.run_server(port=1234)

---