# Final Project: Customer Churn 

#### Author: Jacob Argue and John Cook
#### Completed: December 2018
#### Course: Business Econometrics II

## 1. Import Relevant Libraries

In [None]:
#Basic libraries
import numpy as np
import pandas as pd

#Train-test split library
from sklearn.model_selection import train_test_split

#Basic libraries for graphs
import matplotlib.pylab as plt
%matplotlib inline

#Libraries for decision boundary plots
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches

## 2. Pre-Process Data

In [None]:
# Read the raw customer churn table from disk.
telco_df = pd.read_csv('data/Churn.csv')

# Lookup tables that recode the text categories as 0/1 indicators.
dmy_dict = {"yes": 1, "no": 0}
churn_dict = {"False.": 0, "True.": 1}

# Recode the target column and the two plan columns in place.
telco_df['Churn'] = telco_df['Churn'].replace(churn_dict)
for plan_col in ('Intl_Plan', 'Vmail_Plan'):
    telco_df[plan_col] = telco_df[plan_col].replace(dmy_dict)

# The recoded Churn column is the prediction target.
target_df = telco_df['Churn']

In [None]:
# Preview the first rows to sanity-check the load and the 0/1 recoding.
telco_df.head()

In [None]:
# Table dimensions as (rows, columns).
telco_df.shape

In [None]:
# Column dtypes, to spot columns that are still stored as text.
telco_df.dtypes

In [None]:
# Convert two-letter state codes into numbers.
# Each code is numbered by its position in this list (AL=0 ... DC=50).
state_codes = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
    "DC",
]
state_dict = {code: position for position, code in enumerate(state_codes)}
telco_df['State'] = telco_df['State'].replace(state_dict)

In [None]:
# Change State column datatype from object to int.
# astype returns a new Series rather than converting in place, so the result
# has to be assigned back to the column for the conversion to take effect.
telco_df['State'] = telco_df['State'].astype(int)

In [None]:
# Create Dummy Variables for each State.
# state_dict maps every two-letter code to the integer stored in the State
# column, so one loop produces the same 51 indicator columns (1 when the row
# belongs to that state, 0 otherwise) in the same order as the mapping.
for state_code, state_idx in state_dict.items():
    telco_df[state_code] = np.where(telco_df['State'] != state_idx, 0, 1)

In [None]:
# Inspect the distinct area codes present before recoding them.
telco_df.Area_Code.unique()

In [None]:
# Recode the three area codes as the integers 0, 1 and 2.
Area_Code_dict = {
    415: 0,
    408: 1,
    510: 2,
}
telco_df['Area_Code'] = telco_df['Area_Code'].replace(Area_Code_dict)

In [None]:
# Note: Area codes 415, 408, and 510 correspond to the Bay Area: San Francisco, San Jose, and Oakland, respectively.
# Each city gets a 0/1 indicator keyed on the recoded Area_Code value (0, 1, 2).
for city_col, code_idx in (('SanFran', 0), ('SanJose', 1), ('Oakland', 2)):
    telco_df[city_col] = np.where(telco_df['Area_Code'] != code_idx, 0, 1)

In [None]:
# Columns excluded from the feature matrix:
#   Phone     - only the last 7 digits, which are random and would only add noise
#   State     - already expanded into one dummy column per state
#   Area_Code - already expanded into dummy columns
#   Churn     - the target variable, kept separate from the features
excluded_cols = ['Phone', 'State', 'Area_Code', 'Churn']
features_df = telco_df.drop(columns=excluded_cols)

In [None]:
# Preview the feature matrix after the drops above.
features_df.head()

In [None]:
# Churn column as a standalone Series (the same column target_df was taken from).
churn_df = telco_df['Churn']

In [None]:
# Show the first 20 target values.
churn_df.head(20)

# 3. Training Machine Learning Algorithms

In [None]:
#Loading train-test split library
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows as the test set; the fixed seed keeps the split reproducible.
split_parts = train_test_split(
    features_df,
    target_df,
    test_size=0.33,
    random_state=23,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
#Loading Cross-validation and Hyperparameter optimization libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

## Logit

In [None]:
from sklearn import linear_model

#Step 1: Define the algorithm (logistic regression with scikit-learn's default settings)
logitC = linear_model.LogisticRegression()

#Step 2: Fit the model on the training split
logit_fit0 = logitC.fit(X_train, y_train)

#(Step 2A: Display details parameters used for the estimation)
print(logit_fit0)

#Step 3: Predict class probabilities for the test split; column 1 is read later as the churn score
y_score_logit = logit_fit0.predict_proba(X_test)

In [None]:
#Set k fold parameters, including number of k folds and randomization
crossv = KFold(n_splits=3, shuffle=True, random_state=4973)

#Set model to be trained and cross validated
# NOTE(review): Lasso is a linear regression fitted to the 0/1 churn target and scored with R-squared,
# not a classifier - confirm this is the intended comparison for the Logit section.
Lasso_mod = linear_model.Lasso()

#Calculate scores for lasso
# NOTE(review): this cross-validates on the full dataset (features_df, churn_df), including the rows
# held out in X_test, whereas the grid search below fits on X_train only - confirm.
cv_scores = cross_val_score(Lasso_mod, features_df, churn_df, cv = crossv, scoring='r2')

# Per-fold scores followed by their mean and standard deviation
print('LASSO parameters', Lasso_mod)
print('R-sq for each fold are:', cv_scores)
print('Average R-sq, across folds:', np.mean(cv_scores))
print('Std R-sq, across folds:', np.std(cv_scores))

In [None]:
#Setting up the grid of possible parameter values for alpha
# 100 evenly spaced candidates from 0.001 to 10 (alpha = 0 itself is not in the grid)
alpha_range = np.linspace(0.001, 10, 100)

#Defining grid search
# 3-fold cross-validated search over alpha, scored by R-squared; refit=True refits the best model
gscv_model = GridSearchCV(Lasso_mod, dict(alpha=alpha_range), cv=3, refit=True, scoring='r2')
gscv_result = gscv_model.fit(X_train, y_train)

#Report results
print(gscv_result.best_estimator_)
print('The optimal penalty parameter is:', gscv_result.best_params_)
print('Cross-validated R-squared:', gscv_result.best_score_)

In [None]:
from sklearn.model_selection import validation_curve
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Train and cross-validation R-squared for every alpha in alpha_range (3 folds), computed on the full dataset
train_scores, test_scores = validation_curve(Lasso_mod, features_df, churn_df, param_range=alpha_range, param_name="alpha", cv=3, scoring='r2')

In [None]:
# Average the cross-validation scores over the folds, giving one value per alpha.
mean_test = np.mean(test_scores, axis=1)

# Mean cross-validated R-squared as a function of alpha.
plt.figure(figsize=(20, 10))
plt.plot(
    alpha_range,
    mean_test,
    'o-',
    label="Cross-validation",
)
plt.xlabel('Value for alpha')
plt.ylabel('Mean test R-squared')
plt.grid()
plt.show()

This essentially shows that an alpha value close to 0 (the low end of the grid), i.e. a nearly non-regularized lasso, is the best.

### Computing ROC and AUC

In [None]:
from sklearn.metrics import roc_curve, auc

#Calculate False-Positive and True Positive Rates
# Column 1 of predict_proba is used as the score for the positive (churn = 1) class
fpr_logit, tpr_logit, _ = roc_curve(y_test, y_score_logit[:,1])

#AUC: area under the ROC curve computed above
roc_auc_logit = auc(fpr_logit, tpr_logit)

## Decision Tree

In [None]:
from sklearn import tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree with default hyperparameters, fit on the training split
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
# Hard class predictions (not probabilities) for the test split
y_pred = dt.predict(X_test)

In [None]:
# ROC/AUC for the baseline tree, computed from the hard class predictions in y_pred.
# NOTE(review): the other models in this notebook pass predict_proba scores to roc_curve;
# confirm that hard labels are intended here.
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
roc_auc

#### Max Depth optimization

In [None]:
from matplotlib.legend_handler import HandlerLine2D

# Sweep the tree depth from 1 to 20 and record train and test AUC at each depth.
# The depths are plain integers: DecisionTreeClassifier expects an integer
# max_depth, and np.linspace would hand it floats (1.0, 2.0, ...).
max_depths = list(range(1, 21))
train_results = []
test_results = []
for max_depth in max_depths:
    dt = DecisionTreeClassifier(max_depth=max_depth)
    dt.fit(X_train, y_train)

    # AUC on the data the tree was fitted on
    train_pred = dt.predict(X_train)
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    train_results.append(roc_auc)

    # AUC on the held-out test split
    y_pred = dt.predict(X_test)
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    test_results.append(roc_auc)

# A widening gap between the two lines indicates overfitting.
line1, = plt.plot(max_depths, train_results, color='blue', label='Train AUC')
line2, = plt.plot(max_depths, test_results, color='red', label='Test AUC')
plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
plt.ylabel('AUC score')
plt.xlabel('Tree depth')
plt.show()

The ideal tree depth appears to be 5. After that point there is overfitting. 

#### min_samples_split optimization

In [None]:
from matplotlib.legend_handler import HandlerLine2D

# Sweep min_samples_split over 20 fractions between 1% and 20% of the training
# sample and record train and test AUC for each value.
min_samples_splits = np.linspace(0.01, 0.2, 20, endpoint=True)
train_results = []
test_results = []
for min_samples_split in min_samples_splits:
    dt = DecisionTreeClassifier(min_samples_split=min_samples_split)
    dt.fit(X_train, y_train)

    # AUC on the data the tree was fitted on
    train_pred = dt.predict(X_train)
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    train_results.append(roc_auc)

    # AUC on the held-out test split
    y_pred = dt.predict(X_test)
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    test_results.append(roc_auc)

# Colours are passed by keyword, as in the max-depth plot; the uppercase
# single-letter format strings 'B' / 'R' are not valid matplotlib formats.
line1, = plt.plot(min_samples_splits, train_results, color='blue', label='Train AUC')
line2, = plt.plot(min_samples_splits, test_results, color='red', label='Test AUC')
plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
plt.ylabel('AUC score')
plt.xlabel('min samples split')
plt.show()

The ideal minimum fraction of the sample required to split a node is 7% (min_samples_split = 0.07); below this there is overfitting, and above it there is a rapid decline in AUC.

### Calculating Decision Tree with optimized values

In [None]:
#Step 1: Define the algorithm (entropy criterion, with the depth and split fraction chosen in the sweeps above)
dt1 = tree.DecisionTreeClassifier(criterion="entropy", max_depth= 5, min_samples_split=.07)

#Step 2: Fit the model on the training split
dt_fit1 = dt1.fit(X_train, y_train)

#(Step 2A: Display details parameters used for the estimation)
print(dt_fit1)

#Step 3: Predict class probabilities for the test split
y_score_dtc = dt_fit1.predict_proba(X_test)

In [None]:
#Step 1: Define the algorithm (entropy criterion, all other hyperparameters left at their defaults)
dtc0 = tree.DecisionTreeClassifier(criterion="entropy")

#Step 2: Fit the model on the training split
dtc_fit0 = dtc0.fit(X_train, y_train)

#(Step 2A: Display details parameters used for the estimation)
print(dtc_fit0)

#Step 3: Predict class probabilities for the test split
# NOTE(review): this reassigns y_score_dtc, replacing the scores of the tuned tree (dt1) stored under
# the same name; every later use of y_score_dtc therefore reflects this default tree - confirm intent.
y_score_dtc = dtc_fit0.predict_proba(X_test)

### Graphing Decision tree

In [None]:
# sklearn.externals.six is no longer shipped with scikit-learn; the standard
# library's io.StringIO provides the same in-memory text buffer.
from io import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

# Export the tuned tree (dt1) as Graphviz DOT text into an in-memory buffer,
# then render that DOT source to a PNG and display it inline.
dot_data = StringIO()
export_graphviz(dt1, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())

### Computing ROC and AUC

In [None]:
#Calculate False-Positive and True Positive Rates
# Column 1 of y_score_dtc is used as the score for the positive (churn = 1) class
fpr_dtc, tpr_dtc, _ = roc_curve(y_test, y_score_dtc[:,1])

#AUC: area under the ROC curve computed above
roc_auc_dtc = auc(fpr_dtc, tpr_dtc)

## Bagging Classifier

In [None]:
from sklearn import ensemble
from sklearn.metrics import confusion_matrix

In [None]:
#Step 1: designate algorithm (bagging ensemble with a fixed seed)
bagc0 = ensemble.BaggingClassifier(random_state=42)

#Step 2: fit model on training data
bagc_fit0 = bagc0.fit(X_train, y_train)

#Step 2A: display parameters used
print(bagc_fit0)

#Step 3: predict class probabilities for the test split
y_score_bagc = bagc_fit0.predict_proba(X_test)

In [None]:
# Score of the fitted bagging model on the held-out test split
print('Accuracy for Bagging Classifier', bagc_fit0.score(X_test, y_test))

In [None]:
# Confusion matrix for the bagging classifier on the test split.
y_pred0 = bagc_fit0.predict(X_test)
cmatrix0 = confusion_matrix(y_test, y_pred0)
# confusion_matrix orders rows (true) and columns (predicted) by sorted label,
# and Churn was encoded as 0 = stay, 1 = churn, so "Stay" comes first.
df_cm = pd.DataFrame(cmatrix0, index=['Stay (true)', 'Churn (true)'], columns=['Stay (pred)', 'Churn (pred)'])
df_cm

### K-Fold Cross-Validation

In [None]:
#Set k fold parameters, including number of k folds and randomization
crossv = KFold(n_splits=10, shuffle=True, random_state=4973)

#Calculate scores for the bagging classifier (10-fold accuracy on the full dataset)
cv_scores = cross_val_score(bagc0, features_df, target_df, cv=crossv, scoring='accuracy')

# The estimator printed here is the bagging classifier, so it is labelled as such
print('Bagging Classifier parameters', bagc0)
print('Accuracy for each fold are:', cv_scores)
print('Average accuracy, across folds:', np.mean(cv_scores))
print('Std accuracy, across folds:', np.std(cv_scores))

### Computing ROC and AUC

In [None]:
#Calculate False-Positive and True Positive Rates
# Column 1 of y_score_bagc is used as the score for the positive (churn = 1) class
fpr_bagc, tpr_bagc, _ = roc_curve(y_test, y_score_bagc[:,1])

#AUC: area under the ROC curve computed above
roc_auc_bagc = auc(fpr_bagc, tpr_bagc)

## Random Forest

In [None]:
#Step 1: designate algorithm (random forest, entropy criterion, fixed seed)
rfc0 = ensemble.RandomForestClassifier(random_state=23, criterion='entropy')

#Step 2: fit model on training data
rfc_fit0 = rfc0.fit(X_train, y_train)

#Step 2A: display parameters used
print(rfc_fit0)

#Step 3: predict class probabilities for the test split
y_score_rfc = rfc_fit0.predict_proba(X_test)

### Computing ROC and AUC

In [None]:
#Calculate False-Positive and True Positive Rates
# Column 1 of y_score_rfc is used as the score for the positive (churn = 1) class
fpr_rfc, tpr_rfc, _ = roc_curve(y_test, y_score_rfc[:,1])

#AUC: area under the ROC curve computed above
roc_auc_rfc = auc(fpr_rfc, tpr_rfc)

## k-Nearest Neighbor Classifiers

In [None]:
from sklearn import neighbors

#Step 1: designate algorithm (uniform weights, brute-force neighbour search, p=2)
# NOTE(review): the features are passed in unscaled, so large-valued columns presumably dominate the distance - confirm.
kNNc0 = neighbors.KNeighborsClassifier(weights='uniform', algorithm='brute', p=2)

#Step 2: fit model on training data
kNNc_fit0 = kNNc0.fit(X_train, y_train)

#Step 2A: display parameters used
print(kNNc_fit0)

#Step 3: predict class probabilities for the test split
y_score_kNNc = kNNc_fit0.predict_proba(X_test)

In [None]:
# Score of the fitted kNN model on the held-out test split
print('Accuracy for kNN Classifier', kNNc_fit0.score(X_test, y_test))

In [None]:
# Confusion matrix for the kNN classifier on the test split.
y_pred0 = kNNc_fit0.predict(X_test)
# Use the kNN predictions just computed (y_pred0); y_pred still holds the
# decision-tree predictions from the tuning loops.
cmatrix0 = confusion_matrix(y_test, y_pred0)
# confusion_matrix orders rows (true) and columns (predicted) by sorted label,
# and Churn was encoded as 0 = stay, 1 = churn, so "Stay" comes first.
df_cm = pd.DataFrame(cmatrix0, index=['Stay (true)', 'Churn (true)'], columns=['Stay (pred)', 'Churn (pred)'])
df_cm

### K-Fold Cross Validation

In [None]:
#Set k fold parameters, including number of k folds and randomization
crossv = KFold(n_splits=10, shuffle=True, random_state=23)

#Calculate scores for the kNN classifier (10-fold accuracy on the full dataset)
cv_scores = cross_val_score(kNNc0, features_df, target_df, cv=crossv, scoring='accuracy')

# The estimator printed here is the kNN classifier, so it is labelled as such
print('kNN Classifier parameters', kNNc0)
print('Accuracy for each fold are:', cv_scores)
print('Average accuracy, across folds:', np.mean(cv_scores))
print('Std accuracy, across folds:', np.std(cv_scores))

### Computing ROC and AUC

In [None]:
#Calculate False-Positive and True Positive Rates
# Column 1 of y_score_kNNc is used as the score for the positive (churn = 1) class
fpr_kNNc, tpr_kNNc, _ = roc_curve(y_test, y_score_kNNc[:,1])

#AUC: area under the ROC curve computed above
roc_auc_kNNc = auc(fpr_kNNc, tpr_kNNc)

## 4. Comparing the Models

In [None]:
# A Receiver Operating Characteristic (ROC) curve is created by plotting the true positive rate (TPR) against the false positive rate (FPR) at various threshold settings.
from sklearn.metrics import roc_curve, auc

In [None]:
# One entry per model: (false-positive rates, true-positive rates, line colour, legend text).
roc_series = [
    (fpr_logit, tpr_logit, 'darkorange', 'Ridge-Logit (AUC = %0.2f)' % roc_auc_logit),
    (fpr_dtc, tpr_dtc, 'darkred', 'Decision Tree (AUC = %0.2f)' % roc_auc_dtc),
    (fpr_kNNc, tpr_kNNc, 'red', 'kNN (AUC = %0.2f)' % roc_auc_kNNc),
    (fpr_bagc, tpr_bagc, 'green', 'Bagged (AUC = %0.2f)' % roc_auc_bagc),
    (fpr_rfc, tpr_rfc, 'blue', 'Random Forest (AUC = %0.2f)' % roc_auc_rfc),
]

plt.figure()
for fpr_vals, tpr_vals, line_color, legend_text in roc_series:
    plt.plot(fpr_vals, tpr_vals, color=line_color, lw=2, label=legend_text)

# Dashed diagonal as the reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curves')
plt.legend(loc="lower right")
plt.show()

In [None]:
import scikitplot as skplt

In [None]:
# One cumulative gain chart per model, each drawn and shown as its own figure.
gain_panels = [
    (y_score_logit, 'Cumulative Response Curve Ridge-Logit'),
    (y_score_dtc, 'Cumulative Response Curve Decision Tree'),
    (y_score_kNNc, 'Cumulative Response Curve kNN Classifier'),
    (y_score_bagc, 'Cumulative Response Curve Bagging Classfier'),
    (y_score_rfc, 'Cumulative Response Curve Random Forest Classfier'),
]
for model_scores, panel_title in gain_panels:
    skplt.metrics.plot_cumulative_gain(y_test, model_scores)
    plt.title(panel_title)
    plt.show()

In [None]:
# One lift chart per model, each drawn and shown as its own figure.
lift_panels = [
    (y_score_logit, 'Lift Curve Ridge-Logit'),
    (y_score_dtc, 'Lift Curve Decision Tree'),
    (y_score_kNNc, 'Lift Curve kNN Classifier'),
    (y_score_bagc, 'Lift Curve Bagging Classfier'),
    (y_score_rfc, 'Lift Curve Random Forest Classfier'),
]
for model_scores, panel_title in lift_panels:
    skplt.metrics.plot_lift_curve(y_test, model_scores)
    plt.title(panel_title)
    plt.show()

## 5. Profit Curves

The lifetime value of a Verizon customer in 2005 (the most recent year for which data is available) is $2,589. To account for inflation and growth, I will round that up to $3,000. 

True Positives: we predict they will churn and they do churn. True Negatives: we predict they will stay and they stay.
False Positives: we predict they will churn and they stay. False Negatives: we predict they will stay and they churn.

The profit impact of a prediction depends on which cell of the confusion matrix the observation falls in. 

In [None]:
from sklearn.metrics import confusion_matrix

def standard_confusion_matrix(y_true, y_pred):
    """Return sklearn's 2x2 confusion matrix rearranged as [[tp, fp], [fn, tn]].

    sklearn lays the matrix out as [[tn, fp], [fn, tp]]; the profit-curve code
    wants it in the same layout as the cost-benefit matrix instead.
    """
    raw_counts = confusion_matrix(y_true, y_pred)
    (tn, fp), (fn, tp) = raw_counts
    rearranged = [[tp, fp], [fn, tn]]
    return np.array(rearranged)

#### Profit Cost-Benefit Matrix

In [None]:
# Cost-benefit matrix with best guesses (dollars per customer in each confusion-matrix cell).
# TP: assumes that one-sixth of the customers predicted to churn can be kept through targeted promotions, etc.
profit_TP = 500
# FP: the cost of giving a discount (10%, for example) to customers who were going to stay anyway.
profit_FP = -300
# FN: customers we thought would stay but did not - the lifetime value of a customer, lost.
profit_FN = -3000
# TN: lifetime value of a customer.
profit_TN = 3000

# Laid out as [[TP, FP], [FN, TN]].
predicted_churn_row = [profit_TP, profit_FP]
predicted_stay_row = [profit_FN, profit_TN]
costbenefit_mat = np.array([predicted_churn_row, predicted_stay_row])

In [None]:
def plot_profit_curve(model_label, costbenefit_mat, y_proba, y_test, col):
    """Draw one model's profit curve on the current matplotlib axes.

    Every predicted probability is used in turn as a decision threshold,
    from the largest to the smallest. At each threshold, observations scoring
    strictly above it are predicted positive, the resulting confusion matrix
    is weighted cell by cell with the cost-benefit matrix, and the total is
    divided by the number of test observations.

    Parameters:
    - model_label: name shown in the legend entry
    - costbenefit_mat: 2x2 array in the same layout as the matrix returned by
      standard_confusion_matrix
    - y_proba: predicted positive-class probabilities for the test data
    - y_test: actual test labels; expected to have the same length as y_proba,
      since the x-axis is built from len(y_test)
    - col: line colour

    Nothing is returned; the curve is added to the active figure and the
    maximum profit (rounded to 2 decimals) appears in the legend label.
    """
    n_obs = len(y_test)

    def _profit_at(cutoff):
        # Average profit per test observation when predicting positive above `cutoff`.
        flagged = (y_proba > cutoff).astype(int)
        outcome_counts = standard_confusion_matrix(y_test, flagged)
        return sum(sum(outcome_counts * costbenefit_mat)) / n_obs

    # One profit value per threshold, starting with the largest threshold.
    profits = [_profit_at(cutoff) for cutoff in sorted(y_proba, reverse=True)]

    best_profit = round(max(profits), 2)
    legend_text = '{}, max profit ${} per customer'.format(model_label, best_profit)
    x_positions = np.linspace(0, 1, n_obs)
    plt.plot(x_positions, profits, color=col, linewidth=3, label=legend_text)
    

In [None]:
# Compare the profit curves of the five default ("0") models on the test split.
# The decision tree entry is dtc0: `dt` is only the last tree left over from
# the min_samples_split tuning loop, not one of the models being compared.
models = [logitC, dtc0, kNNc0, bagc0, rfc0]
colors = ['r', 'g', 'b', 'm', 'darkorange']

fig = plt.figure(figsize=(10, 8))
for model, curve_color in zip(models, colors):
    # Refit on the training split, then score the test split with the
    # predicted probability of the positive (churn = 1) class.
    model.fit(X_train, y_train)
    y_score = model.predict_proba(X_test)[:, 1]
    plot_profit_curve(model.__class__.__name__, costbenefit_mat, y_score, y_test, curve_color)

plt.title("Profit Curves")
plt.xlabel("Percentage of test customers (decreasing by score)")
plt.ylabel("Profit")
plt.legend(loc='lower right')
# Save with the figure's own background colour, then display it.
plt.savefig('Profit_curve.png', facecolor=fig.get_facecolor())
plt.show()

Now that we know that the Bagging Classifier is the best algorithm for this dataset, let's see how total profitability is affected. At the maximum, profit per customer is 2417.45; if we extend this to the entire dataset of 3333 customers, this results in 8,056,694.25 more profit. This compares with 7,836,182.97 for Random Forest, 7,498,050.12 for Decision Tree, 7,125,354.06 for K-Nearest Neighbors, and 7,045,962.00 for Logistic Regression.