In [11]:
import numpy as np
import pandas as pd
import json
from flask import Flask, jsonify
from flask_cors import CORS
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost

In [None]:
# Load the tuned hyperparameters that were previously saved as JSON
try:
    with open('Models.json', 'r') as fh:
        models = json.load(fh)
except FileNotFoundError:
    # The hyperparameter file is missing: report it instead of raising
    print("The file 'Models.json' does not exist or cannot be opened.")
else:
    # Reached only when the file was opened and parsed successfully
    print(models)

In [None]:
# Load the names of the columns that were excluded from training
try:
    with open('FeaturesDropped.json', 'r') as fh:
        features_dropped = json.load(fh)
except FileNotFoundError:
    # The dropped-features file is missing: report it instead of raising
    print("The file 'FeaturesDropped.json' does not exist or cannot be opened.")
else:
    # Reached only when the file was opened and parsed successfully
    print(features_dropped)

In [21]:
# Read in the train and test data
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# The model inputs are every training column that is not listed in
# features_dropped (loaded from 'FeaturesDropped.json' in an earlier cell).
input_features = [column for column in train_data.columns if column not in features_dropped]

# Preview the first rows of each frame
print("Train: ", train_data.head())
print("\n")
print("Test: ", test_data.head())

Train:     id  CustomerId         Surname  CreditScore Geography Gender   Age  Tenure  \
0   0    15674932  Okwudilichukwu          668    France   Male  33.0       3   
1   1    15749177   Okwudiliolisa          627    France   Male  33.0       1   
2   2    15694510           Hsueh          678    France   Male  40.0      10   
3   3    15741417             Kao          581    France   Male  34.0       2   
4   4    15766172       Chiemenam          716     Spain   Male  33.0       5   

     Balance  NumOfProducts  HasCrCard  IsActiveMember  EstimatedSalary  \
0       0.00              2        1.0             0.0        181449.97   
1       0.00              2        1.0             1.0         49503.50   
2       0.00              2        1.0             0.0        184866.69   
3  148882.54              1        1.0             1.0         84560.88   
4       0.00              2        1.0             1.0         15068.83   

   Exited  
0       0  
1       0  
2       0  
3     

In [None]:
# Split the loaded hyperparameters by algorithm: position 0 is logistic
# regression, position 1 is XGBoost and position 2 is random forest.
# Each entry is intended to be a list of 4 dictionaries of hyperparameter
# key/value pairs, one per Active, Reactivated, Dormant and Churned classifier.
lgr, xgb, rf = models[0], models[1], models[2]

In [9]:
# One binary (one-vs-rest) classifier is trained per lifecycle stage. The order
# matters: downstream code treats index 0 as Active, 1 as Reactivated,
# 2 as Dormant and 3 as Churned.
lifecycle_labels = ['Active', 'Reactivated', 'Dormant', 'Churned']

# NOTE(review): input_features must contain only numeric columns and must not
# include the target column - confirm against FeaturesDropped.json.
X_train = train_data[input_features]

lgr_models_list = []
xgb_models_list = []
rf_models_list = []
for i, label in enumerate(lifecycle_labels):
    # Binary target: 1 when the customer is in this lifecycle stage, else 0.
    # NOTE(review): the target column is assumed to be 'CurrLifecycle' because
    # that is the column the predictions are later evaluated against - confirm.
    y_train = (train_data['CurrLifecycle'] == label).astype(int)

    # Train the lgr model with its tuned hyperparameters and store it in the list
    lgr_model = LogisticRegression(**lgr[i])
    lgr_model.fit(X_train, y_train)
    lgr_models_list.append(lgr_model)

    # Train the xgb model and store it in the list
    # (xgboost is the module; the estimator class is xgboost.XGBClassifier)
    xgb_model = xgboost.XGBClassifier(**xgb[i])
    xgb_model.fit(X_train, y_train)
    xgb_models_list.append(xgb_model)

    # Train the rf model and store it in the list
    rf_model = RandomForestClassifier(**rf[i])
    rf_model.fit(X_train, y_train)
    rf_models_list.append(rf_model)

In [None]:
# Define the function to generate predictions from the trained models
def prediction(lgr, xgb, rf, data, features):
    """Predict a lifecycle stage for every row of ``data`` with a model ensemble.

    For each lifecycle stage (Active, Reactivated, Dormant, Churned) the
    positive-class probability of the logistic regression, XGBoost and random
    forest model is computed, the three are averaged, impossible transitions
    are zeroed out, and the stage with the highest average wins.

    Parameters
    ----------
    lgr, xgb, rf : sequence
        Fitted binary classifiers exposing ``predict_proba``. Index 0 is the
        Active model, 1 Reactivated, 2 Dormant, 3 Churned.
    data : pandas.DataFrame
        Rows to score. Must contain the ``features`` columns and a
        ``'Lifecycle'`` column holding the customer's current stage.
    features : list of str
        Names of the columns in ``data`` that are fed to the models.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the per-model ``<model>_<stage>_proba``
        columns, the ``average_<stage>_proba`` columns and
        ``'PredictedLifecycle'`` added. The input frame is not modified.

    Raises
    ------
    ValueError
        If any of the model sequences holds fewer than four models.
    """
    labels = ('Active', 'Reactivated', 'Dormant', 'Churned')
    model_families = (('lgr', lgr), ('xgb', xgb), ('rf', rf))

    for prefix, fitted in model_families:
        if len(fitted) < len(labels):
            raise ValueError(
                f"'{prefix}' must contain {len(labels)} models (one per lifecycle stage), "
                f"got {len(fitted)}"
            )

    # Work on a copy so the caller's frame is left untouched
    data = data.copy()
    # The models need the feature values, not the list of column names
    model_input = data[list(features)]

    # Generate the per-model probabilities for each lifecycle stage
    for i, label in enumerate(labels):
        for prefix, fitted in model_families:
            # predict_proba returns one column per class; keep the positive class.
            # Assumes the positive class sits at column index 1 (binary 0/1 target).
            proba = np.asarray(fitted[i].predict_proba(model_input))
            data[f'{prefix}_{label}_proba'] = proba[:, 1]

    # Calculate the average probability from the probabilities generated by each model (Ensemble Learning)
    for label in labels:
        member_cols = [f'{prefix}_{label}_proba' for prefix, _ in model_families]
        data[f'average_{label}_proba'] = data[member_cols].mean(axis=1)

    # Based on the definition of lifecycle, it is not possible for a customer to have the below stated transitions
    # Active -> Reactivated, Dormant -> Dormant, Dormant -> Active, Reactivated -> Reactivated
    # Hence, set the probabilities of these cases to 0
    current = data['Lifecycle']
    data['average_Active_proba'] = np.where(
        (current == 'Dormant') & (data['average_Active_proba'] > 0),
        0, data['average_Active_proba'])
    data['average_Reactivated_proba'] = np.where(
        current.isin(['Active', 'Reactivated']) & (data['average_Reactivated_proba'] > 0),
        0, data['average_Reactivated_proba'])
    data['average_Dormant_proba'] = np.where(
        (current == 'Dormant') & (data['average_Dormant_proba'] > 0),
        0, data['average_Dormant_proba'])

    # The lifecycle with the highest probability will be the predicted lifecycle.
    # Ties are resolved in the order Active, Reactivated, Dormant, Churned.
    average_cols = [f'average_{label}_proba' for label in labels]
    max_proba = data[average_cols].max(axis=1)
    is_max = [(data[col] == max_proba).to_numpy() for col in average_cols[:-1]]
    data['PredictedLifecycle'] = np.select(is_max, list(labels[:-1]), default=labels[-1])
    return data

In [None]:
# Keep only the existing active customers from the train_data
# (rows whose current lifecycle is Dormant or Churned are dropped)
inactive_stages = ['Dormant', 'Churned']
train_drop_index = train_data[train_data['CurrLifecycle'].isin(inactive_stages)].index
train_active = train_data.drop(train_drop_index)

# Returns the df with predictions for train_active (This train data only consists of existing active customers)
train_active_prediction = prediction(lgr_models_list, xgb_models_list, rf_models_list, train_active, input_features)

# Returns the df with predictions for test_data
# This test data consists of both active and churned customers to generate classification report for model performance evaluation
test_prediction = prediction(lgr_models_list, xgb_models_list, rf_models_list, test_data, input_features)

# Generate classification report using the test predictions
report = classification_report(test_prediction['CurrLifecycle'], test_prediction['PredictedLifecycle'])

# Keep only the existing active customers from the test predictions
test_drop_index = test_prediction[test_prediction['CurrLifecycle'].isin(inactive_stages)].index
test_active_prediction = test_prediction.drop(test_drop_index)

# Concat the train_active_prediction and test_active_prediction data
predicted_data = pd.concat([train_active_prediction, test_active_prediction], ignore_index=True)

In [None]:
# Flask API to send report and predicted data to frontend
app = Flask(__name__)
# Allow cross-origin requests so the frontend can call the API
CORS(app)

@app.route('/api/model')
def get_model():
    """Return the classification report computed on the test predictions as JSON."""
    return jsonify(report)

@app.route('/api/data')
def get_data():
    """Return the predicted data for the active customers as JSON (column-oriented dict)."""
    return jsonify(predicted_data.to_dict())

if __name__ == '__main__':
    # The auto-reloader restarts the process, which cannot work inside a
    # notebook kernel, so it is disabled while keeping debug mode on.
    app.run(debug=True, use_reloader=False)