In [7]:
import numpy as np
import pandas as pd
from flask import Flask, jsonify
from flask_cors import CORS
from sklearn.metrics import classification_report

In [None]:
def main(lgr, xgb, rf, train_data, test_data):
    """Score customers with the ensemble and evaluate it on the test set.

    Parameters
    ----------
    lgr, xgb, rf : sequence
        Fitted models, passed through unchanged to ``prediction``.
    train_data, test_data : pandas.DataFrame
        Customer frames containing a ``'Lifecycle'`` column.

    Returns
    -------
    tuple
        ``(report, predicted_data)`` where ``report`` is the output of
        ``classification_report`` computed on every test row, and
        ``predicted_data`` is the concatenation of the scored train and test
        rows whose ``'Lifecycle'`` is neither ``'Dormant'`` nor ``'Churned'``.
    """
    inactive_states = ['Dormant', 'Churned']

    # Keep only the customers that are not dormant/churned. ``Series.isin``
    # yields an element-wise mask; the Python ``in`` operator would instead ask
    # for the truth value of the whole Series and raise. ``.copy()`` detaches
    # the subset from ``train_data``.
    train_is_inactive = train_data['Lifecycle'].isin(inactive_states)
    train_active = train_data[~train_is_inactive].copy()

    # Generate predictions for the existing active customers in the train data
    train_active_prediction = prediction(lgr, xgb, rf, train_active)

    # The full test data (all lifecycle states) is scored so the classification
    # report reflects model performance across every class
    test_prediction = prediction(lgr, xgb, rf, test_data)
    report = classification_report(
        test_prediction['Lifecycle'],
        test_prediction['Predicted_Lifecycle'],
    )

    # Keep only the existing active customers from the scored test data
    test_is_inactive = test_prediction['Lifecycle'].isin(inactive_states)
    test_active_prediction = test_prediction[~test_is_inactive]

    # Combine the active customers from both sets into one frame
    predicted_data = pd.concat(
        [train_active_prediction, test_active_prediction],
        ignore_index=True,
    )

    # TODO: Discuss with frontend on how they want to receive the
    # classification report and predicted data; until then both are returned.
    return report, predicted_data

In [None]:
def _positive_class_proba(model, features, label):
    """Return a 1-D array with ``model``'s probability for the ``label`` class.

    ``predict_proba`` returns one column per class, so exactly one column has
    to be selected before the result can be stored in a DataFrame column.
    If ``model.classes_`` contains ``label`` itself, that column is used;
    otherwise the last column is used.
    NOTE(review): the last-column fallback assumes the positive class sorts
    last (e.g. 0/1 or False/True targets) - confirm against how the models
    were trained.
    """
    proba = np.asarray(model.predict_proba(features))
    if proba.ndim == 1:
        return proba
    # ``tolist`` converts numpy scalars to plain Python values so the
    # membership test below is an ordinary Python comparison.
    classes = np.asarray(getattr(model, 'classes_', ())).tolist()
    column = classes.index(label) if label in classes else proba.shape[1] - 1
    return proba[:, column]


def prediction(lgr, xgb, rf, data, exclude_columns=('Lifecycle',)):
    """Predict each customer's lifecycle by averaging three model families.

    Parameters
    ----------
    lgr, xgb, rf : sequence
        At least four fitted binary models each, ordered as
        Active, Reactivated, Dormant, Churned (one-vs-rest). Every model must
        provide ``predict_proba``.
    data : pandas.DataFrame
        Customer rows. Must contain a ``'Lifecycle'`` column, which is used to
        rule out impossible transitions. The frame is not modified.
    exclude_columns : str or iterable of str, optional
        Columns that must not be fed to the models. Defaults to
        ``('Lifecycle',)``; add further non-feature columns as needed.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with ``<family>_<label>_proba`` columns for every
        model, ``average_<label>_proba`` columns, and ``'Predicted_Lifecycle'``.

    Raises
    ------
    ValueError
        If any model family provides fewer than four models.
    """
    lifecycle_labels = ('Active', 'Reactivated', 'Dormant', 'Churned')
    model_families = (('lgr', lgr), ('xgb', xgb), ('rf', rf))

    for family_name, models in model_families:
        if len(models) < len(lifecycle_labels):
            raise ValueError(
                "'{}' must contain {} models (one per lifecycle label), got {}".format(
                    family_name, len(lifecycle_labels), len(models)
                )
            )

    # A bare string would otherwise be split into single characters
    if isinstance(exclude_columns, str):
        exclude_columns = [exclude_columns]

    # Work on a copy so the caller's frame is untouched and re-running the
    # function never feeds previously generated columns back in as features.
    scored = data.copy()

    # Feature extraction: everything except the excluded columns
    features = data.drop(columns=list(exclude_columns), errors='ignore')

    # Per-model probability of each lifecycle label
    for position, label in enumerate(lifecycle_labels):
        for family_name, models in model_families:
            column_name = '{}_{}_proba'.format(family_name, label)
            scored[column_name] = _positive_class_proba(models[position], features, label)

    # Ensemble: average the probabilities produced by the three families
    for label in lifecycle_labels:
        member_columns = [
            '{}_{}_proba'.format(family_name, label) for family_name, _ in model_families
        ]
        scored['average_{}_proba'.format(label)] = scored[member_columns].mean(axis=1)

    # Based on the definition of lifecycle, these transitions are impossible:
    # Active -> Reactivated, Dormant -> Dormant, Dormant -> Active,
    # Reactivated -> Reactivated. Their probabilities are therefore set to 0.
    current = scored['Lifecycle']
    is_dormant = current == 'Dormant'
    scored.loc[is_dormant, 'average_Active_proba'] = 0.0
    scored.loc[current.isin(['Active', 'Reactivated']), 'average_Reactivated_proba'] = 0.0
    scored.loc[is_dormant, 'average_Dormant_proba'] = 0.0

    # The lifecycle with the highest averaged probability is the prediction.
    # ``argmax`` returns the first maximum, so ties resolve in the order
    # Active, Reactivated, Dormant, Churned.
    average_columns = ['average_{}_proba'.format(label) for label in lifecycle_labels]
    winner = scored[average_columns].to_numpy().argmax(axis=1)
    scored['Predicted_Lifecycle'] = np.asarray(lifecycle_labels, dtype=object)[winner]
    return scored

In [None]:
# Flask takes the import name of the current module; ``__name__`` supplies it
# (a bare ``name`` is undefined and raises NameError).
app = Flask(__name__)
# Apply flask_cors to the app so a separately hosted frontend can call the API
CORS(app)

@app.route('/api/data')
def get_data():
    """Return the result of ``test()`` serialised as a JSON response.

    NOTE(review): ``test`` is not defined in the visible part of this
    notebook - confirm it exists, or replace it with the real data source.
    """
    # Placeholder for fetching and processing data from the desired source
    return jsonify(test())

# Start the development server only when this code is executed directly
# (``__name__`` is '__main__' for scripts and for notebook cells).
if __name__ == '__main__':
    # debug=True is for local development only. The auto-reloader restarts the
    # launching process, which does not work from inside a notebook kernel,
    # so it is switched off explicitly.
    app.run(debug=True, use_reloader=False)