In [75]:
import os

from flask import Flask, request, jsonify
from flask_cors import CORS

import pymysql
import numpy as np
import pandas as pd

from xgboost.sklearn import XGBClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from imblearn.over_sampling import SMOTE

In [76]:
# Flask application object; CORS(app) applies flask_cors' default settings to it.
app = Flask(__name__)
CORS(app)

# Connection settings for the MySQL database. Host, user, password and schema can be
# overridden through environment variables so real credentials do not have to be
# committed with the notebook; the fallbacks keep the original local values working.
db_config = {
    'host': os.environ.get('DB_HOST', 'localhost'),
    'user': os.environ.get('DB_USER', 'root'),
    'password': os.environ.get('DB_PASSWORD', '1194'),
    'db': os.environ.get('DB_NAME', 'database_project_final'),
    'charset': 'utf8mb4',
    # Rows are returned as dicts keyed by column name.
    'cursorclass': pymysql.cursors.DictCursor
}

# One module-level connection, opened when this cell runs and shared by the
# functions that read from / write to the database.
connection = pymysql.connect(**db_config)

In [77]:
def parse_new_input(s):
    """Parse a ``"<field> is <value>, <field> is <value>, ..."`` string into a one-row DataFrame.

    Fragments are separated by ``", "``. Each fragment is split on its FIRST
    ``" is "`` only, so a value may itself contain the words " is ". Field names
    and values are stripped of surrounding whitespace, and values made up solely
    of digits are converted to ``int``; every other value stays a string.

    Args:
        s (str): The textual record, e.g. ``"id is 1, Gender is Male, Age is 44"``.

    Returns:
        pandas.DataFrame: A single-row frame with one column per field.

    Raises:
        ValueError: If a fragment does not contain the ``" is "`` separator
            (this includes an empty input string).
    """
    data = {}
    for pair in s.split(", "):
        # partition() splits once, unlike split(" is ") which produced three or
        # more pieces (and an unpacking error) for values such as "this is fine".
        key, separator, value = pair.partition(" is ")
        if not separator:
            raise ValueError(
                f"Cannot parse {pair!r}: expected the form '<field> is <value>'"
            )
        key = key.strip()
        value = value.strip()
        if value.isdigit():
            value = int(value)
        data[key] = value
    return pd.DataFrame([data])

In [78]:
def add_data_to_training(s):
    """Insert one record, described by the string ``s``, into ``training_data``.

    The string is parsed with ``parse_new_input`` and written through the
    module-level pymysql ``connection`` using a parameterized INSERT.

    Args:
        s (str): Record in the ``"<field> is <value>, ..."`` form. It must
            provide every column named in the INSERT statement below.

    Raises:
        KeyError: If a required field is missing from ``s``.
        ValueError: If a numeric field cannot be converted to ``int``.
        Exception: Any database error is re-raised after the transaction has
            been rolled back.
    """
    # Work with the single parsed row as a Series: calling int() on a
    # one-element Series (as int(data['id']) did) is deprecated in pandas.
    row = parse_new_input(s).iloc[0]

    sql = (
        "INSERT INTO training_data "
        "(id, Gender, Age, Driving_License, Region_Code, Previously_Insured, Vehicle_Age, "
        "Vehicle_Damage, Annual_Premium, Policy_Sales_Channel, Vintage, Response) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    # Built before touching the database so malformed input fails early.
    params = (
        int(row['id']),
        row['Gender'],
        int(row['Age']),
        int(row['Driving_License']),
        int(row['Region_Code']),
        int(row['Previously_Insured']),
        row['Vehicle_Age'],
        row['Vehicle_Damage'],
        int(row['Annual_Premium']),
        int(row['Policy_Sales_Channel']),
        int(row['Vintage']),
        int(row['Response']),
    )

    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, params)
        connection.commit()
    except Exception:
        # The connection is shared; do not leave a failed transaction open on it.
        connection.rollback()
        raise

In [None]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_cors import CORS

# NOTE(review): this rebinds `app`, replacing the Flask instance created in the
# earlier setup cell; anything registered on that earlier instance is not carried over.
app = Flask(__name__)
CORS(app)

# Configure SQLAlchemy
# The connection URI (which embeds the database credentials) can be supplied via the
# SQLALCHEMY_DATABASE_URI environment variable; the fallback keeps the original
# local value working.
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
    'SQLALCHEMY_DATABASE_URI',
    'mysql+pymysql://root:1194@localhost/database_project_final',
)
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

# Flask-SQLAlchemy extension bound to the app; provides db.Model and db.session.
db = SQLAlchemy(app)

class TrainingData(db.Model):
    """ORM model mapped to the ``training_data`` table (one instance per row)."""

    __tablename__ = 'training_data'

    # Primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Text-valued columns.
    Gender = db.Column(db.String(255))
    Age = db.Column(db.Integer)
    Driving_License = db.Column(db.Integer)
    Region_Code = db.Column(db.Integer)
    Previously_Insured = db.Column(db.Integer)
    Vehicle_Age = db.Column(db.String(255))
    Vehicle_Damage = db.Column(db.String(255))
    Annual_Premium = db.Column(db.Integer)
    Policy_Sales_Channel = db.Column(db.Integer)
    Vintage = db.Column(db.Integer)
    Response = db.Column(db.Integer)


# Create the table if it does not exist yet. Flask-SQLAlchemy 3.x needs an active
# application context for this call; older releases accept the context as well.
with app.app_context():
    db.create_all()

def add_data_to_training_orm(s):
    """Insert one record, described by the string ``s``, using the ``TrainingData`` ORM model.

    Args:
        s (str): Record in the ``"<field> is <value>, ..."`` form understood by
            ``parse_new_input``. It must provide every ``TrainingData`` column.

    Raises:
        KeyError: If a required field is missing from ``s``.
        ValueError: If a numeric field cannot be converted to ``int``.
        Exception: Any error raised while adding/committing is re-raised after
            the session has been rolled back.
    """
    # Work with the single parsed row as a Series: calling int() on a
    # one-element Series (as int(data['id']) did) is deprecated in pandas.
    row = parse_new_input(s).iloc[0]

    training_data = TrainingData(
        id=int(row['id']),
        Gender=row['Gender'],
        Age=int(row['Age']),
        Driving_License=int(row['Driving_License']),
        Region_Code=int(row['Region_Code']),
        Previously_Insured=int(row['Previously_Insured']),
        Vehicle_Age=row['Vehicle_Age'],
        Vehicle_Damage=row['Vehicle_Damage'],
        Annual_Premium=int(row['Annual_Premium']),
        Policy_Sales_Channel=int(row['Policy_Sales_Channel']),
        Vintage=int(row['Vintage']),
        Response=int(row['Response']),
    )

    try:
        db.session.add(training_data)
        db.session.commit()
    except Exception:
        # A failed flush/commit leaves the session unusable until it is rolled back.
        db.session.rollback()
        raise

In [79]:
def encode_and_bind(original_dataframe, feature_to_encode):
    """Replace one column of a frame with its dummy (indicator) columns.

    The indicator columns come from ``pd.get_dummies`` with ``drop_first=True``,
    are appended to the right of the existing columns, and the source column
    is then removed. The input frame is not modified.

    Args:
        original_dataframe (pandas.DataFrame): Frame containing the column.
        feature_to_encode (str): Name of the column to encode.

    Returns:
        pandas.DataFrame: A new frame with the encoded column swapped out.
    """
    indicator_columns = pd.get_dummies(
        original_dataframe[[feature_to_encode]], drop_first=True
    )
    widened = pd.concat([original_dataframe, indicator_columns], axis=1)
    return widened.drop(columns=[feature_to_encode])

def perform_Machine_Learning():
    """Train an XGBoost classifier on ``training_data`` and evaluate it on ``test_data``.

    Both tables are read through the module-level ``connection``. Object-typed
    training features are dummy-encoded with ``encode_and_bind``; the test
    features are encoded and then aligned to the training column layout. The
    training rows are oversampled with SMOTE before fitting.

    Returns:
        tuple: ``(model, accuracy, auc, feature_columns)`` where ``model`` is the
        fitted ``XGBClassifier``, ``accuracy`` and ``auc`` are computed on the
        test table, and ``feature_columns`` is the column index of the encoded
        training features (the layout the model was fitted on).
    """
    # Ingest data
    training_data = pd.read_sql('SELECT * FROM training_data', connection)
    test_data = pd.read_sql('SELECT * FROM test_data', connection)

    # Split data
    X_train = training_data.drop(columns=['id', 'Response'])
    X_test = test_data.drop(columns=['id', 'Response'])
    y_train = training_data['Response']
    y_test = test_data['Response']

    # Get dummies of categorical features (training set defines the layout)
    for feature in X_train.select_dtypes('object').columns.to_list():
        X_train = encode_and_bind(X_train, feature)

    # Encode the test set with ALL of its categories and then project it onto the
    # training columns. Encoding it independently with drop_first could drop a
    # different baseline category, or yield a different set/order of columns,
    # whenever the categories present in the two tables differ.
    X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

    # Process imbalanced data
    over = SMOTE(sampling_strategy='auto', random_state=3)
    X_train_o, y_train_o = over.fit_resample(X_train, y_train)

    # Machine Learning
    xgb = XGBClassifier().fit(X_train_o, y_train_o)
    xgb_pred = xgb.predict(X_test)

    # Calculate evaluation metrics on the test set
    XGBoost_accuracy = accuracy_score(y_test, xgb_pred)
    # ROC AUC is a ranking metric and needs scores, not hard 0/1 labels.
    # NOTE(review): column 1 is taken as the positive class, which assumes a
    # binary Response encoded as 0/1 - confirm against the data.
    xgb_scores = xgb.predict_proba(X_test)[:, 1]
    XGBoost_auc = roc_auc_score(y_test, xgb_scores)

    return xgb, XGBoost_accuracy, XGBoost_auc, X_train.columns

In [80]:
def predict_new_input(s, all_columns, model):
    """Classify a single record described by the string ``s``.

    Args:
        s (str): Record in the ``"<field> is <value>, ..."`` form understood by
            ``parse_new_input``.
        all_columns: Feature columns, in order, that ``model`` was fitted on.
        model: Fitted estimator exposing ``predict``.

    Returns:
        str: ``'Customer is interested'`` when the predicted label equals 1,
        otherwise ``'Customer is not interested'``.
    """
    # Parse the input string. 'id' is not a feature; tolerate inputs that omit it
    # instead of failing with a KeyError.
    data_ML = parse_new_input(s).drop(columns='id', errors='ignore')

    # Get dummies for categorical features, then project onto the training layout:
    # columns the model expects but this row lacks are filled with 0, columns the
    # model does not know are dropped, and the order follows all_columns.
    new_input_dummies = pd.get_dummies(data_ML).reindex(columns=all_columns, fill_value=0)

    # Predict the new data
    new_pred = model.predict(new_input_dummies)[0]
    new_pred_result = 'Customer is interested' if new_pred == 1 else 'Customer is not interested'

    return new_pred_result

In [81]:
@app.route('/run_data_driven_model', methods=['POST'])
def run_data_driven_model():
    """Handle a POST carrying a JSON body with ``voice_input`` and ``command``.

    * ``command == 'Add'``: store the record via ``add_data_to_training``.
    * ``command == 'Predict'``: retrain via ``perform_Machine_Learning`` and
      classify the record via ``predict_new_input``.

    Returns:
        A JSON response. Invalid requests (non-object body, missing/empty
        ``voice_input``, unknown ``command``) get a 400 with an ``error``
        message instead of falling through and returning ``None``.
    """
    # silent=True yields None for a missing/invalid JSON body so it can be
    # reported with the same JSON error shape as the other validation failures.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    new_input = data.get('voice_input')
    command = data.get('command')

    if not isinstance(new_input, str) or not new_input.strip():
        return jsonify({'error': "'voice_input' must be a non-empty string"}), 400

    if command == 'Add':
        add_data_to_training(new_input)
        return jsonify({'message': 'Data added to Training Set'})

    if command == 'Predict':
        xgb, XGBoost_accuracy, XGBoost_auc, all_columns = perform_Machine_Learning()
        new_input_result = predict_new_input(new_input, all_columns, xgb)
        return jsonify({
            # float() makes sure plain Python numbers reach the JSON encoder.
            'Accuracy on the test set is': float(XGBoost_accuracy),
            'AUC on the test set is': float(XGBoost_auc),
            'Prediction of this new data is': new_input_result,
        })

    # Any other command previously returned None, which Flask reports as a 500.
    return jsonify({'error': "'command' must be 'Add' or 'Predict'"}), 400

In [82]:
# Start Flask's built-in server on port 7777 when this code runs as the main
# module. app.run() blocks until the server is stopped (CTRL+C).
if __name__ == '__main__':
    app.run(port=7777)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:7777
Press CTRL+C to quit
127.0.0.1 - - [18/Dec/2023 18:03:32] "POST /run_data_driven_model HTTP/1.1" 200 -
  training_data = pd.read_sql('SELECT * FROM training_data', connection)
  test_data = pd.read_sql('SELECT * FROM test_data', connection)
127.0.0.1 - - [18/Dec/2023 18:03:41] "POST /run_data_driven_model HTTP/1.1" 200 -
