In [1]:
#import libs
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter


In [2]:
datatrend=pd.read_csv('../data/cleaned_data.csv')

In [3]:
datatrend.head()

Unnamed: 0,InvoiceNo,InvoiceDate,CustomerID,StockCode,Description,Quantity,UnitPrice,Country,Total_price,InvoiceYear,InvoiceMonth
0,536365,2010-12-01,17850.0,SC1734,Electronics,65,10.23,Egypt,664.95,2010,2010-12
1,536365,2010-12-01,17850.0,SC2088,Furniture,95,19.61,Mali,1862.95,2010,2010-12
2,536365,2010-12-01,17850.0,SC3463,Books,78,61.49,Mali,4796.22,2010,2010-12
3,536365,2010-12-01,17850.0,SC6228,Toys,15,24.73,South Africa,370.95,2010,2010-12
4,536365,2010-12-01,17850.0,SC2149,Toys,50,38.83,Rwanda,1941.5,2010,2010-12


In [4]:
rfm = pd.read_csv('../data/rfm_data.csv')
rfm.head()

Unnamed: 0,Recency,Frequency,Monetary,T,AvgBasketsize,TotalTransactions,Churned
0,341,2,5342.4,341,59.5,2,1
1,7,7,431501.0,372,46.763736,7,0
2,98,4,82378.47,372,49.645161,4,0
3,37,1,176075.12,37,47.041096,1,0
4,310,1,48173.37,310,53.941176,1,1


# MODELLING

Data Splitting

In [5]:
from sklearn.model_selection import train_test_split

X = rfm.drop(columns=['Churned']) # features
y = rfm['Churned']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

In [6]:
import joblib
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import os

#create 'models' directory
models_dir = os.path.join(os.path.dirname(os.getcwd()), 'models')
os.makedirs(models_dir, exist_ok=True)

#Train and save models
rf_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss').fit(X_train, y_train)

#save models
joblib.dump(rf_clf, os.path.join(models_dir, 'random_forest_model.pkl'))
joblib.dump(xgb_clf, os.path.join(models_dir, 'xgboost_model.pkl'))

print('Models saved successfully')

Models saved successfully


In [7]:
from sklearn.metrics import confusion_matrix, classification_report
#Create 'evaluations' directory

eval_dir = os.path.join(os.path.dirname(os.getcwd()), 'evaluations')
os.makedirs(eval_dir, exist_ok=True)

#Define a function to save confusion matrix and classification report
def save_evaluation(model_name, y_true, y_pred):
    # Confusion matrix
    plt.figure(figsize=(6,4))
    sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d', cmap='Blues')
    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(os.path.join(eval_dir,f'{model_name}_ConfusionMatrix.png'))
    plt.close()


    # Classification report
    report = classification_report(y_true, y_pred)
    plt.figure(figsize=(8,6))
    plt.text(0.01, 1.0, report, {'fontsize' : 10}, fontproperties='monospace')
    plt.axis('off')
    plt.savefig(os.path.join(eval_dir,f'{model_name}_classificationReport.png'), bbox_inches='tight')
    plt.close()


    # Also print results
    print(f'{model_name} Results:')
    print(report)
    print(confusion_matrix(y_true, y_pred))
    print('-' * 50)


# Random Forest evaluation
save_evaluation('RandomForest', y_test, rf_clf.predict(X_test))

# XGBoost evaluation
save_evaluation('XGBoost', y_test, xgb_clf.predict(X_test))


import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Create 'evaluations' directory
eval_dir = os.path.join(os.path.dirname(os.getcwd()), 'evaluations')
os.makedirs(eval_dir, exist_ok=True)

# Define a function to save evaluation results
def save_evaluation(model_name, y_true, y_pred):
    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(os.path.join(eval_dir, f'{model_name}_ConfusionMatrix.png'))
    plt.close()

    # Classification report
    report = classification_report(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    plt.text(0.01, 1.0, report, {'fontsize': 10}, fontproperties='monospace')
    plt.axis('off')
    plt.savefig(os.path.join(eval_dir, f'{model_name}_ClassificationReport.png'), bbox_inches='tight')
    plt.close()

    # Print results
    print(f'{model_name} Results')
    print(report)
    print(cm)
    print('-' * 50)

# Random Forest evaluation
save_evaluation('RandomForest', y_test, rf_clf.predict(X_test))

# XGBoost evaluation
save_evaluation('XGBoost', y_test, xgb_clf.predict(X_test))



RandomForest Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       716
           1       1.00      1.00      1.00       159

    accuracy                           1.00       875
   macro avg       1.00      1.00      1.00       875
weighted avg       1.00      1.00      1.00       875

[[716   0]
 [  0 159]]
--------------------------------------------------
XGBoost Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       716
           1       1.00      1.00      1.00       159

    accuracy                           1.00       875
   macro avg       1.00      1.00      1.00       875
weighted avg       1.00      1.00      1.00       875

[[716   0]
 [  0 159]]
--------------------------------------------------
RandomForest Results
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       716
           1       1.00      

In [8]:
!pip install gradio



In [9]:
import gradio as gr
import joblib
import numpy as np

# Load the trained Random Forest model
rf_clf = joblib.load('../models/random_forest_model.pkl')

# Prediction function for Gradio
def predict(*input_data):
    input_data = np.array(input_data).reshape(1, -1)  # Convert to numpy array and reshape for model
    prediction = rf_clf.predict(input_data)  # Predict churn (0 = active, 1 = churned)
    return "Churned" if prediction[0] == 1 else "Active"

# Gradio interface function
def create_interface():
    # Define input fields based on features (columns in your model input data)
    input_features = [gr.Number(label=col) for col in ['Recency', 'Frequency', 'Monetary', 'CustomerLifetime', 'AvgBasketSize', 'TotalTransactions']]
    output = gr.Label()

    # Create the Gradio interface with live=False to wait for input
    interface = gr.Interface(
        fn=predict,
        inputs=input_features,
        outputs=output,
        live=False  # Wait for "Submit"
    )

    interface.launch(share=True)

# Launch the Gradio interface
create_interface()


* Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


In [10]:
rfm

Unnamed: 0,Recency,Frequency,Monetary,T,AvgBasketsize,TotalTransactions,Churned
0,341,2,5342.40,341,59.500000,2,1
1,7,7,431501.00,372,46.763736,7,0
2,98,4,82378.47,372,49.645161,4,0
3,37,1,176075.12,37,47.041096,1,0
4,310,1,48173.37,310,53.941176,1,1
...,...,...,...,...,...,...,...
4367,282,1,18907.36,282,49.500000,1,1
4368,190,1,26009.01,190,54.142857,1,0
4369,7,3,36010.74,129,60.461538,3,0
4370,7,16,2008747.62,341,53.615079,16,0
