In [1]:
# Mount Google Drive at /content/drive; every data and model file used below
# is read from (or written to) a path under this mount point.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import important libraries

In [3]:
!pip install dill

Collecting dill
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Downloading dill-0.3.9-py3-none-any.whl (119 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/119.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m112.6/119.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.4/119.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.9


In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import dill
import warnings
from sklearn.exceptions import FitFailedWarning

In [41]:
# Silence UserWarning and FitFailedWarning for the rest of the session.
# NOTE(review): UserWarning is a broad category, so unrelated library warnings
# are hidden as well.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FitFailedWarning)

# Load test data


In [7]:
# Read the held-out features and targets from Parquet files on the mounted Drive.
# y_test is a DataFrame: later cells score column 0 as the churn label and
# column 1 as the profit target.
X_test = pd.read_parquet('/content/drive/MyDrive/Churn Prediction/X_test')
y_test = pd.read_parquet('/content/drive/MyDrive/Churn Prediction/y_test')

# Load preprocessing pipeline

In [6]:
# Deserialize the preprocessing pipeline with dill.
# NOTE: dill.load, like pickle.load, can execute arbitrary code while
# deserializing; only load files from a trusted source.
# The variable is spelled `pipline` (sic); the cell that builds the combined
# model refers to it by this exact name.
with open('/content/drive/MyDrive/Churn Prediction/test_pipeline.pkl', 'rb') as f:
    pipline = dill.load(f)

# Load all models

In [8]:
# Deserialize the churn classifier with dill (the file name suggests a
# hard-voting ensemble; confirm against the training notebook).
# NOTE: dill.load can execute arbitrary code; only load trusted files.
with open('/content/drive/MyDrive/Churn Prediction/hard_voting_model_churn.pkl', 'rb') as f:
    churn_model = dill.load(f)

In [9]:
# Deserialize the profit regressor with dill (the file name suggests an
# XGBoost regressor; confirm against the training notebook).
# NOTE: dill.load can execute arbitrary code; only load trusted files.
with open('/content/drive/MyDrive/Churn Prediction/xgboost_regr_profit.pkl', 'rb') as f:
    profit_model = dill.load(f)

# Create class for model aggregation

In [47]:
class AggregationModel:
    """Combine a churn classifier and a profit regressor behind one ``predict``.

    Raw features are passed through ``pipeline.transform``; the churn model
    then labels every row, and the profit model is queried only for rows whose
    churn label is non-zero. Rows labelled 0 receive the fixed value
    ``no_churn_profit`` instead.

    Parameters
    ----------
    churn_model : object with ``predict(X)``
        Returns one label per row; 0 is treated as "no churn".
    profit_model : object with ``predict(X)``
        Returns one numeric value per row.
    pipeline : object with ``transform(X)``
        Preprocessing applied to the raw features before both models.
    no_churn_profit : float, default -13.58
        Value reported for rows whose churn label is 0.
        NOTE(review): the origin of -13.58 is not documented in this
        notebook; confirm it with the model owner.
    """

    def __init__(self, churn_model, profit_model, pipeline, no_churn_profit=-13.58):
        self.churn_model = churn_model
        self.profit_model = profit_model
        self.pipeline = pipeline
        self.no_churn_profit = no_churn_profit

    def predict(self, X):
        """Predict churn labels and profit values for the rows of ``X``.

        Parameters
        ----------
        X : raw feature table accepted by ``pipeline.transform``.

        Returns
        -------
        tuple
            ``(churn_pred, profit_pred)``: ``churn_pred`` is whatever the churn
            model returned; ``profit_pred`` is a float NumPy array with one
            entry per row.
        """
        # Suppress the warnings only for the duration of this call instead of
        # permanently changing the process-wide warning filters.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            warnings.filterwarnings("ignore", category=FitFailedWarning)

            X_transformed = np.asarray(self.pipeline.transform(X))
            churn_pred = self.churn_model.predict(X_transformed)

            # Any non-zero label counts as churn, as in the `churn == 0` test
            # this replaces.
            churn_mask = np.asarray(churn_pred).ravel() != 0
            profit_pred = np.full(churn_mask.shape[0], self.no_churn_profit, dtype=float)

            # One batched regressor call instead of one call per row; skipped
            # entirely when nothing is predicted to churn, so the regressor
            # never receives an empty batch.
            if churn_mask.any():
                profit_pred[churn_mask] = self.profit_model.predict(X_transformed[churn_mask])

        return churn_pred, profit_pred

# Create custom model object

In [48]:
# Bundle the preprocessing pipeline, churn classifier and profit regressor
# into a single predictor object.
custom_model = AggregationModel(churn_model, profit_model, pipline)

# Predict test data

In [44]:
# predict() returns a (churn_pred, profit_pred) tuple, so y_pred[0] holds the
# churn labels and y_pred[1] the profit values.
y_pred = custom_model.predict(X_test)

# Import important libraries for evaluation

In [49]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, precision_score, recall_score, f1_score, precision_recall_curve, accuracy_score, r2_score, mean_absolute_error, mean_squared_error

# Churn prediction evaluation class

In [50]:
class BinaryClassificationEvaluation:
    """Metrics and plots for a binary classifier's predictions.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    y_pred_prob : array-like, optional
        Scores handed to the ROC / precision-recall functions (presumably
        positive-class probabilities; confirm with the caller). Required only
        by ``roc_score``, ``roc_curve_visualization`` and
        ``precision_recall_curve_plot``.
    """

    def __init__(self, y_test, y_pred, y_pred_prob=None):
        self.y_test = y_test
        self.y_pred = y_pred
        self.y_pred_prob = y_pred_prob

    def accuracy(self):
        """Return the accuracy of the predicted labels."""
        accuracy = accuracy_score(self.y_test, self.y_pred)
        return accuracy

    def classification_report(self):
        """Return scikit-learn's classification report for the predictions."""
        report = classification_report(self.y_test, self.y_pred)
        return report

    def precision_recall_f1(self):
        """Return ``(precision, recall, f1)`` for the predicted labels."""
        precision = precision_score(self.y_test, self.y_pred)
        recall = recall_score(self.y_test, self.y_pred)
        f1 = f1_score(self.y_test, self.y_pred)
        return precision, recall, f1

    def roc_score(self):
        """Return the ROC AUC of ``y_pred_prob``.

        Raises
        ------
        ValueError
            If ``y_pred_prob`` was not provided.
        """
        if self.y_pred_prob is None:
            raise ValueError("y_pred_prob must be provided for ROC curve evaluation.")
        auc_score = roc_auc_score(self.y_test, self.y_pred_prob)
        return auc_score

    def visualize_confusion_matrix(self):
        """Draw the confusion matrix as an annotated heatmap."""
        cm = confusion_matrix(self.y_test, self.y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=['Not Churn', 'Churn'],
                    yticklabels=['Not Churn', 'Churn'])
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix')
        plt.show()

    def roc_curve_visualization(self):
        """Print the ROC AUC and plot the ROC curve.

        Raises
        ------
        ValueError
            If ``y_pred_prob`` was not provided (raised by ``roc_score``).
        """
        auc_score = self.roc_score()
        print('=========================')
        # Built-in round() works for NumPy scalars and for plain Python
        # floats alike; a plain float has no .round() method.
        print('||','ROC AUC Score:', round(auc_score, 2),'||')
        print('=========================')
        fpr, tpr, _ = roc_curve(self.y_test, self.y_pred_prob)
        # Open a dedicated figure, as visualize_confusion_matrix does, so the
        # curve is never drawn onto a figure left open by earlier code.
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, color='blue', label='ROC Curve (area = %0.2f)' % auc_score)
        # Diagonal from (0, 0) to (1, 1) as a visual reference line.
        plt.plot([0, 1], [0, 1], color='red', linestyle='--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.legend(loc='lower right')
        plt.show()

    def precision_recall_curve_plot(self):
        """Plot precision against recall.

        Raises
        ------
        ValueError
            If ``y_pred_prob`` was not provided.
        """
        if self.y_pred_prob is None:
            raise ValueError("y_pred_prob must be provided for Precision-Recall curve evaluation.")

        precision, recall, _ = precision_recall_curve(self.y_test, self.y_pred_prob)
        # Dedicated figure for the same reason as in roc_curve_visualization.
        plt.figure(figsize=(8, 6))
        plt.plot(recall, precision, marker='.')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.show()

# Profit prediction evaluation class

In [51]:
class RegressionEvaluation:
    """Regression metrics and diagnostic plots for one set of predictions.

    ``y_test`` holds the observed targets and ``y_pred`` the model outputs;
    both are handed to the metric functions unchanged.
    """

    def __init__(self, y_test, y_pred):
        self.y_test, self.y_pred = y_test, y_pred

    def r2(self):
        """Return the R² score of the predictions."""
        score = r2_score(self.y_test, self.y_pred)
        return score

    def mean_absolute_error(self):
        """Return the mean absolute error of the predictions."""
        mae = mean_absolute_error(self.y_test, self.y_pred)
        return mae

    def mean_squared_error(self):
        """Return the mean squared error of the predictions."""
        mse = mean_squared_error(self.y_test, self.y_pred)
        return mse

    def root_mean_squared_error(self):
        """Return the square root of ``mean_squared_error()``."""
        mse = self.mean_squared_error()
        return np.sqrt(mse)

    def residuals_plot(self):
        """Scatter the residuals (actual minus predicted) against the predictions."""
        errors = self.y_test - self.y_pred
        plt.figure(figsize=(10, 6))
        sns.scatterplot(x=self.y_pred, y=errors)
        # Zero line: points above it are under-predictions, below over-predictions.
        plt.axhline(0, color='red', linestyle='--', linewidth=2)
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        plt.title('Residuals vs Predicted Values')
        plt.show()

    def predictions_vs_actual_plot(self):
        """Scatter predicted against actual values with a y = x reference line."""
        plt.figure(figsize=(10, 6))
        plt.scatter(self.y_test, self.y_pred, alpha=0.5)
        # The reference diagonal spans the observed range of the actual values.
        lowest, highest = self.y_test.min(), self.y_test.max()
        diagonal = [lowest, highest]
        plt.plot(diagonal, diagonal, 'r--', lw=2)
        plt.xlabel('Actual Values')
        plt.ylabel('Predicted Values')
        plt.title('Predictions vs Actual Values')
        plt.show()

    def summary(self):
        """Print R², MAE, MSE and RMSE, one per line."""
        report = (
            ("R² Score:", self.r2),
            ("Mean Absolute Error:", self.mean_absolute_error),
            ("Mean Squared Error:", self.mean_squared_error),
            ("Root Mean Squared Error:", self.root_mean_squared_error),
        )
        for label, metric in report:
            print(label, metric())

# Create evaluation objects

In [56]:
# Column 0 of y_test is scored as the churn label and column 1 as the profit
# target; y_pred[0] / y_pred[1] are the matching churn and profit predictions.
# NOTE(review): positional column access assumes y_test's column order;
# confirm against how y_test was saved.
# No y_pred_prob is passed, so the ROC and precision-recall methods of
# churn_model_evaluation will raise ValueError.
churn_model_evaluation = BinaryClassificationEvaluation(y_test.iloc[:,0], y_pred[0])
profit_model_evaluation = RegressionEvaluation(y_test.iloc[:,1], y_pred[1])

In [57]:
# The report is returned as multi-line text, so print() is used to render it.
print(churn_model_evaluation.classification_report())

              precision    recall  f1-score   support

           0       0.87      0.91      0.89     15296
           1       0.78      0.70      0.74      7226

    accuracy                           0.84     22522
   macro avg       0.82      0.80      0.81     22522
weighted avg       0.84      0.84      0.84     22522



In [58]:
# Print R², MAE, MSE and RMSE for the profit predictions. These are computed
# over every test row, including rows predicted as non-churn, whose profit is
# the model's fixed constant rather than a regressor output.
profit_model_evaluation.summary()

R² Score: 0.3220637138517549
Mean Absolute Error: 3.1193497611986674
Mean Squared Error: 49.92138268830222
Root Mean Squared Error: 7.065506541522852


# Save the model

In [59]:
# Persist the combined model (pipeline plus both estimators) to Drive with dill.
# NOTE(review): AggregationModel is defined in this notebook, so loading the
# file elsewhere presumably needs dill and compatible library versions;
# confirm in the consuming environment.
with open('/content/drive/MyDrive/Churn Prediction/churn_profit_model.pkl', 'wb') as f:
    dill.dump(custom_model, f)