In [156]:
import pandas as pd
import numpy as np
from collections import Counter

import mlflow
from mlflow.models import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.combine import SMOTETomek

from xgboost import XGBClassifier, plot_importance

import hyperopt as hp
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

## Load data

In [157]:
df = pd.read_csv("data/customer_churn_telecom_services.csv")
df["TotalCharges"] = df["TotalCharges"].fillna(df["tenure"]*df["MonthlyCharges"])

## Churn Prediction

* Churn prediction modelling can be an important tool for any company that aims to maximize customer retention, thus increase revenues and profitability.
* Predicting churn and successfully lowering it by a relatively small fraction, ensure increase in profits especially when it comes down to acquiring new customers. This can be
better modelled by using numbers such as Customer Lifetime Value (CLTV), Customer Acquisition Cost (CAC), Churn rate and Retention rate.

In addition, having historic data such as the purchase history (membership or upgrades), engagemnt data with the product/service sentiment metrics (complaints, feedback) etc can provide event better insights compared to using static data. 

## Use Case

* The dataset provides a plethora of data with regards to telecom customers. The data are static and it doesnt include any history data.
* The objective is to use the dataset and 'profile' the type of customer who is more likely to churn.

## Data Split

In [158]:
categ_feat = df.select_dtypes(include="object").columns.tolist()
categ_feat.remove("Churn")

df = pd.get_dummies(df, columns=categ_feat, drop_first=True)
df["Churn"] = df["Churn"].map({"No": 0, "Yes": 1})

#### Generate synthetic data

In [159]:
X = df.drop('Churn', axis=1)
y = df['Churn']

# sm = SMOTE(random_state=3)
# ada = ADASYN(random_state=3)
smtom = SMOTETomek(random_state=3)

# X, y = sm.fit_resample(X, y)
# X, y = ada.fit_resample(X, y)
X, y = smtom.fit_resample(X, y)


print('Resampled dataset shape %s' % Counter(y))

Resampled dataset shape Counter({0: 4753, 1: 4753})


In [160]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)

## Hyperparameter tuning - MLflow experiments

In [161]:
# mlflow server --host 127.0.0.1 --port 8080

In [162]:
mlflow.set_tracking_uri("http://localhost:8080")
mlflow.set_experiment("Churn prediction 2")

2025/03/01 17:05:39 INFO mlflow.tracking.fluent: Experiment with name 'Churn prediction 2' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/235808196041909500', creation_time=1740848739362, experiment_id='235808196041909500', last_update_time=1740848739362, lifecycle_stage='active', name='Churn prediction 2', tags={}>

In [163]:
# hyperparameter space
space={
    'max_depth': hp.quniform("max_depth", 3, 18, 1),
    'gamma': hp.uniform ('gamma', 1,9),
    'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
    'reg_lambda' : hp.uniform('reg_lambda', 0,1),
    'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
    'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
    'n_estimators': hp.quniform('n_estimators', 50, 200, 10)
    }

In [164]:
signature = infer_signature(X_train, y_train)



In [165]:
def hyperparameter_tuning(space):
    model = XGBClassifier(n_estimators = int(space['n_estimators']), 
                          max_depth = int(space['max_depth']), 
                          gamma = space['gamma'],
                          reg_alpha = int(space['reg_alpha']), 
                          min_child_weight=space['min_child_weight'],
                          colsample_bytree=space['colsample_bytree'],
                          eval_metric=["rmse"],
                          early_stopping_rounds=10,)
    
    evaluation = [(X_train, y_train), (X_test, y_test)]
    
    with mlflow.start_run(nested=True):
        model.fit(X_train, 
                y_train,
                eval_set=evaluation,
                verbose=False)

        pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, pred>0.5)

        # Log parameters and results
        mlflow.log_params(space)
        mlflow.log_metric("accuracy", accuracy)

        # Log model
        mlflow.xgboost.log_model(model, "model", signature=signature)

        print ("SCORE:", accuracy)
        return {'loss': -accuracy, 'status': STATUS_OK, 'model': model}

In [166]:
with mlflow.start_run():
    trials = Trials()
    best = fmin(fn=hyperparameter_tuning,
                space=space,
                algo=tpe.suggest,
                max_evals=30,
                trials=trials)
    best_run = sorted(trials.results, key=lambda x: x["loss"])[0]

    # Log the best parameters, loss, and model
    mlflow.log_params(best)
    mlflow.log_metric("accuracy", -best_run["loss"])

    # log bets model
    mlflow.xgboost.log_model(best_run["model"], "model", signature=signature)

    # Print out the best parameters and corresponding loss
    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")

  0%|          | 0/30 [00:00<?, ?trial/s, best loss=?]




SCORE:                                                
0.8175604626708728                                    
🏃 View run indecisive-toad-959 at: http://localhost:8080/#/experiments/235808196041909500/runs/1640800987f348f783401d5dfd23ae25

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500

  3%|▎         | 1/30 [00:04<01:56,  4.03s/trial, best loss: -0.8175604626708728]




SCORE:                                                                           
0.8138801261829653                                                               
🏃 View run bald-shrimp-547 at: http://localhost:8080/#/experiments/235808196041909500/runs/0014282137b04dc89f3f26e92ac697ee

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

  7%|▋         | 2/30 [00:07<01:40,  3.57s/trial, best loss: -0.8175604626708728]




SCORE:                                                                           
0.8144058885383807                                                               
🏃 View run treasured-bee-45 at: http://localhost:8080/#/experiments/235808196041909500/runs/7fcd78fcfdc6440f8431a39c6cb3ccd1

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 10%|█         | 3/30 [00:10<01:32,  3.43s/trial, best loss: -0.8175604626708728]




SCORE:                                                                           
0.8165089379600421                                                               
🏃 View run stylish-ant-18 at: http://localhost:8080/#/experiments/235808196041909500/runs/fd0cd6119034411f918b60eacd040373

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 13%|█▎        | 4/30 [00:13<01:28,  3.41s/trial, best loss: -0.8175604626708728]




SCORE:                                                                           
0.8065194532071503                                                               
🏃 View run mercurial-ray-860 at: http://localhost:8080/#/experiments/235808196041909500/runs/314f494a0b5e43aa873c96d860170ce4

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 17%|█▋        | 5/30 [00:17<01:24,  3.39s/trial, best loss: -0.8175604626708728]




SCORE:                                                                           
0.8212407991587802                                                               
🏃 View run treasured-cat-92 at: http://localhost:8080/#/experiments/235808196041909500/runs/c927b93d2a994b9f96924ced8c95f2ab

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 20%|██        | 6/30 [00:20<01:20,  3.34s/trial, best loss: -0.8212407991587802]




SCORE:                                                                           
0.8175604626708728                                                               
🏃 View run zealous-snail-192 at: http://localhost:8080/#/experiments/235808196041909500/runs/101c78309848473e8a55d161cbb02cb6

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 23%|██▎       | 7/30 [00:23<01:14,  3.26s/trial, best loss: -0.8212407991587802]




SCORE:                                                                           
0.8175604626708728                                                               
🏃 View run trusting-tern-255 at: http://localhost:8080/#/experiments/235808196041909500/runs/10d50e85d5f545b4a8254c8d37e7a1f0

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 27%|██▋       | 8/30 [00:26<01:09,  3.15s/trial, best loss: -0.8212407991587802]




SCORE:                                                                           
0.814931650893796                                                                
🏃 View run legendary-tern-278 at: http://localhost:8080/#/experiments/235808196041909500/runs/bef681e43d764c29b9127e80f2df47f3

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 30%|███       | 9/30 [00:29<01:04,  3.08s/trial, best loss: -0.8212407991587802]




SCORE:                                                                           
0.8117770767613038                                                               
🏃 View run bustling-asp-587 at: http://localhost:8080/#/experiments/235808196041909500/runs/74dc055dd6444899a0b1e908bfec8895

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500    

 33%|███▎      | 10/30 [00:32<01:00,  3.01s/trial, best loss: -0.8212407991587802]




SCORE:                                                                            
0.8243953732912723                                                                
🏃 View run gaudy-fawn-240 at: http://localhost:8080/#/experiments/235808196041909500/runs/431f01f88ff24ba8ac0ea4f2e94c0239

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 37%|███▋      | 11/30 [00:35<00:57,  3.02s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8080967402733964                                                                
🏃 View run worried-rook-489 at: http://localhost:8080/#/experiments/235808196041909500/runs/d6941a9b53b64688b89de76b09700018

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 40%|████      | 12/30 [00:38<00:53,  2.98s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8107255520504731                                                                
🏃 View run melodic-flea-345 at: http://localhost:8080/#/experiments/235808196041909500/runs/2c8d8fb50d0048ec982a50a16dcd16f9

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 43%|████▎     | 13/30 [00:41<00:49,  2.94s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8159831756046267                                                                
🏃 View run carefree-lamb-64 at: http://localhost:8080/#/experiments/235808196041909500/runs/f9eb5cfe83d445d382093ee7bd96ac0c

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 47%|████▋     | 14/30 [00:43<00:46,  2.92s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8086225026288117                                                                
🏃 View run brawny-hawk-138 at: http://localhost:8080/#/experiments/235808196041909500/runs/b38b78870b9f40d2bf22efdf850c1251

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 50%|█████     | 15/30 [00:46<00:43,  2.92s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8144058885383807                                                                
🏃 View run resilient-shoat-444 at: http://localhost:8080/#/experiments/235808196041909500/runs/2fb0e5d7833849e2805061b95135bfc2

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 53%|█████▎    | 16/30 [00:49<00:41,  2.94s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8117770767613038                                                                
🏃 View run rogue-sheep-571 at: http://localhost:8080/#/experiments/235808196041909500/runs/20b05575be1c471dab1e035090dbc4d3

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 57%|█████▋    | 17/30 [00:53<00:39,  3.00s/trial, best loss: -0.8243953732912723]




SCORE:                                                                            
0.8259726603575184                                                                
🏃 View run intrigued-gnu-506 at: http://localhost:8080/#/experiments/235808196041909500/runs/2fb48d02ca2349d094b0f4f86cd53a36

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 60%|██████    | 18/30 [00:56<00:36,  3.01s/trial, best loss: -0.8259726603575184]




SCORE:                                                                            
0.8217665615141956                                                                
🏃 View run sassy-doe-406 at: http://localhost:8080/#/experiments/235808196041909500/runs/1fc4d10454a14497b5857f0101f9911c

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 63%|██████▎   | 19/30 [00:59<00:33,  3.05s/trial, best loss: -0.8259726603575184]




SCORE:                                                                            
0.8291272344900105                                                                
🏃 View run nosy-loon-595 at: http://localhost:8080/#/experiments/235808196041909500/runs/876757a8bf29454997281d12e7377475

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 67%|██████▋   | 20/30 [01:02<00:31,  3.17s/trial, best loss: -0.8291272344900105]




SCORE:                                                                            
0.8333333333333334                                                                
🏃 View run bedecked-penguin-109 at: http://localhost:8080/#/experiments/235808196041909500/runs/2fecb4f117d54a5e9a5b33429d7aa3bb

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 70%|███████   | 21/30 [01:05<00:28,  3.21s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8296529968454258                                                                
🏃 View run thoughtful-elk-545 at: http://localhost:8080/#/experiments/235808196041909500/runs/d03d1d55759f4099b96444f501d3730a

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 73%|███████▎  | 22/30 [01:09<00:26,  3.27s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8228180862250263                                                                
🏃 View run bemused-dove-136 at: http://localhost:8080/#/experiments/235808196041909500/runs/3ab6976d85de40d4b8f2ce4385102364

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 77%|███████▋  | 23/30 [01:12<00:23,  3.32s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8270241850683491                                                                
🏃 View run fun-sponge-726 at: http://localhost:8080/#/experiments/235808196041909500/runs/8a30eee82f454531b472482c76b6fa6c

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 80%|████████  | 24/30 [01:16<00:21,  3.51s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.823869610935857                                                                 
🏃 View run unique-horse-559 at: http://localhost:8080/#/experiments/235808196041909500/runs/7c6b7f19682e4b798bd3af1463b30328

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 83%|████████▎ | 25/30 [01:20<00:17,  3.55s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8275499474237644                                                                
🏃 View run aged-wren-790 at: http://localhost:8080/#/experiments/235808196041909500/runs/323a9ad38fae474dac5015b468d84fc8

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 87%|████████▋ | 26/30 [01:23<00:13,  3.49s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8201892744479495                                                                
🏃 View run intrigued-snipe-748 at: http://localhost:8080/#/experiments/235808196041909500/runs/4b0254618d0e46b386803c7211ac9345

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 90%|█████████ | 27/30 [01:26<00:10,  3.39s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8217665615141956                                                                
🏃 View run auspicious-smelt-759 at: http://localhost:8080/#/experiments/235808196041909500/runs/34cbe000ae1b44f6a0e2416d72f7c2bf

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 93%|█████████▎| 28/30 [01:30<00:06,  3.35s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.8228180862250263                                                                
🏃 View run enthused-flea-531 at: http://localhost:8080/#/experiments/235808196041909500/runs/c51e2fe3095749cf821379a200b0de42

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

 97%|█████████▋| 29/30 [01:33<00:03,  3.31s/trial, best loss: -0.8333333333333334]




SCORE:                                                                            
0.823869610935857                                                                 
🏃 View run flawless-asp-594 at: http://localhost:8080/#/experiments/235808196041909500/runs/24d1b51da1614bc597e6accf067ed655

🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500     

100%|██████████| 30/30 [01:36<00:00,  3.21s/trial, best loss: -0.8333333333333334]




Best parameters: {'colsample_bytree': 0.9959164208865653, 'gamma': 1.359593592969206, 'max_depth': 12.0, 'min_child_weight': 4.0, 'n_estimators': 150.0, 'reg_alpha': 66.0, 'reg_lambda': 0.41252077656727526}
Best eval rmse: -0.8333333333333334
🏃 View run luminous-wasp-592 at: http://localhost:8080/#/experiments/235808196041909500/runs/256c5f7592f4420396fc829c4d105093
🧪 View experiment at: http://localhost:8080/#/experiments/235808196041909500


## Load the best model artifact

In [140]:
# download model artifact directory
mlflow.artifacts.download_artifacts("mlflow-artifacts:/503687084130154334/258f3da1ff74463fa79355c959d4b8f5/artifacts/model/model.xgb", dst_path="best_model")
print("Done")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Done


In [141]:
# Load model
model = XGBClassifier()
model.load_model("./best_model/model.xgb")

In [154]:
predict_y = model.predict(X_test)
accuracy = accuracy_score(y_test, predict_y>0.5)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8268625393494229


In [155]:
model.predict(np.expand_dims(np.array(X_test.loc[0, :]), 0))

array([1])