In [120]:
import pandas as pd
import numpy as np
from collections import Counter

import mlflow
from mlflow.models import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.combine import SMOTETomek

from xgboost import XGBClassifier, plot_importance

import hyperopt as hp
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

## Load data

In [93]:
df = pd.read_csv("data/customer_churn_telecom_services.csv")
df["TotalCharges"] = df["TotalCharges"].fillna(df["tenure"]*df["MonthlyCharges"])

## Churn Prediction

* Churn prediction modelling can be an important tool for any company that aims to maximize customer retention, thus increase revenues and profitability.
* Predicting churn and successfully lowering it by a relatively small fraction, ensure increase in profits especially when it comes down to acquiring new customers. This can be
better modelled by using numbers such as Customer Lifetime Value (CLTV), Customer Acquisition Cost (CAC), Churn rate and Retention rate.

In addition, having historic data such as the purchase history (membership or upgrades), engagemnt data with the product/service sentiment metrics (complaints, feedback) etc can provide event better insights compared to using static data. 

## Use Case

* The dataset provides a plethora of data with regards to telecom customers. The data are static and it doesnt include any history data.
* The objective is to use the dataset and 'profile' the type of customer who is more likely to churn.

## Data Split

In [94]:
categ_feat = df.select_dtypes(include="object").columns.tolist()
categ_feat.remove("Churn")

df = pd.get_dummies(df, columns=categ_feat, drop_first=True)
df["Churn"] = df["Churn"].map({"No": 0, "Yes": 1})

#### Generate synthetic data

In [95]:
X = df.drop('Churn', axis=1)
y = df['Churn']

# sm = SMOTE(random_state=3)
# ada = ADASYN(random_state=3)
smtom = SMOTETomek(random_state=3)

# X, y = sm.fit_resample(X, y)
# X, y = ada.fit_resample(X, y)
X, y = smtom.fit_resample(X, y)


print('Resampled dataset shape %s' % Counter(y))

Resampled dataset shape Counter({0: 4765, 1: 4765})


In [96]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)

## Hyperparameter tuning - MLflow experiments

In [112]:
mlflow.set_tracking_uri("http://localhost:8080")
mlflow.set_experiment("Churn prediction")

2025/02/27 23:37:21 INFO mlflow.tracking.fluent: Experiment with name 'Churn prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/503687084130154334', creation_time=1740699441959, experiment_id='503687084130154334', last_update_time=1740699441959, lifecycle_stage='active', name='Churn prediction', tags={}>

In [121]:
# hyperparameter space
space={
    'max_depth': hp.quniform("max_depth", 3, 18, 1),
    'gamma': hp.uniform ('gamma', 1,9),
    'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
    'reg_lambda' : hp.uniform('reg_lambda', 0,1),
    'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
    'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
    'n_estimators': hp.quniform('n_estimators', 50, 200, 10)
    }

In [122]:
signature = infer_signature(X_train, y_train)



In [125]:
def hyperparameter_tuning(space):
    model = XGBClassifier(n_estimators = int(space['n_estimators']), 
                          max_depth = int(space['max_depth']), 
                          gamma = space['gamma'],
                          reg_alpha = int(space['reg_alpha']), 
                          min_child_weight=space['min_child_weight'],
                          colsample_bytree=space['colsample_bytree'],
                          eval_metric=["rmse"],
                          early_stopping_rounds=10,)
    
    evaluation = [(X_train, y_train), (X_test, y_test)]
    
    with mlflow.start_run(nested=True):
        model.fit(X_train, 
                y_train,
                eval_set=evaluation,
                verbose=False)

        pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, pred>0.5)

        # Log parameters and results
        mlflow.log_params(space)
        mlflow.log_metric("accuracy", accuracy)

        # Log model
        mlflow.xgboost.log_model(model, "model", signature=signature)

        print ("SCORE:", accuracy)
        return {'loss': -accuracy, 'status': STATUS_OK, 'model': model}

In [126]:
with mlflow.start_run():
    trials = Trials()
    best = fmin(fn=hyperparameter_tuning,
                space=space,
                algo=tpe.suggest,
                max_evals=30,
                trials=trials)
    best_run = sorted(trials.results, key=lambda x: x["loss"])[0]

    # Log the best parameters, loss, and model
    mlflow.log_params(best)
    mlflow.log_metric("accuracy", -best_run["loss"])

    # log bets model
    mlflow.xgboost.log_model(best_run["model"], "model", signature=signature)

    # Print out the best parameters and corresponding loss
    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")

  0%|          | 0/30 [00:00<?, ?trial/s, best loss=?]




SCORE:                                                
0.8058761804826863                                    
🏃 View run rebellious-moth-837 at: http://localhost:8080/#/experiments/503687084130154334/runs/d8d9d0bebb98402b92e7699eebc5ecaa

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334

  3%|▎         | 1/30 [00:04<02:03,  4.27s/trial, best loss: -0.8058761804826863]




SCORE:                                                                           
0.8116474291710388                                                               
🏃 View run worried-gnat-821 at: http://localhost:8080/#/experiments/503687084130154334/runs/7836e8c3f26e46888ac61c92cdf7df73

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

  7%|▋         | 2/30 [00:07<01:39,  3.57s/trial, best loss: -0.8116474291710388]




SCORE:                                                                           
0.8179433368310598                                                               
🏃 View run likeable-steed-932 at: http://localhost:8080/#/experiments/503687084130154334/runs/858554500bf44346809a509e0a8c6c78

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 10%|█         | 3/30 [00:10<01:30,  3.36s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.8079748163693599                                                               
🏃 View run resilient-mink-54 at: http://localhost:8080/#/experiments/503687084130154334/runs/e05fe543cd3f489c9eae54dacd35bc66

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 13%|█▎        | 4/30 [00:13<01:26,  3.34s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.8058761804826863                                                               
🏃 View run popular-wolf-65 at: http://localhost:8080/#/experiments/503687084130154334/runs/af8df71100ea4650be90d8b55ed7cac8

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 17%|█▋        | 5/30 [00:16<01:22,  3.29s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.8079748163693599                                                               
🏃 View run masked-mouse-204 at: http://localhost:8080/#/experiments/503687084130154334/runs/a296fd84754e49e3bf333415268f736d

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 20%|██        | 6/30 [00:20<01:18,  3.28s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.7922350472193075                                                               
🏃 View run hilarious-rook-209 at: http://localhost:8080/#/experiments/503687084130154334/runs/7ec531bb78e2408a8a2770725160c535

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 23%|██▎       | 7/30 [00:23<01:13,  3.18s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.8090241343126967                                                               
🏃 View run crawling-bird-621 at: http://localhost:8080/#/experiments/503687084130154334/runs/89d7b3dcebf840e4b639a0900fe6e5fa

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 27%|██▋       | 8/30 [00:26<01:10,  3.18s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.7948583420776495                                                               
🏃 View run languid-bear-51 at: http://localhost:8080/#/experiments/503687084130154334/runs/ab2c9ed9d06842e5bc629abf19b9aef3

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 30%|███       | 9/30 [00:29<01:06,  3.17s/trial, best loss: -0.8179433368310598]




SCORE:                                                                           
0.8247639034627492                                                               
🏃 View run exultant-mink-989 at: http://localhost:8080/#/experiments/503687084130154334/runs/faf51c1311c043a99c7998df905d8bd7

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334    

 33%|███▎      | 10/30 [00:32<01:04,  3.23s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8111227701993704                                                                
🏃 View run abrasive-gull-6 at: http://localhost:8080/#/experiments/503687084130154334/runs/381adf27cd334c9ead1da2f4da6b1980

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 37%|███▋      | 11/30 [00:36<01:01,  3.23s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8079748163693599                                                                
🏃 View run bittersweet-steed-27 at: http://localhost:8080/#/experiments/503687084130154334/runs/1e5ff876ecfb494fa18d43ad1a403e3b

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 40%|████      | 12/30 [00:39<00:57,  3.17s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8058761804826863                                                                
🏃 View run peaceful-shark-502 at: http://localhost:8080/#/experiments/503687084130154334/runs/e15e8a9510084b24bb2626df71c8deec

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 43%|████▎     | 13/30 [00:42<00:52,  3.10s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.804302203567681                                                                 
🏃 View run delightful-carp-161 at: http://localhost:8080/#/experiments/503687084130154334/runs/42dfeff06dbd4f8e858aaca3e8584258

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 47%|████▋     | 14/30 [00:44<00:48,  3.01s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.79800629590766                                                                  
🏃 View run masked-vole-875 at: http://localhost:8080/#/experiments/503687084130154334/runs/ce94792bb872403b87ab58bd18a68081

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 50%|█████     | 15/30 [00:47<00:44,  2.96s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8037775445960126                                                                
🏃 View run welcoming-tern-943 at: http://localhost:8080/#/experiments/503687084130154334/runs/28e4f95c5fb84b0cad5f676a41a7ab53

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 53%|█████▎    | 16/30 [00:50<00:41,  2.93s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8084994753410283                                                                
🏃 View run worried-wasp-222 at: http://localhost:8080/#/experiments/503687084130154334/runs/48865cf1bac3448a8ac5f771d34ae0dc

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 57%|█████▋    | 17/30 [00:53<00:38,  2.95s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8053515215110179                                                                
🏃 View run resilient-ram-784 at: http://localhost:8080/#/experiments/503687084130154334/runs/faa248a62ecf45849491cae0d31d9874

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 60%|██████    | 18/30 [00:56<00:35,  2.93s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8074501573976915                                                                
🏃 View run sneaky-zebra-826 at: http://localhost:8080/#/experiments/503687084130154334/runs/efdfb185eafb48e2b6512ee075ef4988

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 63%|██████▎   | 19/30 [00:59<00:31,  2.90s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8137460650577125                                                                
🏃 View run treasured-cow-113 at: http://localhost:8080/#/experiments/503687084130154334/runs/1e25f560529f40119a1148c087fdff51

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 67%|██████▋   | 20/30 [01:02<00:29,  2.95s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8163693599160545                                                                
🏃 View run hilarious-robin-875 at: http://localhost:8080/#/experiments/503687084130154334/runs/c3b1e72b635645fe855e899278388130

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 70%|███████   | 21/30 [01:05<00:27,  3.03s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8237145855194123                                                                
🏃 View run selective-mouse-89 at: http://localhost:8080/#/experiments/503687084130154334/runs/8c2be6f9903f41bc809da70ab430f65a

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 73%|███████▎  | 22/30 [01:08<00:24,  3.07s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8147953830010494                                                                
🏃 View run mercurial-gull-453 at: http://localhost:8080/#/experiments/503687084130154334/runs/5a65001473a74bb6ba674ebf6531d24f

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 77%|███████▋  | 23/30 [01:11<00:21,  3.04s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8126967471143757                                                                
🏃 View run capable-mouse-490 at: http://localhost:8080/#/experiments/503687084130154334/runs/f6b77396890e446a95ab6f0bda57668f

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 80%|████████  | 24/30 [01:14<00:18,  3.00s/trial, best loss: -0.8247639034627492]




SCORE:                                                                            
0.8268625393494229                                                                
🏃 View run handsome-mare-492 at: http://localhost:8080/#/experiments/503687084130154334/runs/7ce9b6d548bd44b2a6bddf9d0b8513fc

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 83%|████████▎ | 25/30 [01:17<00:14,  2.96s/trial, best loss: -0.8268625393494229]




SCORE:                                                                            
0.7959076600209863                                                                
🏃 View run popular-ant-857 at: http://localhost:8080/#/experiments/503687084130154334/runs/d8a9c63df0d346ce821e7ebb2209cefd

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 87%|████████▋ | 26/30 [01:20<00:11,  2.94s/trial, best loss: -0.8268625393494229]




SCORE:                                                                            
0.8179433368310598                                                                
🏃 View run victorious-lamb-939 at: http://localhost:8080/#/experiments/503687084130154334/runs/e297144c62c54edb8cafde1613d20968

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 90%|█████████ | 27/30 [01:23<00:08,  2.98s/trial, best loss: -0.8268625393494229]




SCORE:                                                                            
0.8231899265477439                                                                
🏃 View run languid-skunk-736 at: http://localhost:8080/#/experiments/503687084130154334/runs/4e48ac13845a4441969284d40b74a554

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 93%|█████████▎| 28/30 [01:26<00:05,  2.99s/trial, best loss: -0.8268625393494229]




SCORE:                                                                            
0.8058761804826863                                                                
🏃 View run kindly-rat-54 at: http://localhost:8080/#/experiments/503687084130154334/runs/d28654b90d0941b4bc308ff08be04f8c

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

 97%|█████████▋| 29/30 [01:29<00:02,  2.94s/trial, best loss: -0.8268625393494229]




SCORE:                                                                            
0.810598111227702                                                                 
🏃 View run shivering-seal-955 at: http://localhost:8080/#/experiments/503687084130154334/runs/3efc803b0d89433c8ae6464144f78552

🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334     

100%|██████████| 30/30 [01:32<00:00,  3.07s/trial, best loss: -0.8268625393494229]




Best parameters: {'colsample_bytree': 0.7200600780671598, 'gamma': 1.6523193100403246, 'max_depth': 12.0, 'min_child_weight': 8.0, 'n_estimators': 190.0, 'reg_alpha': 40.0, 'reg_lambda': 0.8374806119909937}
Best eval rmse: -0.8268625393494229
🏃 View run bustling-goose-55 at: http://localhost:8080/#/experiments/503687084130154334/runs/258f3da1ff74463fa79355c959d4b8f5
🧪 View experiment at: http://localhost:8080/#/experiments/503687084130154334


## Load the best model artifact

In [140]:
# download model artifact directory
mlflow.artifacts.download_artifacts("mlflow-artifacts:/503687084130154334/258f3da1ff74463fa79355c959d4b8f5/artifacts/model/model.xgb", dst_path="best_model")
print("Done")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Done


In [141]:
# Load model
model = XGBClassifier()
model.load_model("./best_model/model.xgb")

In [154]:
predict_y = model.predict(X_test)
accuracy = accuracy_score(y_test, predict_y>0.5)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8268625393494229


In [155]:
model.predict(np.expand_dims(np.array(X_test.loc[0, :]), 0))

array([1])