In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta
from sqlalchemy import create_engine
from dotenv import load_dotenv, find_dotenv
from mlflow.tracking import MlflowClient
import mlflow.sklearn
import joblib

# Find a .env file and load it; the return value is deliberately discarded into `_`.
env_file = find_dotenv()
_ = load_dotenv(env_file)

In [2]:
# Raw Telco customer-churn table, read relative to the notebook's directory.
raw_csv_path = "../data/WA_Fn-UseC_-Telco-Customer-Churn.csv"
base = pd.read_csv(raw_csv_path)

In [3]:
# Column groups used to pick the model features out of the raw table.
cat_cols = ['gender', 'MultipleLines', 'InternetService', 'OnlineSecurity',
            'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
            'StreamingMovies', 'Contract', 'PaymentMethod']

num_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']

bin_cols = ['SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']

# The target is not selected into the scoring frame built below.
target_col = ['Churn']

# Shared Yes/No -> 1/0 encoding for the binary columns that are re-mapped below.
# SeniorCitizen is in bin_cols but is passed through without mapping.
yes_no_map = {'Yes': 1, 'No': 0}

dataset = (base
 .drop_duplicates(keep='first')
 # Filter on the de-duplicated frame itself. The previous form dropped index
 # labels computed on the raw `base`, which raises KeyError if one of those
 # rows had already been removed as a duplicate.
 .loc[lambda x: x['TotalCharges'] != ' ']
 [cat_cols + num_cols + bin_cols]
 .assign(Partner = lambda x: x.Partner.map(yes_no_map),
         Dependents = lambda x: x.Dependents.map(yes_no_map),
         PhoneService = lambda x: x.PhoneService.map(yes_no_map),
         PaperlessBilling = lambda x: x.PaperlessBilling.map(yes_no_map)
         )
 # Fixed seed so the same 50 rows are drawn on every run.
 .sample(50, random_state=11)
)

In [4]:
# Set the tracking server address first; the client below is created afterwards.
tracking_uri = "http://localhost:5000"
mlflow.set_tracking_uri(tracking_uri)

client = MlflowClient()
models = client.search_registered_models()

# Header line, then one line per registered model returned by the search.
print("Models registered in MLflow:")
for model in models:
    registered_name = model.name
    print(f"Model name: {registered_name}")

Models registered in MLflow:
Model name: telco_customer_churn


In [5]:
# Registry coordinates of the model to score with: registered name and alias.
model_name, model_alias = "telco_customer_churn", "champion"

In [6]:
# Resolve the alias to a concrete model version; its run id locates the artifacts.
model_version_details = client.get_model_version_by_alias(model_name, model_alias)

# Download the preprocessing pipeline logged under that run into ./assets.
artifact_path = 'preprocessing/preprocessing_pipeline.pkl'
local_path = mlflow.artifacts.download_artifacts(
    run_id=model_version_details.run_id,
    artifact_path=artifact_path,
    dst_path='./assets',
)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code.
# Only load artifacts that come from a tracking server you trust.
preprocessor = joblib.load(local_path)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 57.92it/s]


In [7]:
# Pass only the feature columns, in the order `dataset` was built with.
# A later cell adds prediction columns to `dataset` in place; selecting the
# features explicitly keeps this cell re-runnable after that has happened.
feature_cols = cat_cols + num_cols + bin_cols
dataset_preprocessed = preprocessor.transform(dataset[feature_cols])

In [8]:
# Build a registry URI that addresses the model by alias, then load it
# through MLflow's sklearn flavor.
model_uri = "models:/{}@{}".format(model_name, model_alias)
model = mlflow.sklearn.load_model(model_uri)

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 34.87it/s]


In [9]:
# Score the preprocessed rows twice: once for labels, once for probabilities.
scoring_input = dataset_preprocessed
pred = model.predict(scoring_input)
pred_prob = model.predict_proba(scoring_input)

In [10]:
# Show the predicted labels alongside the probability array as the cell output.
pred, pred_prob

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
        0, 1, 0, 0, 0, 1], dtype=int64),
 array([[0.65915093, 0.34084907],
        [0.69639831, 0.30360169],
        [0.95986533, 0.04013467],
        [0.99237263, 0.00762737],
        [0.93742711, 0.06257289],
        [0.81981005, 0.18018995],
        [0.92266259, 0.07733741],
        [0.73486063, 0.26513937],
        [0.93755569, 0.06244431],
        [0.77186951, 0.22813049],
        [0.63886534, 0.36113466],
        [0.60158855, 0.39841145],
        [0.98365125, 0.01634875],
        [0.89250902, 0.10749098],
        [0.56096542, 0.43903458],
        [0.86972371, 0.13027629],
        [0.47961746, 0.52038254],
        [0.9914901 , 0.0085099 ],
        [0.59930815, 0.40069185],
        [0.51425938, 0.48574062],
        [0.72069956, 0.27930044],
        [0.90042401, 0.09957599],
        [0.98618918, 0.01381082],
        [0.99896094, 0.001039

In [11]:
# Attach the model output to the sampled rows (adds two columns to `dataset`).
# Column 1 of predict_proba is presumably the probability of class 1 — confirm
# against model.classes_.
positive_class_proba = pred_prob[:, 1]
dataset['Prediction'] = pred
dataset['Prediction_Proba'] = positive_class_proba

In [12]:
# Display the scored sample: feature columns plus Prediction and Prediction_Proba.
dataset

Unnamed: 0,gender,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,...,tenure,MonthlyCharges,TotalCharges,SeniorCitizen,Partner,Dependents,PhoneService,PaperlessBilling,Prediction,Prediction_Proba
6466,Male,Yes,Fiber optic,No,No,No,No,No,No,Month-to-month,...,18,74.15,1387.0,0,0,0,1,0,0,0.340849
5615,Male,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,...,1,20.2,20.2,0,0,0,1,1,0,0.303602
5344,Male,No,DSL,Yes,Yes,Yes,Yes,Yes,No,Two year,...,71,76.9,5522.7,0,0,1,1,1,0,0.040135
5564,Male,Yes,DSL,Yes,No,No,No,No,No,Two year,...,35,55.25,1924.1,0,0,0,1,0,0,0.007627
588,Male,Yes,Fiber optic,Yes,Yes,Yes,No,No,Yes,Two year,...,72,99.15,7422.1,0,1,0,1,1,0,0.062573
2764,Male,Yes,DSL,Yes,Yes,Yes,Yes,No,No,Month-to-month,...,30,67.6,2000.2,0,1,1,1,0,0,0.18019
1247,Female,Yes,Fiber optic,No,Yes,Yes,No,No,No,One year,...,24,83.15,2033.05,0,1,1,1,0,0,0.077337
2391,Male,Yes,DSL,No,No,No,No,Yes,No,Month-to-month,...,8,59.25,436.6,0,0,0,1,1,0,0.265139
2832,Male,Yes,DSL,No,Yes,No,Yes,Yes,No,One year,...,40,70.75,2921.75,0,1,0,1,0,0,0.062444
2700,Female,No phone service,DSL,No,No,No,No,No,No,Month-to-month,...,35,24.15,812.5,0,0,0,0,0,0,0.22813
