In [14]:
import os
import pandas as pd
from datetime import datetime, timedelta
from sqlalchemy import create_engine
from dotenv import load_dotenv, find_dotenv
from mlflow.tracking import MlflowClient
import mlflow.sklearn

_ = load_dotenv(find_dotenv())

In [15]:
base = pd.read_csv("../data/WA_Fn-UseC_-Telco-Customer-Churn.csv").sample(50, random_state=11)

In [16]:
cat_cols = ['gender', 'MultipleLines', 'InternetService', 'OnlineSecurity', 
            'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 
            'StreamingMovies', 'Contract', 'PaymentMethod']

num_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']

bin_cols = ['SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']

target_col = ['Churn']

dataset = (base
 .drop_duplicates(keep='first')
 .drop(index=base[base['TotalCharges'] == ' '].index)
 [cat_cols + num_cols + bin_cols]
 .assign(Partner = lambda x: x.Partner.map({'Yes': 1, 'No': 0}),
        Dependents = lambda x: x.Dependents.map({'Yes': 1, 'No': 0}),
        PhoneService = lambda x: x.PhoneService.map({'Yes': 1, 'No': 0}),
        PaperlessBilling = lambda x: x.PaperlessBilling.map({'Yes': 1, 'No': 0})
        )
)

In [17]:
mlflow.set_tracking_uri("http://localhost:5000")

client = MlflowClient()

models = client.search_registered_models()

print("Models registered in MLflow:")
for model in models:
    print(f"Model name: {model.name}")

Models registered in MLflow:
Model name: telco_customer_churn


In [18]:
model_name = "telco_customer_churn"
model_alias = "champion"

In [19]:
model_uri = f"models:/{model_name}@{model_alias}"
model = mlflow.sklearn.load_model(model_uri)

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 46.55it/s]


In [20]:
model

In [21]:
pred = model.predict(dataset)
pred_prob = model.predict_proba(dataset)

In [22]:
pred, pred_prob

(array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0], dtype=int64),
 array([[2.44986341e-01, 7.55013659e-01],
        [3.83402973e-01, 6.16597027e-01],
        [6.78813486e-01, 3.21186514e-01],
        [9.97129540e-01, 2.87046035e-03],
        [6.10221180e-01, 3.89778820e-01],
        [7.68208644e-01, 2.31791356e-01],
        [1.66686956e-01, 8.33313044e-01],
        [8.79861102e-01, 1.20138898e-01],
        [9.77625209e-01, 2.23747905e-02],
        [9.48745229e-01, 5.12547706e-02],
        [7.82065931e-01, 2.17934069e-01],
        [8.96861434e-01, 1.03138566e-01],
        [6.89214772e-01, 3.10785228e-01],
        [9.00424010e-01, 9.95759901e-02],
        [1.71409199e-01, 8.28590801e-01],
        [7.57487823e-01, 2.42512177e-01],
        [8.75417064e-01, 1.24582936e-01],
        [9.99296744e-01, 7.03255612e-04],
        [8.79877079e-01, 1.20122921e-01],
        [7.64

In [23]:
base['prediction'] = pred
base['probability'] = pred_prob[:, 1]
base['model_name'] = model_name
base['model_version'] = model_alias

In [24]:
PG_APP_PWD = os.getenv('PG_APP_PWD')
url_conexion = f'postgresql+psycopg2://app_user:{PG_APP_PWD}@localhost:5432/app_db'

engine = create_engine(url_conexion)

nombre_tabla = 'prediction_logs'
base.to_sql(nombre_tabla, con=engine, if_exists='append', index=False)

50

In [25]:
PG_APP_PWD = os.getenv('PG_APP_PWD')
url_conexion = f'postgresql+psycopg2://app_user:{PG_APP_PWD}@localhost:5432/app_db'

engine = create_engine(url_conexion)

nombre_tabla = 'prediction_logs'
base.to_sql(nombre_tabla, con=engine, if_exists='append', index=False)

50