In [6]:
import pickle

import mlflow
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the processed dataset; the path is relative to the notebook's
# working directory.
df_final = pd.read_csv(filepath_or_buffer='../data/processed/df_final.csv')

In [3]:
# Seed for the train/test split; later cells also reference it.
seed = 17

# Use every column except 'Churn' as predictors and 'Churn' as the target,
# holding out 20% of the rows as the test split.
x_train, x_test, y_train, y_test = train_test_split(
    df_final.drop(columns='Churn'),
    df_final['Churn'],
    test_size=0.2,
    random_state=seed,
)

In [4]:
name_model = "AdaBoost"
developer = "RafaJBZ"

# Hyperparameters passed to AdaBoostClassifier and logged to MLflow.
# random_state reuses the notebook-wide `seed` (defined next to the
# train/test split) instead of a separate hard-coded value, so this run is
# seeded the same way as the other model cells that reference `seed`.
parameters = {
    "n_estimators": 50,
    "random_state": seed,
    "learning_rate": 1.0,
}

with mlflow.start_run(run_name=name_model):

    # Identify the run by model name and author
    mlflow.log_param("model", name_model)
    mlflow.log_param("developer", developer)

    # Log the model hyperparameters
    mlflow.log_params(parameters)

    # Train an AdaBoost classifier
    ada_clf = AdaBoostClassifier(**parameters)
    ada_clf.fit(x_train, y_train)

    # Predict on the test split: class labels, plus column 1 of
    # predict_proba (the second class) as the score for ROC AUC
    predictions = ada_clf.predict(x_test)
    predictions_proba = ada_clf.predict_proba(x_test)[:, 1]

    # Compute evaluation metrics
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    roc_auc = roc_auc_score(y_test, predictions_proba)

    # Log all metrics in a single call
    mlflow.log_metrics({
        "roc_auc": roc_auc,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
    })

    # Log the fitted model as a run artifact
    mlflow.sklearn.log_model(ada_clf, "ada_boost_classifier")
    



In [7]:
name_model = "XGBoost"
developer = "RafaJBZ"

# Hyperparameters forwarded to XGBClassifier and recorded in MLflow.
parameters = {
    "n_estimators": 50,
    "random_state": seed,
    "learning_rate": 1.0,
}

with mlflow.start_run(run_name=name_model):

    # Record who ran which model, followed by the hyperparameters
    mlflow.log_param("model", name_model)
    mlflow.log_param("developer", developer)
    mlflow.log_params(parameters)

    # Train an XGBoost classifier
    xgb_clf = xgb.XGBClassifier(**parameters)
    xgb_clf.fit(x_train, y_train)

    # Class labels and column 1 of predict_proba (the second class)
    # for the test split
    predictions = xgb_clf.predict(x_test)
    predictions_proba = xgb_clf.predict_proba(x_test)[:, 1]

    # Evaluate: ROC AUC uses the probability scores, the rest use the labels
    roc_auc = roc_auc_score(y_test, predictions_proba)
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)

    # Send the four metrics to MLflow together
    mlflow.log_metrics({
        "roc_auc": roc_auc,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
    })

    # Store the fitted model as a run artifact (scikit-learn flavor)
    mlflow.sklearn.log_model(xgb_clf, "xgboost_classifier")




In [8]:
name_model = "LogisticRegression"
developer = "RafaJBZ"

# Hyperparameters passed to LogisticRegression and logged to MLflow.
# max_iter is raised above scikit-learn's default of 100 because the
# previous run stopped with "TOTAL NO. of ITERATIONS REACHED LIMIT", i.e.
# the logged model had not converged. If the warning still appears, scale
# the features as that warning recommends.
parameters = {
    'penalty': 'l2',
    'C': 1.0,
    'random_state': seed,
    'max_iter': 1000,
}

with mlflow.start_run(run_name=name_model):

    # Identify the run by model name and author
    mlflow.log_param("model", name_model)
    mlflow.log_param("developer", developer)

    # Log the model hyperparameters
    mlflow.log_params(parameters)

    # Train a logistic regression classifier
    lr_clf = LogisticRegression(**parameters)
    lr_clf.fit(x_train, y_train)

    # Predict on the test split: class labels, plus column 1 of
    # predict_proba (the second class) as the score for ROC AUC
    predictions = lr_clf.predict(x_test)
    predictions_proba = lr_clf.predict_proba(x_test)[:, 1]

    # Compute evaluation metrics
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    roc_auc = roc_auc_score(y_test, predictions_proba)

    # Log all metrics in a single call
    mlflow.log_metrics({
        "roc_auc": roc_auc,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
    })

    # Log the fitted model as a run artifact
    mlflow.sklearn.log_model(lr_clf, "logistic_regression_classifier")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
