In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import mlflow

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve

In [2]:
# Load the diabetes dataset from its CSV file into a DataFrame
csv_path = 'data/diabetes.csv'
print("Reading data...")
df = pd.read_csv(csv_path)

Reading data...


In [3]:
# Separate the predictor columns from the target column
print("Splitting data...")
feature_columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
X = df[feature_columns].values
y = df['Outcome'].values

# Hold out 30% of the rows as the test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

Splitting data...


In [4]:
# Select the MLflow experiment that the runs below are logged under
# (MLflow creates it when no experiment with this name exists yet)
experiment_name = "mlflow-experiment-diabetes"
mlflow.set_experiment(experiment_name=experiment_name)

2024/07/27 12:49:10 INFO mlflow.tracking.fluent: Experiment with name 'mlflow-experiment-diabetes' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///C:/Users/LENOVO/Documents/Data%20Science/Google/MLPractice/MLOpsFundamentals/Practices%20MLflow/mlruns/278332242791271182', creation_time=1722098950097, experiment_id='278332242791271182', last_update_time=1722098950097, lifecycle_stage='active', name='mlflow-experiment-diabetes', tags={}>

In [5]:
def train_and_log_model(model, model_name, **params):
    """Fit ``model`` inside a new MLflow run and log its test-set accuracy.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` variables rather than taking the data as arguments.

    Args:
        model: Estimator object exposing ``fit`` and ``predict``.
        model_name: Label stored in the run as the ``model`` parameter.
        **params: Extra key/value pairs logged as run parameters.
    """
    with mlflow.start_run():
        # Turn on scikit-learn autologging before the model is fitted
        mlflow.sklearn.autolog()

        # Train the model on the training split
        model.fit(X_train, y_train)

        # Accuracy = fraction of test-set predictions that equal the true labels
        predictions = model.predict(X_test)
        accuracy = np.mean(predictions == y_test)

        # Record the caller-supplied parameters, the metric and the model label
        mlflow.log_params(params)
        mlflow.log_metric("Accuracy", accuracy)
        mlflow.log_param("model", model_name)

In [6]:
# Train and log models
# Seed the estimators (same seed as the train/test split) so that re-running
# this cell logs the same metrics: both the liblinear solver and the decision
# tree draw on random_state while fitting.
model_seed = 0

# Two logistic regressions that differ only in regularization rate (C = 1 / rate);
# looping keeps C and the logged regularization_rate from drifting apart.
for reg_rate in (0.1, 0.01):
    train_and_log_model(
        LogisticRegression(C=1/reg_rate, solver="liblinear", random_state=model_seed),
        "LogisticRegression",
        regularization_rate=reg_rate,
    )

# Decision tree baseline with default hyperparameters
train_and_log_model(DecisionTreeClassifier(random_state=model_seed), "DecisionTreeClassifier")



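#### Run the following command in a terminal, from the directory that contains `mlruns/`, to start the MLflow tracking UI (served at http://127.0.0.1:5000 by default):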
```bash
mlflow ui
```
