# Iris Dataset Logistic Regression

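This notebook replicates the functionality of the `test.py` script: it trains a logistic regression model on the Iris dataset, evaluates its accuracy on a held-out test split, logs and registers the run with MLflow, and finally reloads the logged model to make predictions on the test data.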

In [1]:
import mlflow
from mlflow.models import infer_signature
import mlflow.sklearn
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [16]:
def load_data():
    """Load the Iris dataset and split it into training and test partitions.

    Returns:
        The result of ``train_test_split``, unpacked by callers as
        ``X_train, X_test, y_train, y_test``. 20% of the samples are held
        out for testing and the split is seeded with ``random_state=42``.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    splits = train_test_split(features, labels, test_size=0.2, random_state=42)
    return splits


def train_model(X_train, y_train):
    """Fit a logistic regression classifier on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels, one per row of ``X_train``.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    hyperparams = {"solver": "lbfgs", "max_iter": 1000}
    classifier = LogisticRegression(**hyperparams)
    classifier.fit(X_train, y_train)
    return classifier


def evaluate_model(model, X_test, y_test):
    """Score a fitted model on the test data.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        A ``(accuracy, y_pred)`` tuple: the accuracy score of the
        predictions against ``y_test`` and the predictions themselves.
    """
    predicted_labels = model.predict(X_test)
    return accuracy_score(y_test, predicted_labels), predicted_labels


def log_experiment(accuracy, model, X_train):
    """Log the trained model, its hyperparameters and its accuracy to MLflow.

    Within a new run of the "Démarrage rapide MLflow" experiment this logs
    the model's hyperparameters, a descriptive tag, the accuracy metric and
    the model itself (with an inferred signature and a small input example),
    registering the model as "demarrage-rapide-suivi".

    Args:
        accuracy: Accuracy value to record as the ``accuracy`` metric.
        model: Fitted scikit-learn estimator to log.
        X_train: Training features; used to infer the model signature and to
            provide the input example.

    Returns:
        The object returned by ``mlflow.sklearn.log_model`` for the logged
        model.
    """
    # Read the hyperparameters from the estimator itself rather than from a
    # hard-coded dict, so the logged values cannot drift from what was
    # actually used for training. Keys the installed scikit-learn version
    # does not expose (e.g. a removed parameter) are simply skipped.
    fitted_params = model.get_params()
    logged_keys = ("solver", "max_iter", "multi_class", "random_state")
    params = {key: fitted_params[key] for key in logged_keys if key in fitted_params}

    mlflow.set_experiment("Démarrage rapide MLflow")
    with mlflow.start_run():
        mlflow.log_params(params)
        mlflow.set_tag("Info_d_entrainement", "Modèle LR de base pour les données iris")
        signature = infer_signature(X_train, model.predict(X_train))
        mlflow.log_metric("accuracy", accuracy)
        model_info = mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="modele_iris",
            signature=signature,
            # A handful of rows is enough to illustrate the expected input;
            # storing the whole training set as the example is wasteful.
            input_example=X_train[:5],
            registered_model_name="demarrage-rapide-suivi",
        )
    return model_info


def load_and_predict(model_info, X_test, y_test):
    """Reload the logged model through MLflow and predict on the test data.

    Args:
        model_info: Object exposing ``model_uri`` for the logged model.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        A DataFrame holding ``X_test`` under the Iris feature names, plus an
        ``actual_class`` column (``y_test``) and a ``predicted_class`` column
        (the reloaded model's predictions).
    """
    pyfunc_model = mlflow.pyfunc.load_model(model_info.model_uri)
    predicted = pyfunc_model.predict(X_test)

    # Column labels come from the dataset's own feature names.
    column_names = datasets.load_iris().feature_names
    result = pd.DataFrame(X_test, columns=column_names)
    result["actual_class"] = y_test
    result["predicted_class"] = predicted
    return result

In [17]:
# Main execution: split the data, then fit the classifier on the training part
X_train, X_test, y_train, y_test = load_data()
model = train_model(X_train=X_train, y_train=y_train)

In [18]:
# Score the fitted model on the held-out test split
accuracy, y_pred = evaluate_model(model=model, X_test=X_test, y_test=y_test)

In [19]:
# Record the run in MLflow and keep the handle to the logged model
model_info = log_experiment(accuracy=accuracy, model=model, X_train=X_train)

Registered model 'demarrage-rapide-suivi' already exists. Creating a new version of this model...
Created version '2' of model 'demarrage-rapide-suivi'.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 1169.91it/s]


In [20]:
# Reload the logged model, predict on the test split and show the first rows
result = load_and_predict(model_info=model_info, X_test=X_test, y_test=y_test)
print(result.head(4))

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                6.1               2.8                4.7               1.2   
1                5.7               3.8                1.7               0.3   
2                7.7               2.6                6.9               2.3   
3                6.0               2.9                4.5               1.5   

   actual_class  predicted_class  
0             1                1  
1             0                0  
2             2                2  
3             1                1  
