# MLflow Example: Tracking Two Model Training Runs

In [16]:
# === Model Training and Experimentation ===

# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits


In [17]:

# Load dataset
data = load_digits()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# Define a function to train a model and return its accuracy
def train_model(n_estimators, max_depth):
    # Train a RandomForestClassifier
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)

    # Make predictions
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    print(f"Model with n_estimators={n_estimators}, max_depth={max_depth} achieved accuracy={accuracy:.4f}")
    return model, accuracy

In [18]:

# Train models with different hyperparameter configurations
print("Training Model 1...")
model_1, acc_1 = train_model(n_estimators=20, max_depth=5)

Training Model 1...
Model with n_estimators=20, max_depth=5 achieved accuracy=0.9389


In [19]:
print("\nTraining Model 2...")
model_2, acc_2 = train_model(n_estimators=100, max_depth=10)



Training Model 2...
Model with n_estimators=100, max_depth=10 achieved accuracy=0.9722


In [20]:
! pip install dagshub




Now adding MLFlow

In [21]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits

In [22]:
# get this code from dagshub
import dagshub
dagshub.init(repo_owner='Marc-Rougagnou', repo_name='dvc-dags-mlflow', mlflow=True)

In [23]:
data = load_digits()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

In [24]:
# Set MLflow experiment name. This will get created if it doesn't exist
experiment_name = "RandomForestExperiment_3"
mlflow.set_experiment(experiment_name)


<Experiment: artifact_location='mlflow-artifacts:/1eb2fd4623c9457da98c63af49ac864e', creation_time=1750863494039, experiment_id='1', last_update_time=1750863494039, lifecycle_stage='active', name='RandomForestExperiment_3', tags={}>

In [25]:
import mlflow
import mlflow.sklearn
import joblib

def train_and_log_model(n_estimators, max_depth):
    with mlflow.start_run():
        # Train the model
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        model.fit(X_train, y_train)

        # Evaluate
        predictions = model.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)

        # Log params & metrics
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_metric("accuracy", accuracy)

        # Save & log the model manually
        local_model_path = "rf_model.pkl"
        joblib.dump(model, local_model_path)
        mlflow.log_artifact(local_model_path, artifact_path="model")

        print(f"✔️ Modèle loggué avec accuracy={accuracy:.4f}")

In [26]:
# Train and log Model 1
print("Training and Logging Model 1...")
train_and_log_model(n_estimators=20, max_depth=5)

Training and Logging Model 1...
✔️ Modèle loggué avec accuracy=0.9389
🏃 View run gregarious-pig-888 at: https://dagshub.com/Marc-Rougagnou/dvc-dags-mlflow.mlflow/#/experiments/1/runs/d9373f7c7cce46989c353c74d8a3d214
🧪 View experiment at: https://dagshub.com/Marc-Rougagnou/dvc-dags-mlflow.mlflow/#/experiments/1


In [27]:
# Train and log Model 2
print("\nTraining and Logging Model 2...")
train_and_log_model(n_estimators=100, max_depth=10)


Training and Logging Model 2...
✔️ Modèle loggué avec accuracy=0.9722
🏃 View run gaudy-roo-732 at: https://dagshub.com/Marc-Rougagnou/dvc-dags-mlflow.mlflow/#/experiments/1/runs/7ec4e7e2da264cd5ab6141c0f6467f81
🧪 View experiment at: https://dagshub.com/Marc-Rougagnou/dvc-dags-mlflow.mlflow/#/experiments/1


In [28]:
# Instructions to visualize results
print("\nTo view the results, run the following command in your terminal:")
print("mlflow ui")
print("Then navigate to http://127.0.0.1:5000 to explore the experiment results.")


To view the results, run the following command in your terminal:
mlflow ui
Then navigate to http://127.0.0.1:5000 to explore the experiment results.
