## Random Forest Classifier notebook with MLflow

In [1]:
## Import libraries

import sys
import warnings
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature

In [2]:
# Function to evaluate metrics

def eval_metrics(actual, pred):
    """Score predictions against the true labels.

    Args:
        actual: True labels.
        pred: Predicted labels, aligned with ``actual``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``. Precision, recall and
        F1 are computed with ``average="weighted"``.
    """
    # The same averaging mode is shared by the three per-class metrics.
    weighted = {"average": "weighted"}
    return (
        accuracy_score(actual, pred),
        precision_score(actual, pred, **weighted),
        recall_score(actual, pred, **weighted),
        f1_score(actual, pred, **weighted),
    )

In [3]:
# Silence all Python warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")
# Seed NumPy's global RNG; the split below also pins its own random_state.
np.random.seed(42)

# Load the dataset and separate the features from the "target" label column.
iris = pd.read_csv("iris.csv")
X = iris.drop(columns=["target"])
y = iris["target"]

# Take the feature names from X itself so they always match the model inputs,
# wherever the "target" column happens to sit in the CSV.
feature_names = X.columns
# Display names for the classes.
# NOTE(review): assumes the labels sort in this order (e.g. 0, 1, 2) — confirm against iris.csv.
target_names = ["setosa", "versicolor", "virginica"]

# Split the data into training and test sets (80% - 20%)
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Hyperparameters of the forest (also logged to MLflow later in the notebook).
n_estimators = 100
max_depth = None

# Fit the Random Forest classifier and predict on the held-out test set.
rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
rf.fit(train_x, train_y)
predictions = rf.predict(test_x)

# Infer the model signature from a matching input/output pair: the predictions
# were produced from test_x, so test_x is the input they correspond to.
signature = infer_signature(test_x, predictions)
# First five training rows (selected by position) serve as the example input.
input_example = train_x.iloc[:5]

# Metrics evaluation
accuracy, precision, recall, f1 = eval_metrics(test_y, predictions)

# Display the results
print(f"Random Forest Classifier (n_estimators={n_estimators}, max_depth={max_depth}):")
print(f"  Accuracy: {accuracy}")
print(f"  Precision: {precision}")
print(f"  Recall: {recall}")
print(f"  F1 Score: {f1}")

Random Forest Classifier (n_estimators=100, max_depth=None):
  Accuracy: 1.0
  Precision: 1.0
  Recall: 1.0
  F1 Score: 1.0


In [5]:
# MLflow integration: record hyperparameters, metrics, the fitted model and
# the classification report inside a single tracked run.
run_params = {
    "n_estimators": n_estimators,
    "max_depth": max_depth,
}
run_metrics = {
    "accuracy": accuracy,
    "precision": precision,
    "recall": recall,
    "f1_score": f1,
}
report_path = "classification_report.txt"

with mlflow.start_run():
    # Hyperparameters, then metrics, one entry at a time.
    for param_name, param_value in run_params.items():
        mlflow.log_param(param_name, param_value)
    for metric_name, metric_value in run_metrics.items():
        mlflow.log_metric(metric_name, metric_value)

    # Persist the fitted model together with its signature and an input example.
    mlflow.sklearn.log_model(rf, "model", signature=signature, input_example=input_example)

    # Build the per-class report, show it, then attach it to the run as a text artifact.
    report = classification_report(test_y, predictions, target_names=target_names)
    print("\nClassification Report:\n", report)
    with open(report_path, "w") as report_file:
        report_file.write(report)
    mlflow.log_artifact(report_path)

print("MLflow run completed!")


Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

MLflow run completed!
