<a href="https://colab.research.google.com/github/BanafshehHassani/Log-Runs-To-Experiment/blob/main/MLflow_Experiment_for_Diabetes_Prediction_with_Random_Forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MLflow Experiment for Diabetes Prediction with Random Forest

Author: [Banafsheh Hassani](https://www.linkedin.com/in/banafsheh-hassani-7b063a129/)

This project demonstrates how to use MLflow to track machine learning training runs in a notebook environment. It trains a Random Forest regressor on scikit-learn's diabetes dataset, logs the model's hyperparameters, the fitted model, and its test-set mean squared error (MSE), and organizes the run under a named MLflow experiment.

In [1]:
# Install mlflow
!pip install mlflow

# Import necessary packages
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

# Fixed seed shared by the train/test split and the forest. Without it every
# Restart & Run All produces a different split and a different model, so the
# MSE logged to MLflow is not comparable between runs with the same parameters.
SEED = 42

# Load dataset: feature matrix and regression target from scikit-learn's
# bundled diabetes data, split with train_test_split's default proportions.
db = load_diabetes()
X = db.data
y = db.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

# Set the experiment name; all runs started below are grouped under it.
experiment_name = "diabetes_experiment"
mlflow.set_experiment(experiment_name)

# Start the MLflow run. The context manager ends the run when the block exits,
# including when an exception is raised inside it.
with mlflow.start_run():
    # Hyperparameters for this run
    n_estimators = 100
    max_depth = 6
    max_features = 3

    # Create and train the model (seeded so the fitted forest is reproducible)
    rf = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        random_state=SEED,
    )
    rf.fit(X_train, y_train)

    # Make predictions on the held-out split
    predictions = rf.predict(X_test)

    # Log parameters. The existing keys are kept unchanged so this run stays
    # comparable with earlier runs; the seed is logged in addition so the run
    # can be reproduced from the tracking record alone.
    mlflow.log_param("num_trees", n_estimators)
    mlflow.log_param("maxdepth", max_depth)
    mlflow.log_param("max_feat", max_features)
    mlflow.log_param("random_state", SEED)

    # Log the fitted model as a run artifact
    mlflow.sklearn.log_model(rf, "random-forest-model")

    # Create metrics: mean squared error on the held-out split
    mse = mean_squared_error(y_test, predictions)

    # Log metrics
    mlflow.log_metric("mse", mse)


Collecting mlflow
  Downloading mlflow-2.4.1-py3-none-any.whl (18.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.1/18.1 MB[0m [31m75.9 MB/s[0m eta [36m0:00:00[0m
Collecting databricks-cli<1,>=0.8.7 (from mlflow)
  Downloading databricks-cli-0.17.7.tar.gz (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.5/83.5 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gitpython<4,>=2.1.0 (from mlflow)
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
Collecting importlib-metadata!=4.7.0,<7,>=3.7.0 (from mlflow)
  Downloading importlib_metadata-6.8.0-py3-none-any.whl (22 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.11.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0

2023/07/09 18:58:50 INFO mlflow.tracking.fluent: Experiment with name 'diabetes_experiment' does not exist. Creating a new experiment.
