In [10]:
import os
import pickle
import click
import mlflow
import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt.pyll import scope
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [11]:
# Tracking server address; must match the one the MLflow UI is served from.
TRACKING_URI = "http://127.0.0.1:5003"
mlflow.set_tracking_uri(TRACKING_URI)

# Create or set the experiment that subsequent runs are logged under.
# Kept as the last expression so the cell displays the returned Experiment.
EXPERIMENT_NAME = "random-forest-hyperopt"
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment-tracking/homework/artifacts_local/2', creation_time=1746781142000, experiment_id='2', last_update_time=1746781142000, lifecycle_stage='active', name='random-forest-hyperopt', tags={}>

In [12]:
def load_pickle(filename: str):
    """Read the pickle file at ``filename`` and return the deserialized object.

    Note: unpickling can execute arbitrary code, so only call this on files
    from a trusted source.
    """
    with open(filename, mode="rb") as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded

In [16]:
def run_optimization(data_path: str = "./output", num_trials: int = 20):
    """Tune a RandomForestRegressor with hyperopt, logging every trial to MLflow.

    Args:
        data_path: Directory containing ``train.pkl`` and ``val.pkl``. Each
            file is unpickled into an ``(X, y)`` pair.
        num_trials: Number of evaluations, passed to ``fmin`` as ``max_evals``.

    Each trial runs inside its own MLflow run named "RandomForest", tagged
    with ``developer`` and ``model``, and logs the sampled hyperparameters
    together with the validation RMSE, which is also the loss being minimized.
    """

    X_train, y_train = load_pickle(os.path.join(data_path, "train.pkl"))
    X_val, y_val = load_pickle(os.path.join(data_path, "val.pkl"))

    def objective(params):
        """Fit one forest with ``params`` and report its validation RMSE."""

        with mlflow.start_run(run_name="RandomForest"):
            mlflow.set_tag("developer","Dario")
            mlflow.set_tag("model", "RandomForest")

            rf = RandomForestRegressor(**params)
            rf.fit(X_train, y_train)
            y_pred = rf.predict(X_val)

            # Take the square root explicitly instead of passing
            # ``squared=False``: that keyword was deprecated in scikit-learn
            # 1.4 and removed in 1.6, where it raises TypeError. This form
            # gives the same value on every version.
            rmse = float(np.sqrt(mean_squared_error(y_val, y_pred)))

            # Log RMSE and hyperparameters to MLflow
            mlflow.log_metric("rmse", rmse)
            mlflow.log_params(params)

            return {'loss': rmse, 'status': STATUS_OK}

    # quniform samples floats on a step-1 grid; scope.int casts them to the
    # integers RandomForestRegressor expects. random_state is held fixed so
    # every trial's forest is seeded identically.
    search_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 50, 1)),
        'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)),
        'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 4, 1)),
        'random_state': 42
    }

    rstate = np.random.default_rng(42)  # for reproducible results
    fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=num_trials,
        trials=Trials(),
        rstate=rstate
    )


# Entry point: run the search with the default data path and trial count.
if __name__ == "__main__":
    run_optimization()

🏃 View run RandomForest at: http://127.0.0.1:5003/#/experiments/2/runs/0dc73f65c66844589ae1d833aa7cdef2                                                                                  

🧪 View experiment at: http://127.0.0.1:5003/#/experiments/2                                                                                                                              

🏃 View run RandomForest at: http://127.0.0.1:5003/#/experiments/2/runs/96d4fe7adbbb42b3936ffc247611680b                                                                                  

🧪 View experiment at: http://127.0.0.1:5003/#/experiments/2                                                                                                                              

🏃 View run RandomForest at: http://127.0.0.1:5003/#/experiments/2/runs/fe0c3b35ba8949ea80cb41b0c17c3f53                                                                                  

🧪 View experiment at: http://127.0.0.1:5003/#/experiments/2      

In [17]:
# mlflow.get_artifact_uri() reports the URI of the active run, and MLflow
# implicitly starts a new run when none is active. Left open, that run makes
# any later mlflow.start_run() without nested=True fail (for example when
# re-running the optimization cell). Remember whether a run already existed
# and end the run only if this cell caused it to be started.
had_active_run = mlflow.active_run() is not None
print(f"Default artifacts URI: '{mlflow.get_artifact_uri()}'")
if not had_active_run:
    mlflow.end_run()

Default artifacts URI: '/workspaces/mlops-zoomcamp/02-experiment-tracking/homework/artifacts_local/2/b08015df98ac4c6fa55c37b91775c86a/artifacts'
