In [7]:
import os
import pickle
import click
import mlflow
import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt.pyll import scope
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_squared_error

In [2]:
# MLflow configuration for this notebook: the tracking server address and the
# experiment that every run started below is recorded under.
TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "random-forest-hyperopt"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the cell's last expression so the notebook displays the Experiment object.
mlflow.set_experiment(EXPERIMENT_NAME)

2025/06/22 22:27:13 INFO mlflow.tracking.fluent: Experiment with name 'random-forest-hyperopt' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/559611738701900722', creation_time=1750595233626, experiment_id='559611738701900722', last_update_time=1750595233626, lifecycle_stage='active', name='random-forest-hyperopt', tags={}>

In [3]:
def load_pickle(filename: str):
    """Read a single pickled object from ``filename`` and return it.

    Args:
        filename: Path of the pickle file; it is opened in binary read mode.

    Returns:
        The object deserialised by ``pickle.load``.

    Note:
        Unpickling can execute arbitrary code, so only call this on files
        that come from a trusted source.
    """
    with open(filename, "rb") as pickle_file:
        loaded_object = pickle.load(pickle_file)
    return loaded_object


In [8]:
def run_optimization(data_path: str = "./output", num_trials: int = 15):
    """Search RandomForestRegressor hyperparameters with hyperopt, logging each trial to MLflow.

    Args:
        data_path: Directory holding ``train.pkl`` and ``val.pkl``. Each file is
            unpickled and unpacked as a two-element ``(features, target)`` pair.
        num_trials: Number of evaluations, forwarded to ``fmin`` as ``max_evals``.

    Returns:
        None. The value returned by ``fmin`` is not kept; every trial's
        parameters and its ``rmse`` metric are logged to MLflow instead.
    """
    train_file = os.path.join(data_path, "train.pkl")
    val_file = os.path.join(data_path, "val.pkl")
    X_train, y_train = load_pickle(train_file)
    X_val, y_val = load_pickle(val_file)

    def int_param(label, low, high):
        # Step-1 quantised uniform draw, cast to int before it reaches the model.
        return scope.int(hp.quniform(label, low, high, 1))

    search_space = {
        'max_depth': int_param('max_depth', 1, 20),
        'n_estimators': int_param('n_estimators', 10, 50),
        'min_samples_split': int_param('min_samples_split', 2, 10),
        'min_samples_leaf': int_param('min_samples_leaf', 1, 4),
        # Fixed (not searched) so every trial's forest uses the same seed.
        'random_state': 42,
    }

    def objective(params):
        # One MLflow run per trial: log the sampled params, fit on the training
        # split, score on the validation split, log the resulting RMSE.
        with mlflow.start_run():
            mlflow.log_params(params)

            model = RandomForestRegressor(**params)
            model.fit(X_train, y_train)
            predictions = model.predict(X_val)

            # RMSE is derived as the square root of the mean squared error.
            rmse = float(np.sqrt(mean_squared_error(y_val, predictions)))
            mlflow.log_metric("rmse", rmse)

        return {'loss': rmse, 'status': STATUS_OK}

    fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=num_trials,
        trials=Trials(),
        # Seeded generator so the sequence of sampled trials is repeatable.
        rstate=np.random.default_rng(42),
    )

# Entry point: run the search with the default data directory and trial count
# when this code executes as the main module (which includes a notebook kernel).
if __name__ == "__main__":
    run_optimization()


🏃 View run bemused-moth-681 at: http://127.0.0.1:5000/#/experiments/559611738701900722/runs/f6c2f01c41b94e3088ad3ef28cb73fd5

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/559611738701900722                                          

🏃 View run mercurial-calf-266 at: http://127.0.0.1:5000/#/experiments/559611738701900722/runs/ef572dfc3fd746e3b35463ee844ecba9

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/559611738701900722                                          

🏃 View run resilient-pug-341 at: http://127.0.0.1:5000/#/experiments/559611738701900722/runs/1b95ca55c75641c7847152b57ae2e137

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/559611738701900722                                          

🏃 View run casual-horse-935 at: http://127.0.0.1:5000/#/experiments/559611738701900722/runs/f1c87bbfc21541e5b15bedf850e4c05d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/559611738701900722                                          

🏃 View run gr