In [5]:
import os
from pathlib import Path

import optuna
from sklearn.cluster import KMeans

from cohirf.experiment.hpo_classification_clustering_experiment import HPOClassificationClusteringExperiment
from cohirf.models.batch_cohirf import BatchCoHiRF
from cohirf.models.cohirf import BaseCoHiRF

In [6]:
# Root directory under which experiment results are stored. The default is the
# original author's checkout; set the COHIRF_RESULTS_DIR environment variable
# before starting the kernel to redirect it without editing the notebook.
DEFAULT_RESULTS_ROOT = "/home/belucci/code/cohirf/results"
results_dir = Path(os.environ.get("COHIRF_RESULTS_DIR", DEFAULT_RESULTS_ROOT)) / "samples"
# MLflow tracking URI pointing at a SQLite file inside the results directory.
mlflow_tracking_uri = f"sqlite:///{results_dir}/mlflow.db"

In [7]:
# Keyword arguments unpacked into the experiment constructors further down.
experiment_params = {
    "mlflow_tracking_uri": mlflow_tracking_uri,
    "check_if_exists": False,
    "verbose": 1,
}

# Example of running one of the predefined, already-tested models (selected by name)

In [8]:
# Settings are grouped by concern and unpacked below in the same keyword order
# as a single flat constructor call would list them.
hpo_settings = {
    "n_trials": 20,
    "hpo_seed": 0,
    "hpo_metric": "adjusted_rand",
    "direction": "maximize",
}
# The model is selected by its string name here rather than by passing a class.
model_settings = {
    "experiment_name": "test",
    "model": "KMeans",
    "seed_model": 0,
}
dataset_settings = {
    "seed_dataset": 0,
    "n_samples": 1000,
    "n_informative": 10,
    "n_random": 10000,
    "n_classes": 5,
    "class_sep": 10 * (10**0.5),
}

experiment = HPOClassificationClusteringExperiment(
    **hpo_settings,
    **model_settings,
    **dataset_settings,
    **experiment_params,
)

# Keep only the first entry of whatever run() returns.
run_outputs = experiment.run(return_results=True)
result = run_outputs[0]

# Pull the headline numbers out of the nested result mapping.
evaluation = result["evaluate_model_return"]
ari = evaluation["best/adjusted_rand"]
hpo_time = result["fit_model_return"]["elapsed_time"]
best_time = evaluation["best/elapsed_time"]

Combinations completed:   0%|          | 0/1 [00:00<?, ?it/s]

2025/07/19 18:54:34 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/07/19 18:54:34 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

Trials:   0%|          | 0/20 [00:00<?, ?it/s]

# Example of running a custom model

In [9]:
# Custom model class handed to the experiment instead of a string name.
model_cls = BatchCoHiRF

# Arguments passed to the experiment as `model_params`
# (presumably forwarded to the model constructor; confirm against the experiment class).
model_params = {
    "cohirf_model": BaseCoHiRF,
    "cohirf_kwargs": {"base_model": KMeans, "max_iter": 1},
    "n_batches": 10,
    "n_jobs": 10,
}

# Optuna distributions, nested under the same keys as `model_params`.
search_space = {
    "cohirf_kwargs": {
        "n_features": optuna.distributions.FloatDistribution(0.1, 1),
        "repetitions": optuna.distributions.IntDistribution(1, 10),
        "base_model_kwargs": {
            "n_clusters": optuna.distributions.IntDistribution(2, 5),
        },
    },
}

# One concrete configuration with the same nesting as `search_space`
# (presumably used to seed the search; TODO confirm).
default_values = [
    {
        "cohirf_kwargs": {
            "n_features": 0.3,
            "repetitions": 5,
            "base_model_kwargs": {"n_clusters": 3},
        },
    },
]

In [10]:
# NOTE(review): unlike the "KMeans" example earlier in this notebook, no
# `experiment_name` or `seed_model` is passed here; confirm the defaults are intended.
hpo_settings = {
    "n_trials": 20,
    "hpo_seed": 0,
    "hpo_metric": "adjusted_rand",
    "direction": "maximize",
}
# The custom model class, its parameters, and its search space come from the previous cell.
custom_model_settings = {
    "model": model_cls,
    "model_params": model_params,
    "search_space": search_space,
    "default_values": default_values,
}
dataset_settings = {
    "seed_dataset": 0,
    "n_samples": 1000,
    "n_informative": 10,
    "n_random": 10000,
    "n_classes": 5,
    "class_sep": 10 * (10**0.5),
}

experiment = HPOClassificationClusteringExperiment(
    **hpo_settings,
    **custom_model_settings,
    **dataset_settings,
    **experiment_params,
)

# Keep only the first entry of whatever run() returns.
run_outputs = experiment.run(return_results=True)
result = run_outputs[0]

# Pull the headline numbers out of the nested result mapping.
evaluation = result["evaluate_model_return"]
ari = evaluation["best/adjusted_rand"]
hpo_time = result["fit_model_return"]["elapsed_time"]
best_time = evaluation["best/elapsed_time"]

Combinations completed:   0%|          | 0/1 [00:00<?, ?it/s]

Trials:   0%|          | 0/20 [00:00<?, ?it/s]