In [7]:
import os
from pathlib import Path

import optuna
from sklearn.cluster import DBSCAN

from cohirf.experiment.hpo_spherical_clustering_experiment import HPOSphericalClusteringExperiment
from cohirf.models.batch_cohirf import BatchCoHiRF
from cohirf.models.cohirf import BaseCoHiRF

In [8]:
# Root directory for experiment artifacts. Set the COHIRF_RESULTS_DIR
# environment variable to run this notebook on another machine; when it is
# unset, the original hard-coded location is used, so existing runs keep
# pointing at the same place.
results_root = Path(os.environ.get("COHIRF_RESULTS_DIR", "/home/belucci/code/cohirf/results"))
results_dir = results_root / "spherical_clustering"

# MLflow tracking store: a SQLite database file located inside results_dir.
mlflow_tracking_uri = f"sqlite:///{results_dir}/mlflow.db"

In [9]:
# Keyword arguments shared by every experiment instantiated in this notebook.
experiment_params = {
    "mlflow_tracking_uri": mlflow_tracking_uri,
    "check_if_exists": False,
}

# Example of running one of the predefined, already-tested models

In [4]:
# Hyperparameter search for a model that is selected by its name ("DBSCAN").
# The keyword arguments are grouped the same way the original call labelled
# them: hpo, model, dataset.
hpo_settings = {
    "n_trials": 20,
    "hpo_seed": 0,
    "hpo_metric": "adjusted_rand",
    "direction": "maximize",
}
model_settings = {
    "experiment_name": "test-dbscan-hpo",
    "model": "DBSCAN",
    "seed_model": 0,
}
dataset_settings = {
    "seed_dataset": 0,
    "n_samples": 2000,
    "n_spheres": 2,
    "radius_separation": 0.5,
    "radius_std": 0.01,
}
experiment = HPOSphericalClusteringExperiment(
    **hpo_settings,
    **model_settings,
    **dataset_settings,
    verbose=0,
    **experiment_params,
)

# run() is asked to return its results; only the first entry is used here.
run_results = experiment.run(return_results=True)
result = run_results[0]
evaluation = result["evaluate_model_return"]
ari = evaluation["best/adjusted_rand"]
hpo_time = result["fit_model_return"]["elapsed_time"]
best_time = evaluation["best/elapsed_time"]

2025/07/19 18:43:36 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/07/19 18:43:36 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


Trials:   0%|          | 0/20 [00:00<?, ?it/s]

# Example of running a custom model

In [10]:
# Custom model definition: BatchCoHiRF configured with BaseCoHiRF as its
# cohirf_model and DBSCAN as the base model.
model_cls = BatchCoHiRF
model_params = {
    "cohirf_model": BaseCoHiRF,
    "cohirf_kwargs": {"base_model": DBSCAN, "max_iter": 1},
    "n_batches": 10,
    "n_jobs": 10,
}

# Short aliases for the Optuna distribution classes used in the search space.
FloatDist = optuna.distributions.FloatDistribution
IntDist = optuna.distributions.IntDistribution

# Search space, nested with the same key layout as the model's keyword
# arguments (cohirf_kwargs -> base_model_kwargs).
base_model_search_space = {
    "eps": FloatDist(1e-1, 10),
    "min_samples": IntDist(2, 50),
}
search_space = {
    "cohirf_kwargs": {
        "n_features": FloatDist(0.1, 1),
        "repetitions": IntDist(1, 10),
        "base_model_kwargs": base_model_search_space,
    }
}

# One fixed configuration, using the same nesting as search_space.
# NOTE(review): presumably evaluated as an initial HPO trial -- confirm against
# HPOSphericalClusteringExperiment.
base_model_defaults = {"eps": 0.5, "min_samples": 5}
default_values = [
    {
        "cohirf_kwargs": {
            "n_features": 0.3,
            "repetitions": 5,
            "base_model_kwargs": base_model_defaults,
        }
    },
]

In [11]:
# Hyperparameter search for the custom model: the model class, its fixed
# parameters, the search space and the default values are passed explicitly.
# The keyword arguments are grouped the same way the original call labelled
# them: hpo, model, dataset.
hpo_settings = {
    "n_trials": 20,
    "hpo_seed": 0,
    "hpo_metric": "adjusted_rand",
    "direction": "maximize",
}
model_settings = {
    "model": model_cls,
    "model_params": model_params,
    "search_space": search_space,
    "default_values": default_values,
}
dataset_settings = {
    "seed_dataset": 0,
    "n_samples": 2000,
    "n_spheres": 2,
    "radius_separation": 0.5,
    "radius_std": 0.01,
}
experiment = HPOSphericalClusteringExperiment(
    **hpo_settings,
    **model_settings,
    **dataset_settings,
    verbose=0,
    **experiment_params,
)

# run() is asked to return its results; only the first entry is used here.
run_results = experiment.run(return_results=True)
result = run_results[0]
evaluation = result["evaluate_model_return"]
ari = evaluation["best/adjusted_rand"]
hpo_time = result["fit_model_return"]["elapsed_time"]
best_time = evaluation["best/elapsed_time"]

2025/07/19 18:46:25 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/07/19 18:46:25 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


Trials:   0%|          | 0/20 [00:00<?, ?it/s]