In [1]:
# Setup Python path to import from project root
import sys
import os
import glob
from datetime import datetime

# Ensure project root is in path BEFORE importing any project package.
# The path entry has to exist first, otherwise the import below fails with
# ModuleNotFoundError whenever the package is not installed into the
# environment (presumably it lives in the parent directory of this notebook
# -- verify against the repository layout).
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Deliberately placed after the sys.path tweak above (hence the E402 waiver).
from kl_clustering_analysis.benchmarking import benchmark_cluster_algorithm  # noqa: E402


def remove_previous_analysis():
    """Clean up previous plotting artifacts and cached project modules.

    Two things happen, in this order:

    1. Every ``*.png`` and ``*.pdf`` file directly inside
       ``../cluster_tree_plots`` (relative to the current working
       directory) is deleted so the run produces only fresh plots. If the
       folder does not exist it is created instead. Deletion is
       best-effort: a file that cannot be removed is reported on stdout
       and skipped.
    2. The ``kl_clustering_analysis`` package and the top-level ``tests``
       package, including their submodules, are dropped from
       ``sys.modules`` so the next ``import`` re-executes them from disk.

    Returns:
        None
    """
    results_folder = "../cluster_tree_plots"
    if os.path.exists(results_folder):
        # Remove PNGs if present (should be none by default) and old PDFs
        for pattern in ("*.png", "*.pdf"):
            for stale_file in glob.glob(os.path.join(results_folder, pattern)):
                try:
                    os.remove(stale_file)
                except OSError as e:
                    # Best-effort cleanup: report the failure and keep going.
                    print(f"Error deleting {stale_file}: {e}")
                else:
                    print(f"Deleted previous result: {stale_file}")
    else:
        os.makedirs(results_folder, exist_ok=True)

    # Clear all kl_clustering_analysis and tests modules to force fresh import.
    # Match on whole dotted components rather than raw substrings/prefixes so
    # unrelated modules whose names merely start with "tests" (for example
    # "testsuite") or merely contain the package name are left alone.
    stale_modules = [
        name
        for name in list(sys.modules)
        if "kl_clustering_analysis" in name.split(".")
        or name.split(".", 1)[0] == "tests"
    ]
    for name in stale_modules:
        # pop() tolerates an entry that disappeared in the meantime.
        sys.modules.pop(name, None)

# NOTE(review): remove_previous_analysis() is defined above but is not called
# anywhere in this cell, so this message is printed without any cleanup or
# module clearing having run here -- confirm whether a call is missing.
print("All modules reloaded successfully")

All modules reloaded successfully


In [None]:
from tests.test_cases_config import SMALL_TEST_CASES
# Run validation with SMALL test cases and UMAP plotting enabled, producing PDFs only
# (individual plot files are disabled; plots are concatenated into PDFs).
df_results, fig = benchmark_cluster_algorithm(
    test_cases=SMALL_TEST_CASES,
    significance_level=0.30,
    verbose=True,
    plot_umap=True,
    plot_manifold=True,
    concat_plots_pdf=True,
    save_individual_plots=False,
)

# Save validation results to results folder
current_date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_dir = "../results"
# DataFrame.to_csv does not create missing parent directories and raises
# OSError on a fresh checkout, so make sure the folder exists first.
# `os` is imported in the first cell of this notebook.
os.makedirs(results_dir, exist_ok=True)
results_file = f"{results_dir}/validation_results_{current_date}.csv"
df_results.to_csv(results_file, index=False)
print(f"Validation results saved to {results_file}")
print("PDFs (k-distance, tree, UMAP) written to ../cluster_tree_plots")

  Creating UMAP comparison plot for test case 1...
  Creating UMAP comparison plot for test case 2...
  Creating UMAP comparison plot for test case 3...
  Creating manifold plot for manifold_case_1_KL_Divergence_tree_distance_metric-rogerstanimoto_tree_linkage_method-average.png...
  Creating manifold plot for manifold_case_1_Leiden_n_neighbors-None_resolution-1p0.png...
  Creating manifold plot for manifold_case_1_Louvain_n_neighbors-None_resolution-1p0.png...
  Creating manifold plot for manifold_case_1_DBSCAN_eps-None_min_samples-5.png...
  Creating manifold plot for manifold_case_1_OPTICS_min_cluster_size-5_min_samples-5_xi-0p05.png...
  Creating manifold plot for manifold_case_1_HDBSCAN_min_cluster_size-5_min_samples-None.png...
  Creating manifold plot for manifold_case_2_KL_Divergence_tree_distance_metric-rogerstanimoto_tree_linkage_method-average.png...
  Creating manifold plot for manifold_case_2_Leiden_n_neighbors-None_resolution-1p0.png...
  Creating manifold plot for manifo