# Crossfire Dataset - Hyperparameter Optimization with HPO Module

This notebook uses the refactored `hpo.py` module to run multiple HPO experiments with different metrics on the Crossfire dataset.

## Setup and Imports

In [21]:
import sys
sys.path.insert(0, '../code')

import pandas as pd
import numpy as np
import folium
import warnings
warnings.filterwarnings('ignore')

from hpo import run_parallel_optimization, results_to_dataframe
from scipy.spatial import ConvexHull, QhullError
from scipy.spatial.distance import pdist

print("✓ All imports loaded")

In [22]:
# Seed NumPy's legacy global random generator so NumPy-based randomness in this kernel is repeatable.
# NOTE(review): whether run_parallel_optimization (or any worker processes it starts) draws from
# this global generator is not visible in this notebook — confirm before relying on it.
np.random.seed(42)

## Load Crossfire Data

In [24]:
# Read the coordinates; X keeps the column order (longitude, latitude), so X[:, 0] is
# longitude and X[:, 1] is latitude in every cell below.
df_geo = pd.read_csv('geodf.csv')
X = df_geo[['longitude', 'latitude']].to_numpy()

n_points = X.shape[0]
lon_values, lat_values = X[:, 0], X[:, 1]
print(f"✓ Data loaded: {n_points} points")
print(f"  Longitude range: [{lon_values.min():.4f}, {lon_values.max():.4f}]")
print(f"  Latitude range: [{lat_values.min():.4f}, {lat_values.max():.4f}]")
X.shape

(27738, 2)

## Map Visualization Functions

In [None]:
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib.pyplot as plt

def generate_distinct_colors(n_colors):
    """Return a list of ``n_colors`` hex colour strings.

    Colours are taken, in order, from a fixed sequence of matplotlib colormaps
    (each sampled at ``N`` evenly spaced positions). If those run out, the rest
    are generated in HSV space by stepping the hue with the golden-ratio
    conjugate while cycling saturation and value.
    """
    qualitative_maps = (plt.cm.tab20, plt.cm.Set3, plt.cm.Pastel1, plt.cm.Pastel2,
                        plt.cm.Dark2, plt.cm.Accent, plt.cm.Paired)

    picked = []
    for palette in qualitative_maps:
        if len(picked) >= n_colors:
            break
        size = getattr(palette, 'N', 256)
        # Never take more entries from a palette than are still needed.
        take = min(size, n_colors - len(picked))
        picked.extend(
            mcolors.to_hex(palette(idx / max(size - 1, 1)))
            for idx in range(take)
        )

    # Fallback once every palette is exhausted (range() is empty when nothing is missing).
    for extra in range(n_colors - len(picked)):
        hue = (extra * 0.618033988749895) % 1.0
        saturation = 0.7 + (extra % 3) * 0.1
        value = 0.8 + (extra % 2) * 0.15
        picked.append(mcolors.to_hex(mcolors.hsv_to_rgb([hue, saturation, value])))

    return picked[:n_colors]

def create_cluster_map(X, labels, algorithm_name, center_lat=-22.9, center_lon=-43.2):
    """Create a folium map with one circle marker per point, coloured by cluster.

    Parameters
    ----------
    X : array-like of shape (N, 2)
        Point coordinates; each row is unpacked as (longitude, latitude).
    labels : sequence of length N
        Cluster label of each point. Labels >= 0 are clusters; -1 is noise.
    algorithm_name : str
        Text shown in bold in the title box.
    center_lat, center_lon : float
        Initial map center.

    Returns
    -------
    folium.Map
        Map with the markers and a fixed title box (cluster count, point count).
    """
    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    unique_labels = np.unique(labels)
    cluster_labels = [label for label in unique_labels if label >= 0]
    n_clusters = len(cluster_labels)

    color_map = dict(zip(cluster_labels, generate_distinct_colors(n_clusters)))
    # Only -1 is noise. Label 0 is a regular cluster (it is counted in n_clusters and
    # is always present for KMeans), so it must keep its palette colour instead of
    # being painted black whenever the labelling happens to contain no -1.
    color_map[-1] = '#000000'

    for i, (lon, lat) in enumerate(X):
        label = labels[i]
        # Any other label (e.g. a negative value other than -1) falls back to grey.
        color = color_map.get(label, '#808080')

        folium.CircleMarker(
            location=[lat, lon],  # folium expects (lat, lon)
            radius=4,
            popup=f"Cluster: {label}",
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.7
        ).add_to(m)

    title_html = f'''
    <div style="position: fixed; 
                top: 10px; left: 50px; width: 400px; height: 50px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:16px; padding: 10px">
        <b>{algorithm_name}</b><br>
        Clusters: {n_clusters} | Total points: {len(X)}
    </div>
    '''
    m.get_root().html.add_child(folium.Element(title_html))

    return m

print("✓ Map creation function defined")

In [None]:
# Center the maps on the mean coordinate of the data (column 1 = latitude, column 0 = longitude)
center_lat = np.mean(X[:, 1])
center_lon = np.mean(X[:, 0])

print(f"Map center: ({center_lat:.4f}, {center_lon:.4f})")

## Define Metric Functions

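We'll use two different metrics. Both multiply a granularity term $k^{\alpha}$ (where $k$ is the number of non-noise clusters), a compactness term raised to the power $\beta$, and a coverage term $e^{\gamma c}$ (where $c$ is the fraction of points assigned to a cluster). The label `-1` is treated as noise.
1. **GGDS** (Geo-Granular Density Score) - compactness is the number of clustered points divided by the summed ConvexHull area of all clusters
2. **Compact GGDS** - penalizes elongation: compactness is the average, over clusters, of the cluster size divided by the squared cluster diameter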

In [None]:
def calculate_ggds(points, labels, alpha=1.0, beta=1.0, gamma=1.0):
    """
    Calculates the Geo-Granular Density Score using Scipy.

    score = k**alpha * (n_clustered / total_hull_area)**beta * exp(gamma * coverage)

    Args:
        points (array-like): (N, 2) coordinates.
        labels (array-like): (N,) cluster labels. -1 indicates noise.
        alpha (float): Weight for cluster count (Granularity).
        beta (float): Weight for density (Compactness).
        gamma (float): Weight for coverage (Noise penalty).

    Returns:
        float: The calculated score; 0.0 when there is no non-noise cluster.
    """
    # Coerce up front: with a plain list, `labels == label` is a single bool, the mask
    # selects nothing, and the score would silently come out as 0.
    points = np.asarray(points)
    labels = np.asarray(labels)

    cluster_ids = [label for label in np.unique(labels) if label != -1]
    k = len(cluster_ids)
    if k == 0:
        return 0.0

    # Area charged to clusters that have no usable hull (< 3 points or degenerate geometry).
    degenerate_area = 1e-6

    n_total = len(points)
    n_clustered = 0
    total_area = 0.0

    for label in cluster_ids:
        cluster_points = points[labels == label]
        n_in_cluster = len(cluster_points)
        n_clustered += n_in_cluster

        cluster_area = degenerate_area
        if n_in_cluster >= 3:
            try:
                # For 2-D input, ConvexHull.volume is the enclosed area.
                cluster_area = ConvexHull(cluster_points).volume
            except QhullError:
                # e.g. collinear or coincident points: keep the degenerate area
                pass
        total_area += cluster_area

    coverage_ratio = n_clustered / n_total
    term_granularity = k ** alpha
    density = n_clustered / (total_area + 1e-9)  # epsilon guards against a zero total area
    term_density = density ** beta
    term_coverage = np.exp(gamma * coverage_ratio)

    return term_granularity * term_density * term_coverage


def calculate_compact_ggds(points, labels, alpha=1.0, beta=1.0, gamma=1.0):
    """
    Calculates Geo-Granular Score penalizing elongation (Cluster Diameter).

    score = k**alpha * mean(n_i / diameter_i**2)**beta * exp(gamma * coverage)

    Args:
        points (array-like): (N, 2) coordinates.
        labels (array-like): (N,) cluster labels. -1 indicates noise.
        alpha (float): Weight for cluster count.
        beta (float): Weight for the average span density.
        gamma (float): Weight for coverage.

    Returns:
        float: The calculated score; 0.0 when there is no non-noise cluster.
    """
    # Coerce up front: with a plain list, `labels == label` is a single bool, the mask
    # selects nothing, and the score would silently come out as 0.
    points = np.asarray(points)
    labels = np.asarray(labels)

    cluster_ids = [label for label in np.unique(labels) if label != -1]
    k = len(cluster_ids)
    if k == 0:
        return 0.0

    n_total = len(points)
    n_clustered = 0
    total_span_density = 0.0

    for label in cluster_ids:
        cluster_points = points[labels == label]
        n_in_cluster = len(cluster_points)
        n_clustered += n_in_cluster

        if n_in_cluster >= 3:
            try:
                # The largest pairwise distance is attained between hull vertices, so
                # restricting to them keeps pdist small.
                candidates = cluster_points[ConvexHull(cluster_points).vertices]
            except QhullError:
                # Degenerate geometry: fall back to all points of the cluster.
                candidates = cluster_points
            dists = pdist(candidates, metric='euclidean')
            diameter = np.max(dists) if len(dists) > 0 else 0.0
        elif n_in_cluster == 2:
            diameter = np.linalg.norm(cluster_points[0] - cluster_points[1])
        else:
            diameter = 0.0

        # epsilon keeps single-point / zero-diameter clusters finite
        total_span_density += n_in_cluster / (diameter**2 + 1e-6)

    avg_span_density = total_span_density / k
    coverage_ratio = n_clustered / n_total

    term_granularity = k ** alpha
    term_compactness = avg_span_density ** beta
    term_coverage = np.exp(gamma * coverage_ratio)

    return term_granularity * term_compactness * term_coverage

# Cell-completion marker: printed once both metric functions above have been defined.
print("✓ GGDS and Compact GGDS metrics defined")

## Configuration

In [None]:
# Hyperparameter ranges for Crossfire dataset
def _int_range(low, high):
    """Build the spec dict for an integer hyperparameter with range (low, high)."""
    return {'type': 'int', 'range': (low, high)}


def _float_range(low, high):
    """Build the spec dict for a float hyperparameter with range (low, high)."""
    return {'type': 'float', 'range': (low, high)}


def _choice(options):
    """Build the spec dict for a categorical hyperparameter."""
    return {'type': 'categorical', 'options': options}


HYPERPARAMETER_RANGES = {
    'DTSCAN': {
        'MinPts': _int_range(2, 25),
        'area_threshold': _float_range(-50.0, 0),
        'length_threshold': _float_range(-50.0, 0),
    },
    'DBSCAN': {
        'eps': _float_range(0.001, 0.1),
        'min_samples': _int_range(3, 30),
    },
    'KMeans': {
        'n_clusters': _int_range(2, 20),
        'n_init': _choice([5, 10, 20, 50, 100]),
    },
    'HDBSCAN': {
        'min_cluster_size': _int_range(2, 25),
        'alpha': _float_range(0.0, 1.0),
    },
    # NOTE(review): ASCDT has a search space here but is not listed in ALGORITHMS below.
    'ASCDT': {
        'min_cluster_size': _int_range(2, 25),
        'beta': _float_range(-50.0, 50.0),
    },
}

# Configuration
N_TRIALS = 50
ALGORITHMS = ['DTSCAN', 'DBSCAN', 'HDBSCAN', 'KMeans']
N_JOBS = -1

# Create datasets dictionary (no true labels for crossfire)
datasets = {
    # Dummy all-zero labels since we use unsupervised metrics
    'Crossfire': (X, np.zeros(len(X))),
}

print("✓ Configuration loaded")
print(f"  Algorithms: {ALGORITHMS}")
print(f"  Trials per task: {N_TRIALS}")

## Experiment 1: HPO with GGDS Metric

In [9]:
# Run optimization with GGDS metric
# NOTE(review): n_trials is hard-coded to 100 here, while the configuration cell sets
# N_TRIALS = 50 (and prints it as "Trials per task") and Experiment 2 passes N_TRIALS.
# The two experiments therefore run with different search budgets — confirm this is intended.
# Unlike Experiment 2, this call also does not pass use_optuna_multiprocessing.
results_ggds = run_parallel_optimization(
    datasets=datasets,
    algorithms=ALGORITHMS,
    metric_func=calculate_ggds,
    hyperparameter_ranges=HYPERPARAMETER_RANGES,
    n_trials=100,
    experiment_name="Crossfire_HPO_GGDS_3",
    n_jobs=N_JOBS,
    mlflow_uri="file:../mlruns",
    verbose=True
)

[2025-11-25 21:15:15] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 37/100
[2025-11-25 21:15:15] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 38/100
[2025-11-25 21:15:15] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 39/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 9/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 40/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 41/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 42/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 43/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 44/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 45/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 10/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 46/100
[2025-11-25 21:15:16] Dataset: Crossfire, Algorithm: DBSCAN, Trial: 47/100
[2025-11-25 21:15:17] Data

In [10]:
# Tabulate the GGDS optimization results and show them under a banner
df_ggds = results_to_dataframe(results_ggds)

banner = "=" * 80
print("\n" + banner, "GGDS METRIC RESULTS", banner, sep="\n")
df_ggds


GGDS METRIC RESULTS


Unnamed: 0,Dataset,Algorithm,Score,N Clusters,Expected,Time (s),Best Params
1,Crossfire,DBSCAN,100554000000.0,935,1,16.9,"{'eps': 0.007473073777533925, 'min_samples': 5}"
2,Crossfire,HDBSCAN,70102070000.0,5050,1,243.3,"{'use_scaled': True, 'min_cluster_size': 2, 'a..."
0,Crossfire,DTSCAN,367082400.0,488,1,58.5,"{'use_scaled': True, 'MinPts': 9, 'area_thresh..."
3,Crossfire,KMeans,19169510.0,20,1,266.8,"{'use_scaled': False, 'n_clusters': 20, 'n_ini..."


## Experiment 2: HPO with Compact GGDS Metric

In [11]:
# Run optimization with Compact GGDS metric
# The algorithm list comes from the shared ALGORITHMS constant (same values as the former
# inline literal) so that it cannot drift from the loops below that iterate ALGORITHMS.
results_compact_ggds = run_parallel_optimization(
    datasets=datasets,
    algorithms=ALGORITHMS,
    metric_func=calculate_compact_ggds,
    hyperparameter_ranges=HYPERPARAMETER_RANGES,
    n_trials=N_TRIALS,
    experiment_name="Crossfire_HPO_Compact_GGDS",
    n_jobs=N_JOBS,
    mlflow_uri="file:../mlruns",
    verbose=True,
    use_optuna_multiprocessing=True
)


STARTING PARALLEL OPTIMIZATION
Datasets: ['Crossfire']
Algorithms: ['DTSCAN', 'DBSCAN', 'HDBSCAN', 'KMeans']
Trials per task: 50
Parallelization: Optuna (trials within each study)
Trials run in parallel: 8
Total tasks: 4

[2025-11-25 21:19:36] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 1/50
[2025-11-25 21:19:41] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 14/50
[2025-11-25 21:19:46] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 22/50
Step 2: Building graph from triangulation...
Step 3: Removing global effects (filtering outlier edges/triangles)...
   Building Triangle objects...
[2025-11-25 21:19:51] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 31/50
[2025-11-25 21:19:56] Dataset: Crossfire, Algorithm: DTSCAN, Trial: 39/50
   Computing triangle areas...
   Applying z-score normalization to areas (Eq. 2)...
   Computing edge lengths...
   Computing triangle areas...
   Applying z-score normalization to areas (Eq. 2)...
   Computing edge lengths...
[2025-11-25 21:20:01] Dataset: C

In [12]:
# Tabulate the Compact GGDS optimization results
df_compact_ggds = results_to_dataframe(results_compact_ggds)

rule = "=" * 80
print("\n" + rule, "COMPACT GGDS METRIC RESULTS", rule, sep="\n")
# Plain-text dump of the whole frame, in addition to the rich table rendered below.
print(df_compact_ggds.to_string(index=False))

df_compact_ggds

Unnamed: 0,Dataset,Algorithm,Score,N Clusters,Expected,Time (s),Best Params
2,Crossfire,HDBSCAN,55502640000.0,5047,1,99.7,"{'use_scaled': False, 'min_cluster_size': 2, '..."
1,Crossfire,DBSCAN,13143420000.0,176,1,1.8,"{'eps': 0.0024279799218272424, 'min_samples': 20}"
0,Crossfire,DTSCAN,557761700.0,478,1,30.2,"{'use_scaled': False, 'MinPts': 8, 'area_thres..."
3,Crossfire,KMeans,15670040.0,20,1,7.3,"{'use_scaled': False, 'n_clusters': 20, 'n_ini..."


## Generate Maps with Best Parameters

### GGDS Results

In [13]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN, KMeans, HDBSCAN
from dtscan import DTSCAN
from ascdt import ASCDT
import io
from contextlib import redirect_stdout

def generate_map_from_result(X, algorithm, params, center_lat, center_lon, return_labels=False):
    """Fit ``algorithm`` with ``params`` on ``X`` and render the clustering as a map.

    Parameters:
    -----------
    X : np.ndarray
        (N, 2) coordinates; the map is always drawn from these original values.
    algorithm : str
        One of 'DTSCAN', 'DBSCAN', 'KMeans', 'HDBSCAN', 'ASCDT'.
    params : dict
        Keyword arguments for the estimator. The dict is not modified. An optional
        'use_scaled' entry is removed before the estimator is built and selects the
        clustering input: StandardScaler-standardized coordinates when truthy, the
        raw coordinates when falsy. When it is absent (or None) the per-algorithm
        default is used: raw for DTSCAN/ASCDT, standardized for DBSCAN/KMeans/HDBSCAN.
        NOTE(review): this presumes 'use_scaled' in the HPO best params records whether
        the trial clustered standardized data — confirm against hpo.py.
    center_lat, center_lon : float
        Map center passed on to create_cluster_map.
    return_labels : bool
        If True, also return the labels array

    Returns:
    --------
    The map, or ``(map, labels)`` when ``return_labels`` is True. For an unknown
    algorithm, ``None`` or ``(None, None)`` respectively.
    """
    # Copy so the caller's dict (typically result['best_params']) is left untouched.
    model_params = dict(params)
    # Not an estimator argument: forwarding it would raise TypeError in the constructor.
    use_scaled = model_params.pop('use_scaled', None)

    if algorithm == 'DTSCAN':
        model, scaled_by_default = DTSCAN(**model_params), False
    elif algorithm == 'DBSCAN':
        model, scaled_by_default = DBSCAN(**model_params), True
    elif algorithm == 'KMeans':
        model, scaled_by_default = KMeans(**model_params, random_state=42), True
    elif algorithm == 'HDBSCAN':
        model, scaled_by_default = HDBSCAN(**model_params), True
    elif algorithm == 'ASCDT':
        model, scaled_by_default = ASCDT(**model_params), False
    else:
        return None if not return_labels else (None, None)

    fit_on_scaled = scaled_by_default if use_scaled is None else bool(use_scaled)
    # Standardize only when the chosen input needs it.
    X_fit = StandardScaler().fit_transform(X) if fit_on_scaled else X

    if algorithm == 'DTSCAN':
        # DTSCAN prints progress messages while fitting; keep them out of the cell output.
        with redirect_stdout(io.StringIO()):
            labels = model.fit_predict(X_fit)
    else:
        labels = model.fit_predict(X_fit)

    map_obj = create_cluster_map(X, labels, algorithm, center_lat, center_lon)
    return (map_obj, labels) if return_labels else map_obj

# Generate maps for GGDS results and store labels for later use
print("Generating GGDS maps...")
ggds_maps = {}
ggds_labels = {}  # Store labels for reuse
for algo in ALGORITHMS:
    if algo not in results_ggds['Crossfire']:
        continue
    result = results_ggds['Crossfire'][algo]
    # Strip the HPO-only 'use_scaled' flag on a copy: deleting it from
    # result['best_params'] would permanently alter results_ggds, so re-running the
    # results table cell would no longer reproduce its output.
    # NOTE(review): with the flag dropped, the map is clustered on the per-algorithm
    # default input of generate_map_from_result, which may differ from the input the
    # best trial used — confirm.
    model_params = {name: value for name, value in result['best_params'].items()
                    if name != 'use_scaled'}
    map_obj, labels = generate_map_from_result(X, algo, model_params, center_lat, center_lon, return_labels=True)
    if map_obj is not None:
        ggds_maps[algo] = map_obj
        ggds_labels[algo] = labels  # Store labels for convex hull visualization
        print(f"  ✓ {algo}: Score={result['best_score']:.4f}, Clusters={result['n_clusters']}")

In [14]:
# Display GGDS maps
# NOTE: the display loop below is disabled by wrapping it in a triple-quoted string, so no
# map is rendered. Because that string is the cell's last expression, the notebook echoes
# its repr as the cell output instead.
"""for algo, map_obj in ggds_maps.items():
    result = results_ggds['Crossfire'][algo]
    print(f"\n{'='*60}")
    print(f"{algo} - GGDS Metric")
    print(f"{'='*60}")
    print(f"Score: {result['best_score']:.4f}")
    print(f"Clusters: {result['n_clusters']}")
    print(f"Params: {result['best_params']}")
    display(map_obj)"""

'for algo, map_obj in ggds_maps.items():\n    result = results_ggds[\'Crossfire\'][algo]\n    print(f"\n{\'=\'*60}")\n    print(f"{algo} - GGDS Metric")\n    print(f"{\'=\'*60}")\n    print(f"Score: {result[\'best_score\']:.4f}")\n    print(f"Clusters: {result[\'n_clusters\']}")\n    print(f"Params: {result[\'best_params\']}")\n    display(map_obj)'

### Compact GGDS Results

In [15]:
# Generate maps for Compact GGDS results and store labels for later use
print("Generating Compact GGDS maps...")
compact_ggds_maps = {}
compact_ggds_labels = {}  # Store labels for reuse
for algo in ALGORITHMS:
    if algo not in results_compact_ggds['Crossfire']:
        continue
    result = results_compact_ggds['Crossfire'][algo]
    # Strip the HPO-only 'use_scaled' flag on a copy: deleting it from
    # result['best_params'] would permanently alter results_compact_ggds, so re-running
    # the results table cell would no longer reproduce its output.
    # NOTE(review): with the flag dropped, the map is clustered on the per-algorithm
    # default input of generate_map_from_result, which may differ from the input the
    # best trial used — confirm.
    model_params = {name: value for name, value in result['best_params'].items()
                    if name != 'use_scaled'}
    map_obj, labels = generate_map_from_result(X, algo, model_params, center_lat, center_lon, return_labels=True)
    if map_obj is not None:
        compact_ggds_maps[algo] = map_obj
        compact_ggds_labels[algo] = labels  # Store labels for convex hull visualization
        print(f"  ✓ {algo}: Score={result['best_score']:.4f}, Clusters={result['n_clusters']}")

In [16]:
# Display Compact GGDS maps
# NOTE: the display loop below is disabled by wrapping it in a triple-quoted string, so no
# map is rendered. Because that string is the cell's last expression, the notebook echoes
# its repr as the cell output instead.
"""for algo, map_obj in compact_ggds_maps.items():
    result = results_compact_ggds['Crossfire'][algo]
    print(f"\n{'='*60}")
    print(f"{algo} - Compact GGDS Metric")
    print(f"{'='*60}")
    print(f"Score: {result['best_score']:.4f}")
    print(f"Clusters: {result['n_clusters']}")
    print(f"Params: {result['best_params']}")
    display(map_obj)"""

'for algo, map_obj in compact_ggds_maps.items():\n    result = results_compact_ggds[\'Crossfire\'][algo]\n    print(f"\n{\'=\'*60}")\n    print(f"{algo} - Compact GGDS Metric")\n    print(f"{\'=\'*60}")\n    print(f"Score: {result[\'best_score\']:.4f}")\n    print(f"Clusters: {result[\'n_clusters\']}")\n    print(f"Params: {result[\'best_params\']}")\n    display(map_obj)'

## Comparison Summary

In [17]:
# Create comparison table
def _score_columns(results, algo, prefix):
    """Return the '<prefix>_Score' and '<prefix>_Clusters' cells for one algorithm.

    Both cells are 'N/A' when ``results['Crossfire']`` has no entry for ``algo``;
    otherwise the score is formatted with four decimals.
    """
    per_algorithm = results['Crossfire']
    if algo not in per_algorithm:
        return {f'{prefix}_Score': 'N/A', f'{prefix}_Clusters': 'N/A'}
    best = per_algorithm[algo]
    return {
        f'{prefix}_Score': f"{best['best_score']:.4f}",
        f'{prefix}_Clusters': best['n_clusters'],
    }


# One row per algorithm: GGDS columns first, then the Compact GGDS columns.
comparison_data = [
    {
        'Algorithm': algo,
        **_score_columns(results_ggds, algo, 'GGDS'),
        **_score_columns(results_compact_ggds, algo, 'Compact_GGDS'),
    }
    for algo in ALGORITHMS
]

comparison_df = pd.DataFrame(comparison_data)

comparison_df

Unnamed: 0,Algorithm,GGDS_Score,GGDS_Clusters,Compact_GGDS_Score,Compact_GGDS_Clusters
0,DTSCAN,367082420.276,488,557761689.5877,478
1,DBSCAN,100553965385.7932,935,13143421914.9834,176
2,HDBSCAN,70102067967.8418,5050,55502637491.9041,5047
3,KMeans,19169514.6802,20,15670042.7755,20


In [20]:
# Save only the GGDS columns of the comparison table to CSV
ggds_columns = ['Algorithm', 'GGDS_Score', 'GGDS_Clusters']
bests_ggds_sc = comparison_df[ggds_columns]
bests_ggds_sc.to_csv('best_in_rio.csv', index=False)

## View Results in MLflow

To view all experiment results in MLflow UI:

```bash
mlflow ui --backend-store-uri file:../mlruns
```

Then open http://localhost:5000 in your browser.

In [None]:
def _add_point_markers(m, points, popup, color, radius=4, fill_opacity=0.7):
    """Add one filled circle marker per (lon, lat) row of ``points`` to map ``m``."""
    for point in points:
        folium.CircleMarker(
            location=[point[1], point[0]],  # lat, lon
            radius=radius,
            popup=popup,
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=fill_opacity
        ).add_to(m)


def create_cluster_map_with_hulls(X, labels, algorithm_name, center_lat=-22.9, center_lon=-43.2,
                                  zoom_start=14):
    """
    Create a folium map showing clusters as convex hull polygons and outliers as points.

    Parameters:
    -----------
    X : array-like
        Input data points, (N, 2) coordinates with columns (longitude, latitude)
    labels : array-like
        Cluster labels (N,), -1 indicates noise/outliers
    algorithm_name : str
        Name of the algorithm (shown in the title box)
    center_lat : float
        Latitude for map center
    center_lon : float
        Longitude for map center
    zoom_start : int
        Initial zoom level of the map

    Returns:
    --------
    folium.Map : Map object with convex hull polygons and outlier points
    """
    from scipy.spatial import ConvexHull, QhullError

    # Boolean-mask indexing below needs real arrays.
    X = np.asarray(X)
    labels = np.asarray(labels)

    # Center on the requested coordinates; they were previously ignored in favour of
    # a hard-coded location.
    m = folium.Map(location=[center_lat, center_lon], zoom_start=zoom_start)

    # Cluster labels are the non-negative ones; each gets its own colour.
    unique_labels = np.unique(labels)
    cluster_labels = [label for label in unique_labels if label >= 0]
    n_clusters = len(cluster_labels)
    color_map = dict(zip(cluster_labels, generate_distinct_colors(n_clusters)))

    for label in cluster_labels:
        cluster_points = X[labels == label]
        color = color_map.get(label, '#808080')

        if len(cluster_points) < 3:
            # A hull needs at least 3 points; draw the points themselves.
            _add_point_markers(m, cluster_points, f"Cluster: {label}", color)
            continue

        try:
            hull = ConvexHull(cluster_points)
        except QhullError:
            # Fallback: add points if hull fails
            _add_point_markers(m, cluster_points, f"Cluster: {label}", color)
            continue

        # Polygon ring as (lat, lon) pairs, closed by repeating the first vertex.
        polygon_coords = [[point[1], point[0]] for point in cluster_points[hull.vertices]]
        polygon_coords.append(polygon_coords[0])

        folium.Polygon(
            locations=polygon_coords,
            popup=f"Cluster {label}",
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.3,
            weight=2
        ).add_to(m)

    # Outliers are exactly the points labelled -1; other negative labels are not drawn.
    outlier_points = X[labels == -1]
    _add_point_markers(m, outlier_points, "Outlier", '#000000', radius=3, fill_opacity=0.8)

    # Add title
    n_outliers = len(outlier_points)
    title_html = f'''
    <div style="position: fixed; 
                top: 10px; left: 50px; width: 400px; height: 60px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:16px; padding: 10px">
        <b>{algorithm_name}</b><br>
        Clusters: {n_clusters} | Outliers: {n_outliers} | Total points: {len(X)}
    </div>
    '''
    m.get_root().html.add_child(folium.Element(title_html))

    return m

print("✓ Convex hull map function defined")


In [None]:
# Build convex hull maps (cluster polygons plus outlier points) for the GGDS run
print("Generating convex hull maps for GGDS results...", "=" * 60, sep="\n")

def generate_hull_maps_from_labels(X, labels_dict):
    """Build one convex hull map per algorithm from already computed labels.

    No model is re-fitted: ``labels_dict`` maps algorithm name -> label array. A
    failure for one algorithm is reported with its traceback and does not stop
    the remaining algorithms. Returns a dict algorithm name -> map.
    """
    import traceback

    # Map center: mean latitude (column 1) and mean longitude (column 0)
    lat_center = X[:, 1].mean()
    lon_center = X[:, 0].mean()

    maps_by_algorithm = {}
    for algorithm in labels_dict:
        algo_labels = labels_dict[algorithm]
        print(f"\n{algorithm}:")

        try:
            title = f"{algorithm} (Convex Hulls)"
            maps_by_algorithm[algorithm] = create_cluster_map_with_hulls(
                X, algo_labels, title, lat_center, lon_center
            )

            # Summary counts: distinct non-noise labels and number of noise points
            n_clusters = len(np.unique(algo_labels[algo_labels != -1]))
            n_outliers = np.sum(algo_labels == -1)
            print(f"  ✓ Created hull map: {n_clusters} clusters, {n_outliers} outliers")
        except Exception as err:
            print(f"  ✗ Failed to create hull map: {err}")
            traceback.print_exc()

    return maps_by_algorithm

# The labels stored while building the GGDS maps are reused here
hull_maps_ggds = generate_hull_maps_from_labels(X, ggds_labels)

n_ggds_hull_maps = len(hull_maps_ggds)
print(f"\n✓ Generated {n_ggds_hull_maps} convex hull maps for GGDS")


In [None]:
# NOTE: this cell is disabled — the display loop is wrapped in a triple-quoted string, so it
# is not executed and no map is rendered. As the cell's only expression, the string itself
# is what the notebook would echo as output.
"""# Display the convex hull maps for GGDS results
for algorithm, map_obj in hull_maps_ggds.items():
    result = results_ggds['Crossfire'][algorithm]
    print(f"\n{'='*60}")
    print(f"{algorithm} - Convex Hull Visualization (GGDS)")
    print(f"{'='*60}")
    print(f"Score: {result['best_score']:.4f}")
    print(f"N Clusters: {result['n_clusters']}")
    display(map_obj)
"""

In [None]:
# Build the convex hull maps for the Compact GGDS run from its stored labels
print("Generating convex hull maps for Compact GGDS results...", "=" * 60, sep="\n")

hull_maps_compact_ggds = generate_hull_maps_from_labels(X, compact_ggds_labels)

n_compact_hull_maps = len(hull_maps_compact_ggds)
print(f"\n✓ Generated {n_compact_hull_maps} convex hull maps for Compact GGDS")


In [None]:
# NOTE: this cell is disabled — the display loop is wrapped in a triple-quoted string, so it
# is not executed and no map is rendered. As the cell's only expression, the string itself
# is what the notebook would echo as output.
"""# Display the convex hull maps for Compact GGDS results
for algorithm, map_obj in hull_maps_compact_ggds.items():
    result = results_compact_ggds['Crossfire'][algorithm]
    print(f"\n{'='*60}")
    print(f"{algorithm} - Convex Hull Visualization (Compact GGDS)")
    print(f"{'='*60}")
    print(f"Score: {result['best_score']:.4f}")
    print(f"N Clusters: {result['n_clusters']}")
    display(map_obj)"""
