In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the raw feature table and standardise every column with
# StandardScaler; X_scaled feeds the t-SNE projection in the next cell.
file_path = 'combined.csv'
X = pd.read_csv(file_path)
X_scaled = StandardScaler().fit(X).transform(X)

In [None]:
# Embed the standardised features in 2-D with t-SNE (fixed seed, n_jobs=-1)
# and draw the embedding as a single scatter plot.
tsne = TSNE(n_components=2, random_state=42, n_jobs=-1)
X_tsne = tsne.fit_transform(X_scaled)

fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X_tsne[:, 0], X_tsne[:, 1], s=6, alpha=0.6, linewidths=0, rasterized=True)
ax.set_title("TSNE Visualization")
ax.set_xlabel("Dimension 1")
ax.set_ylabel("Dimension 2")
fig.tight_layout()
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation (fixed seed so the split is
# reproducible).
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then reuse the fitted
# statistics for the held-out split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# Transform X_test, not the full X: scaling X here would put the training
# rows into the "test" matrix and make every test-set score include them.
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.metrics import silhouette_score, silhouette_samples, calinski_harabasz_score, davies_bouldin_score
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture
import joblib

In [None]:
# Fit an 8-component Gaussian mixture on the scaled training split and
# persist the fitted estimator with joblib.
gmm = GaussianMixture(n_components=8, random_state=42).fit(X_train_scaled)
joblib.dump(gmm, '/content/drive/MyDrive/Double_Hand_gesture/gesture_Raw_feature_gmm_model.pkl')

In [None]:
# Fit 8-cluster KMeans on the scaled training split and persist the fitted
# estimator with joblib.
kmeans = KMeans(n_clusters=8, random_state=42).fit(X_train_scaled)
joblib.dump(kmeans, '/content/drive/MyDrive/Double_Hand_gesture/gesture_Raw_feature_kmeans_model.pkl')

In [None]:
# Fit DBSCAN on the scaled training split and persist the fitted estimator.
# NOTE(review): eps/min_samples are hard-coded; presumably picked with the
# commented-out tuning toolkit further down - TODO confirm.
db = DBSCAN(eps=1.098214, min_samples=12).fit(X_train_scaled)
joblib.dump(db, '/content/drive/MyDrive/Double_Hand_gesture/gesture_Raw_feature_db_model.pkl')

In [None]:
def evaluate_model(model, X):
    """Print internal clustering-quality metrics for a fitted model on X.

    Labels are obtained with ``model.predict(X)``, so the model must expose
    a ``predict`` method (the notebook calls this with the fitted KMeans and
    GaussianMixture models).

    Parameters
    ----------
    model : fitted estimator with a ``predict`` method
    X : samples to label and score

    Returns
    -------
    None. The silhouette, Davies-Bouldin and Calinski-Harabasz scores are
    printed, one per line.
    """
    y_pred = model.predict(X)
    # All three scores are computed before anything is printed, so a failure
    # in any metric leaves no partial report.
    s_score = silhouette_score(X, y_pred)
    db_score = davies_bouldin_score(X, y_pred)
    ch_score = calinski_harabasz_score(X, y_pred)
    # Label spelling fixed (was "Silhoutte") to match evaluate_dbscan's output.
    print("Silhouette Score:", s_score)
    print("DB Score:", db_score)
    print("CH score:", ch_score)

In [None]:
# Score the fitted GMM on X_train_scaled (labels come from gmm.predict).
evaluate_model(gmm, X_train_scaled)

In [None]:
# Score the fitted GMM on X_test_scaled (labels come from gmm.predict).
evaluate_model(gmm, X_test_scaled)

In [None]:
# Score the fitted KMeans model on X_train_scaled (labels come from kmeans.predict).
evaluate_model(kmeans, X_train_scaled)

In [None]:
# Score the fitted KMeans model on X_test_scaled (labels come from kmeans.predict).
evaluate_model(kmeans, X_test_scaled)

In [None]:
def evaluate_dbscan(model, X):
    """Print clustering metrics for a DBSCAN model using its stored fit labels.

    The labels are read from ``model.labels_`` and passed, together with X,
    to the silhouette, Davies-Bouldin and Calinski-Harabasz scorers.
    Presumably X must be the same data the model was fitted on so that the
    rows line up with the labels - TODO confirm.

    NOTE(review): a later cell defines ``evaluate_dbscan`` again with an
    extra ``name`` parameter; once that cell runs, this version is shadowed.

    Returns None; the three scores are printed.
    """
    labels = model.labels_  # labels DBSCAN assigned during fit

    # Compute every score first, then print, so nothing is printed if a
    # scorer raises.
    report = (
        ("Silhouette Score:", silhouette_score(X, labels)),
        ("DB Score:", davies_bouldin_score(X, labels)),
        ("CH Score:", calinski_harabasz_score(X, labels)),
    )

    for caption, value in report:
        print(caption, value)

In [None]:
import numpy as np
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.neighbors import NearestNeighbors

def dbscan_predict(db, X_new):
    """
    Label new samples by their nearest DBSCAN core sample.

    Each row of X_new takes the cluster label of the closest entry in
    ``db.components_``. Rows whose nearest core sample is farther away than
    ``db.eps`` are labelled -1 (noise). If the model has no core samples at
    all, every row is labelled -1.

    Returns an array of labels with the same length as X_new.
    Raises AttributeError if ``db`` has no ``components_`` attribute.
    """
    cores = db.components_
    if cores.shape[0] == 0:
        # Everything was noise at fit time, so nothing can be assigned.
        return np.full(len(X_new), -1, dtype=int)

    # Distance to, and position of, the single closest core sample per row.
    core_index = NearestNeighbors(n_neighbors=1).fit(cores)
    nearest_dist, nearest_pos = core_index.kneighbors(X_new, return_distance=True)
    nearest_dist = nearest_dist.ravel()
    nearest_pos = nearest_pos.ravel()

    # labels_[core_sample_indices_] gives one label per core sample, in the
    # same order as components_, so nearest_pos indexes straight into it.
    labels_of_cores = db.labels_[db.core_sample_indices_]
    # Indexing with an integer array yields a new array, so the in-place
    # write below does not touch db.labels_.
    predicted = labels_of_cores[nearest_pos]

    # Too far from every core sample -> noise.
    predicted[nearest_dist > db.eps] = -1
    return predicted


def evaluate_dbscan(db, X, name="X"):
    """
    Print clustering-quality metrics for a fitted DBSCAN model on X.

    Label source:
      * if ``db.labels_`` has exactly ``X.shape[0]`` entries, those stored
        fit labels are used (X is taken to be the fitted data);
      * otherwise labels are assigned to X with ``dbscan_predict``.

    NOTE(review): the length comparison is only a heuristic - a different
    dataset that happens to have the same row count would be scored with
    the fit labels.

    Parameters
    ----------
    db : fitted DBSCAN-like model
    X : array with a ``shape`` attribute; the samples to score
    name : label for X used in the printed report

    Returns
    -------
    The label array used for the evaluation. It is returned on every path;
    previously the normal path returned None while the degenerate paths
    returned the labels. In a notebook, end the call with ``;`` to suppress
    the echoed array.
    """
    # Decide whether the stored labels correspond to this X.
    if hasattr(db, "labels_") and len(db.labels_) == X.shape[0]:
        y_pred = db.labels_
        source = "model.labels_ (fitted data)"
    else:
        y_pred = dbscan_predict(db, X)
        source = "assigned by nearest core-sample (approximate)"

    # np.unique returns the distinct labels already sorted; tolist() turns
    # them into plain Python ints so the report reads "[-1, 0, 1]" on every
    # NumPy version.
    unique_labels = np.unique(y_pred).tolist()
    n_clusters = sum(1 for lab in unique_labels if lab != -1)

    print(f"Evaluating on {name}: {X.shape[0]} samples. Labels source: {source}")
    print(f"Found {n_clusters} cluster(s) (excluding noise). Unique labels: {unique_labels}")

    if n_clusters == 0:
        print("No clusters found (all points labeled as noise). Metrics cannot be computed.")
        return y_pred
    if n_clusters == 1:
        # silhouette_score requires at least 2 clusters; CH and DB also expect >1 cluster
        print("Only one cluster found (plus maybe noise). Silhouette/DB/CH require >=2 clusters. Refit or use alternate metrics.")
        return y_pred

    # y_pred is passed unfiltered, so noise points (label -1) are scored as
    # if they formed a group of their own.
    s_score = silhouette_score(X, y_pred)
    db_score = davies_bouldin_score(X, y_pred)
    ch_score = calinski_harabasz_score(X, y_pred)

    print("Silhouette Score:", s_score)
    print("DB Score:", db_score)
    print("CH Score:", ch_score)

    return y_pred


In [None]:
# Score DBSCAN on X_train_scaled, the matrix db was fitted on, so the row
# count matches db.labels_ and the stored fit labels are used.
evaluate_dbscan(db, X_train_scaled, name="X_train")

In [None]:
# Score DBSCAN on X_test_scaled. If its row count differs from the fitted
# data, evaluate_dbscan assigns labels via dbscan_predict (approximate).
evaluate_dbscan(db, X_test_scaled, name="X_test")

In [None]:
# # DBSCAN tuning toolkit
# # Run this in your notebook after you have X_train_scaled loaded.

# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn.neighbors import NearestNeighbors
# from sklearn.cluster import DBSCAN
# from sklearn.decomposition import PCA
# from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
# from collections import Counter
# import joblib
# import warnings
# warnings.filterwarnings("ignore")

# # -------------------------
# # 1) k-distance plot helper
# # -------------------------
# def k_distance_plot(X, k=8, plot=True):
#     """
#     Plot sorted k-distance (distance to k-th nearest neighbor) useful for choosing eps.
#     Returns sorted k-distances array.
#     """
#     neigh = NearestNeighbors(n_neighbors=k)
#     neigh.fit(X)
#     distances, _ = neigh.kneighbors(X)
#     k_dist = np.sort(distances[:, -1])
#     if plot:
#         plt.figure(figsize=(8,4))
#         plt.plot(k_dist)
#         plt.xlabel(f"Points sorted by {k}-distance (ascending)")
#         plt.ylabel(f"{k}-distance (distance to {k}th NN)")
#         plt.title(f"k-distance plot (k={k}) — look for the elbow to set eps")
#         plt.grid(True)
#         plt.show()
#     return k_dist

# # -------------------------
# # 2) robust evaluation for DBSCAN (safe)
# # -------------------------
# def evaluate_dbscan_labels(labels, X, verbose=True):
#     """
#     Accepts labels (model.labels_) and X.
#     Returns dict with n_clusters, n_noise, cluster_sizes and optionally metrics (if >=2 clusters).
#     """
#     labels = np.asarray(labels)
#     unique = np.unique(labels)
#     n_clusters = len(unique[unique != -1])
#     n_noise = int(np.sum(labels == -1))
#     counts = Counter(labels)
#     res = {
#         "n_samples": len(labels),
#         "n_clusters": n_clusters,
#         "n_noise": n_noise,
#         "cluster_sizes": dict(counts)
#     }
#     if verbose:
#         print(f"Samples: {res['n_samples']}  |  Clusters (excl. noise): {res['n_clusters']}  |  Noise: {res['n_noise']}")
#         print("Cluster sizes (label: count):", dict(counts))

#     # Compute metrics only if at least 2 clusters among non-noise
#     if n_clusters >= 2:
#         mask = labels != -1
#         X_core = X[mask]
#         y_core = labels[mask]
#         if len(np.unique(y_core)) >= 2:
#             res["silhouette"] = silhouette_score(X_core, y_core)
#             res["davies_bouldin"] = davies_bouldin_score(X_core, y_core)
#             res["calinski_harabasz"] = calinski_harabasz_score(X_core, y_core)
#             if verbose:
#                 print(f"Silhouette: {res['silhouette']:.4f}  |  Davies-Bouldin: {res['davies_bouldin']:.4f}  |  CH: {res['calinski_harabasz']:.1f}")
#         else:
#             if verbose:
#                 print("Not enough distinct non-noise clusters to compute metrics.")
#     else:
#         if verbose:
#             print("Fewer than 2 clusters found → skipping silhouette/DB/CH metrics.")
#     return res

# # -------------------------
# # 3) Auto-generate eps candidates from k-distance percentiles
# # -------------------------
# def eps_candidates_from_kdist(k_dist, percentiles=[85,88,90,92,94,96,98], expand=0.2):
#     """
#     Use percentiles of the k-distance as eps candidates.
#     'expand' controls a small neighborhood around each percentile to try multiple eps values.
#     Returns a sorted unique list of candidate eps values.
#     """
#     vals = np.percentile(k_dist, percentiles)
#     candidates = []
#     for v in vals:
#         # create a small range around the percentile value
#         lo = max(v * (1 - expand/2), 1e-6)
#         hi = v * (1 + expand/2)
#         # generate 3 candidates in that small band
#         candidates.extend(np.linspace(lo, hi, 3))
#     candidates = np.unique(np.round(candidates, 6))
#     return sorted(candidates)

# # -------------------------
# # 4) Grid search over eps & min_samples (lightweight)
# # -------------------------
# def dbscan_grid_search(X, eps_list, min_samples_list, verbose=False):
#     """
#     For each (eps, min_samples) fit DBSCAN, record n_clusters, n_noise and metrics (if possible).
#     Returns pandas DataFrame with results sorted by silhouette (descending) where available.
#     """
#     rows = []
#     for eps in eps_list:
#         for ms in min_samples_list:
#             model = DBSCAN(eps=eps, min_samples=ms)
#             model.fit(X)
#             labels = model.labels_
#             res = evaluate_dbscan_labels(labels, X, verbose=False)
#             row = {
#                 "eps": eps,
#                 "min_samples": ms,
#                 "n_clusters": res["n_clusters"],
#                 "n_noise": res["n_noise"],
#                 "n_samples": res["n_samples"],
#                 "silhouette": res.get("silhouette", np.nan),
#                 "davies_bouldin": res.get("davies_bouldin", np.nan),
#                 "calinski_harabasz": res.get("calinski_harabasz", np.nan)
#             }
#             rows.append(row)
#             if verbose:
#                 print(f"eps={eps:.4f}, min_samples={ms} -> clusters={row['n_clusters']}, noise={row['n_noise']}, sil={row['silhouette']}")
#     df = pd.DataFrame(rows)
#     # sort: prefer highest silhouette (non-nan), fallback to more clusters (but not too many), then fewer noise
#     df_sorted = df.sort_values(by=["silhouette", "n_clusters", "n_noise"], ascending=[False, False, True]).reset_index(drop=True)
#     return df_sorted

# # -------------------------
# # 5) PCA scatter plot of clustering result
# # -------------------------
# def plot_dbscan_result(X, labels, title=None, pca_components=2):
#     """
#     Plot clusters using PCA reduction (2 components). Noise shown in gray.
#     """
#     labels = np.asarray(labels)
#     pca = PCA(n_components=pca_components)
#     X2 = pca.fit_transform(X)
#     unique_labels = np.unique(labels)
#     plt.figure(figsize=(7,6))
#     for lab in unique_labels:
#         mask = labels == lab
#         if lab == -1:
#             # noise
#             plt.scatter(X2[mask,0], X2[mask,1], s=10, marker='x', alpha=0.4, label='noise (-1)')
#         else:
#             plt.scatter(X2[mask,0], X2[mask,1], s=20, alpha=0.6, label=f'cluster {lab}')
#     plt.title(title if title else "DBSCAN clustering (PCA 2D)")
#     plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
#     plt.tight_layout()
#     plt.show()

# # -------------------------
# # 6) End-to-end runner
# # -------------------------
# def run_dbscan_tuning(X, k_for_kdist=8, percentiles=[85,88,90,92,94,96,98], min_samples_list=None, expand=0.25, verbose=False):
#     if min_samples_list is None:
#         min_samples_list = [max(3, k_for_kdist-2), k_for_kdist, k_for_kdist+2, k_for_kdist+4]  # e.g. [6,8,10,12] if k=8

#     print("1) k-distance plot (inspect the elbow to choose eps):")
#     k_dist = k_distance_plot(X, k=k_for_kdist, plot=True)

#     eps_cand = eps_candidates_from_kdist(k_dist, percentiles=percentiles, expand=expand)
#     # if eps candidates are all extremely small or large, add a small linear range as backup
#     if len(eps_cand) < 3:
#         eps_cand = np.unique(np.round(np.linspace(max(1e-4, eps_cand[0]*0.5), eps_cand[-1]*1.5, 6), 6)).tolist()
#     print(f"Auto-generated {len(eps_cand)} eps candidates (sample): {eps_cand[:6]} ...")

#     print(f"Trying min_samples values: {min_samples_list}")

#     print("\n2) Running grid search over eps x min_samples (this can take a while for large grids)...")
#     df = dbscan_grid_search(X, eps_cand, min_samples_list, verbose=verbose)

#     print("\nGrid search complete. Top candidates sorted by silhouette (non-noise):")
#     display_df = df.copy()
#     display_df['silhouette'] = display_df['silhouette'].round(4)
#     display_df['davies_bouldin'] = display_df['davies_bouldin'].round(4)
#     display_df['calinski_harabasz'] = display_df['calinski_harabasz'].round(2)
#     print(display_df.head(20).to_string(index=False))

#     # Save results
#     df.to_csv("dbscan_grid_search_results.csv", index=False)
#     print("\nSaved results to dbscan_grid_search_results.csv")

#     return df

# # ------------------------------------------------------
# # Example usage:
# # ------------------------------------------------------
# df_results = run_dbscan_tuning(X_train_scaled, k_for_kdist=8, percentiles=[85,88,90,92,94,96,98], min_samples_list=[6,8,10,12])
# # After that, pick a promising row from df_results (high silhouette, reasonable cluster count, not too much noise)
# # Example to visualize best (highest silhouette) row:
# #
# best = df_results.dropna(subset=["silhouette"]).sort_values("silhouette", ascending=False).iloc[0]
# print(best)
# model_best = DBSCAN(eps=best.eps, min_samples=int(best.min_samples)).fit(X_train_scaled)
# plot_dbscan_result(X_train_scaled, model_best.labels_, title=f"DBSCAN eps={best.eps}, min_samples={best.min_samples}")
# #
# # To save the best model:
# joblib.dump(model_best, '/content/drive/MyDrive/Double_Hand_gesture/gesture_dbscan_best.pkl')
# #
# # If there are no rows with silhouette (all NaN), inspect rows with reasonable cluster counts (n_clusters between 2 and, say, 20)


In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.mixture import GaussianMixture
# import matplotlib.pyplot as plt
# import joblib
# from sklearn.preprocessing import MinMaxScaler
# from umap import UMAP

# # -----------------------------
# # Load & scale
# # -----------------------------
# file_path = '/content/drive/MyDrive/Double_Hand_gesture/Gesture with mean and variance/Combined_mean_and_variance.csv'
# X = pd.read_csv(file_path)
# print(X)

# scaler_data = MinMaxScaler()
# X_scaled = scaler_data.fit_transform(X)
# joblib.dump(scaler_data, "scaler.pkl")

# # -----------------------------
# # UMAP before clustering (no colors)
# # -----------------------------
# umap = UMAP(n_components=2, n_neighbors=15, min_dist=0.1, n_jobs=-1, random_state=42)
# X_umap = umap.fit_transform(X_scaled)
# plt.figure(figsize=(10, 10))
# plt.scatter(X_umap[:, 0], X_umap[:, 1], s=6, alpha=0.6, linewidths=0, rasterized=True)
# plt.title("UMAP Visualization before Clustering")
# plt.xlabel("Dimension 1")
# plt.ylabel("Dimension 2")
# plt.tight_layout()
# plt.savefig("UMAP_before_clustering.png", dpi=200)
# plt.show()
# joblib.dump(umap, "umap.pkl")


#################################################################

# -----------------------------
# Train & save GMM
# -----------------------------
#

# # -----------------------------
# # Save labeled CSV
# # -----------------------------
# encoding = {0: "Royal", 1: "Green"}
# X_out = X.copy()
# X_out['target'] = [encoding[int(x)] for x in cluster_labels]
# X_out.to_csv("deepika.csv", index=False)

In [None]:
# import os
# import joblib
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import matplotlib.cm as cm
# from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (kept for 3D projection)
# from sklearn.cluster import KMeans, DBSCAN
# from sklearn.mixture import GaussianMixture
# from sklearn.metrics import silhouette_score, silhouette_samples
# from sklearn.decomposition import PCA
# from datetime import datetime
# import warnings

In [None]:
# # -*- coding: utf-8 -*-
# """
# Hand Gesture Clustering (KMeans / DBSCAN / GMM)
# Using Combined Mean + Variance Features (6 total)
# 3D Visualization with Color, Size, and Transparency Encoding
# + Silhouette analysis (global, per-cluster, plot)
# + Per-cluster bar graph
# + PCA 3D visualization colored by per-sample silhouette (heatmap-like)
# + Each run saves outputs in a timestamped folder (Asia/Kolkata timezone)
# """

# import os
# import joblib
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import matplotlib.cm as cm
# from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (kept for 3D projection)
# from sklearn.cluster import KMeans, DBSCAN
# from sklearn.mixture import GaussianMixture
# from sklearn.metrics import silhouette_score, silhouette_samples
# from sklearn.decomposition import PCA
# from datetime import datetime
# import warnings

# # timezone handling (zoneinfo available in Python 3.9+)
# try:
#     from zoneinfo import ZoneInfo
#     KOLKATA_TZ = ZoneInfo("Asia/Kolkata")
# except Exception:
#     KOLKATA_TZ = None

# warnings.filterwarnings("ignore", category=UserWarning)

# # ================================================================
# # Configuration / Labels
# # ================================================================
# GESTURE_LABELS = ['Cleaning', 'Come', 'Emergency Calling', 'Give',
#                   'Good', 'Pick', 'Stack', 'Wave']

# RANDOM_STATE = 42

# # ================================================================
# # Utility Functions
# # ================================================================
# def normalize_data(df):
#     """Min-max normalize all numeric features to [0,1]."""
#     for col in df.columns:
#         if np.issubdtype(df[col].dtype, np.number):
#             col_min = df[col].min()
#             col_max = df[col].max()
#             df[col] = (df[col] - col_min) / (col_max - col_min + 1e-9)
#     return df

# def load_csv(file_path):
#     df = pd.read_csv(file_path)
#     df = normalize_data(df)
#     return df

# def save_model(model, path):
#     joblib.dump(model, path)
#     print(f"💾 Saved model: {path}")

# def make_timestamped_run_dir(base_dir):
#     """
#     Create a new subfolder in base_dir with a timestamp, return its path.
#     Timestamp uses Asia/Kolkata timezone if available.
#     """
#     if KOLKATA_TZ:
#         now = datetime.now(KOLKATA_TZ)
#     else:
#         now = datetime.now()
#     ts = now.strftime("%Y%m%d_%H%M%S")
#     run_dir = os.path.join(base_dir, f"run_{ts}")
#     os.makedirs(run_dir, exist_ok=True)
#     return run_dir, ts

# # ================================================================
# # Visualization Utilities
# # ================================================================
# def visualize_6d_in_3d(X, labels, label_map, feature_names, title="6D Visualization (3D Projection)", save_path=None):
#     """
#     Visualizes 6 features in 3D:
#       - 3 features on X, Y, Z axes
#       - 4th feature mapped to color
#       - 5th feature mapped to size
#       - 6th feature affects brightness (via color scaling)
#     """
#     fig = plt.figure(figsize=(12, 10))
#     ax = fig.add_subplot(111, projection='3d')

#     cmap = plt.get_cmap('viridis')

#     # Mapping features
#     color_feature = (X[:, 3] - X[:, 3].min()) / (X[:, 3].max() - X[:, 3].min() + 1e-9)
#     size_feature = (X[:, 4] - X[:, 4].min()) / (X[:, 4].max() - X[:, 4].min() + 1e-9)
#     bright_feature = (X[:, 5] - X[:, 5].min()) / (X[:, 5].max() - X[:, 5].min() + 1e-9)

#     color_vals = cmap(color_feature * 0.7 + 0.3 * bright_feature)

#     unique_labels = np.unique(labels)

#     for label in unique_labels:
#         indices = labels == label
#         gesture_name = label_map.get(label, f"Cluster {label}")

#         ax.scatter(
#             X[indices, 0], X[indices, 1], X[indices, 2],
#             c=color_vals[indices],
#             s=50 + 200 * size_feature[indices],
#             alpha=0.8,
#             edgecolor='k',
#             linewidth=0.3,
#             label=gesture_name
#         )

#         if indices.sum() > 0:
#             centroid = np.mean(X[indices, :3], axis=0)
#             ax.text(
#                 centroid[0], centroid[1], centroid[2],
#                 gesture_name,
#                 fontsize=10, fontweight='bold', color='black',
#                 bbox=dict(facecolor='white', edgecolor='gray', alpha=0.7, boxstyle="round,pad=0.3")
#             )

#     ax.set_xlabel(feature_names[0], fontsize=12, fontweight='bold')
#     ax.set_ylabel(feature_names[1], fontsize=12, fontweight='bold')
#     ax.set_zlabel(feature_names[2], fontsize=12, fontweight='bold')
#     ax.set_title(title, fontsize=16, fontweight='bold', pad=12)
#     ax.legend(title="Gesture Clusters", fontsize=9, title_fontsize=11)
#     plt.tight_layout()
#     if save_path:
#         fig.savefig(save_path, dpi=200)
#         print(f"🖼 Saved 6D->3D visualization: {save_path}")
#     plt.show()
#     plt.close(fig)

# def plot_silhouette(X, labels, method_name, out_dir):
#     """
#     Classic silhouette plot (one bar per sample grouped by cluster)
#     """
#     valid_mask = labels != -1
#     valid_labels = labels[valid_mask]
#     X_valid = X[valid_mask]

#     if len(np.unique(valid_labels)) < 2:
#         print("⚠️ Silhouette plot skipped - need at least 2 clusters (excluding noise).")
#         return

#     # Compute silhouette values
#     sil_vals = silhouette_samples(X_valid, valid_labels)
#     y_lower = 10
#     fig, ax = plt.subplots(1, 1, figsize=(10, 7))

#     unique_clusters = np.unique(valid_labels)
#     # color mapping per cluster
#     norm = plt.Normalize(vmin=unique_clusters.min(), vmax=unique_clusters.max())
#     colors = cm.nipy_spectral(norm(unique_clusters))

#     for i, c in enumerate(unique_clusters):
#         c_sil_vals = sil_vals[valid_labels == c]
#         c_sil_vals.sort()
#         size_cluster = c_sil_vals.shape[0]
#         y_upper = y_lower + size_cluster

#         ax.fill_betweenx(np.arange(y_lower, y_upper),
#                          0, c_sil_vals,
#                          facecolor=colors[i], alpha=0.7)
#         ax.text(-0.05, y_lower + 0.5 * size_cluster, f"Cluster {c} (n={size_cluster})", fontsize=9)
#         y_lower = y_upper + 10  # spacing between clusters

#     ax.set_title(f"Silhouette Plot for {method_name}", fontsize=14, fontweight='bold')
#     ax.set_xlabel("Silhouette coefficient values")
#     ax.set_ylabel("Cluster label and sample index")
#     ax.axvline(x=np.mean(sil_vals), color="red", linestyle="--", label="Average silhouette")
#     ax.set_yticks([])  # clear the y-axis ticks
#     ax.legend(loc="upper right")
#     plt.tight_layout()
#     save_path = os.path.join(out_dir, f"silhouette_plot_{method_name}.png")
#     fig.savefig(save_path, dpi=200)
#     print(f"🖼 Saved silhouette plot: {save_path}")
#     plt.show()
#     plt.close(fig)

# def plot_per_cluster_bar(mean_sil_per_cluster, method_name, out_dir, label_map=None):
#     """
#     Bar graph: mean silhouette for each cluster
#     """
#     clusters = list(mean_sil_per_cluster.keys())
#     means = [mean_sil_per_cluster[c] for c in clusters]
#     names = [label_map.get(c, str(c)) if label_map else str(c) for c in clusters]

#     fig, ax = plt.subplots(figsize=(10, 6))
#     bars = ax.bar(range(len(clusters)), means, tick_label=names, alpha=0.85, edgecolor='k')
#     ax.set_title(f"Per-Cluster Mean Silhouette ({method_name})", fontsize=14, fontweight='bold')
#     ax.set_ylabel("Mean silhouette score")
#     ax.set_ylim([-0.1, 1.0])
#     ax.set_xticklabels(names, rotation=45, ha='right')
#     for bar, val in zip(bars, means):
#         ax.text(bar.get_x() + bar.get_width() / 2.0, val + 0.02, f"{val:.3f}", ha='center', va='bottom', fontsize=9)
#     plt.tight_layout()
#     save_path = os.path.join(out_dir, f"per_cluster_bar_{method_name}.png")
#     fig.savefig(save_path, dpi=200)
#     print(f"🖼 Saved per-cluster bar chart: {save_path}")
#     plt.show()
#     plt.close(fig)

# def plot_pca_3d_silhouette(X, labels, sample_silhouette_vals, method_name, out_dir, label_map=None):
#     """
#     PCA -> 3D scatter where points are colored by their silhouette value.
#     Also draws cluster centroids in PCA space.
#     """
#     valid_mask = labels != -1
#     X_valid = X[valid_mask]
#     labels_valid = labels[valid_mask]
#     sil_vals = sample_silhouette_vals

#     if X_valid.shape[0] == 0:
#         print("⚠️ PCA silhouette plot skipped - no valid samples (all noise).")
#         return

#     pca = PCA(n_components=3, random_state=RANDOM_STATE)
#     X_pca = pca.fit_transform(X_valid)

#     fig = plt.figure(figsize=(12, 9))
#     ax = fig.add_subplot(111, projection='3d')

#     sc = ax.scatter(
#         X_pca[:, 0], X_pca[:, 1], X_pca[:, 2],
#         c=sil_vals, cmap='viridis', s=40, alpha=0.9, edgecolor='k', linewidth=0.2
#     )
#     plt.colorbar(sc, ax=ax, shrink=0.6, pad=0.1, label='Silhouette value')

#     # Plot centroids per cluster in PCA space
#     unique_clusters = np.unique(labels_valid)
#     for c in unique_clusters:
#         indices = labels_valid == c
#         centroid = X_pca[indices].mean(axis=0)
#         ax.text(centroid[0], centroid[1], centroid[2], label_map.get(c, f"Cluster {c}") if label_map else f"Cluster {c}",
#                 fontsize=10, fontweight='bold', color='black',
#                 bbox=dict(facecolor='white', edgecolor='gray', alpha=0.7, boxstyle="round,pad=0.3"))

#     ax.set_title(f"PCA (3D) colored by silhouette values ({method_name})", fontsize=14, fontweight='bold')
#     ax.set_xlabel("PCA 1")
#     ax.set_ylabel("PCA 2")
#     ax.set_zlabel("PCA 3")
#     plt.tight_layout()
#     save_path = os.path.join(out_dir, f"pca3d_silhouette_{method_name}.png")
#     fig.savefig(save_path, dpi=200)
#     print(f"🖼 Saved PCA 3D silhouette plot: {save_path}")
#     plt.show()
#     plt.close(fig)

# # ================================================================
# # Clustering helpers
# # ================================================================
# def train_kmeans(X, n_clusters):
#     model = KMeans(n_clusters=n_clusters, random_state=RANDOM_STATE, n_init=10)
#     model.fit(X)
#     return model, model.predict(X)

# def train_dbscan(X, eps=0.1, min_samples=5):
#     model = DBSCAN(eps=eps, min_samples=min_samples)
#     model.fit(X)
#     return model, model.labels_

# def train_gmm(X, n_clusters):
#     model = GaussianMixture(n_components=n_clusters, random_state=RANDOM_STATE)
#     model.fit(X)
#     return model, model.predict(X)

# # ================================================================
# # Main
# # ================================================================
# if __name__ == "__main__":
#     # --- Paths (edit if needed) ---
#     base_model_dir = "/content/drive/MyDrive/Double_Hand_gesture"  # base directory where timestamped runs will be created
#     os.makedirs(base_model_dir, exist_ok=True)

#     # Create timestamped run directory
#     run_dir, ts = make_timestamped_run_dir(base_model_dir)
#     print(f"📁 Created run folder: {run_dir}")

#     # Choose clustering method: "kmeans", "dbscan", or "gmm"
#     method = "gmm"

#     # CSV path (edit to your dataset location)
#     csv_path = "/content/drive/MyDrive/Double_Hand_gesture/Gesture with mean and variance/Combined_mean_and_variance.csv"

#     # --- Load dataset ---
#     df = load_csv(csv_path)
#     print(f"✅ Loaded: {csv_path}")
#     print(f"Shape: {df.shape}")
#     print(f"Columns: {list(df.columns)}")

#     # --- Select six mean+variance features ---
#     feature_candidates = [c for c in df.columns if any(k in c.lower() for k in ["mean", "var", "variance"])]
#     if len(feature_candidates) < 6:
#         raise ValueError(f"Expected ≥6 features; found {len(feature_candidates)}: {feature_candidates}")

#     feature_cols = feature_candidates[:6]  # pick first six
#     X = df[feature_cols].values
#     print(f"📊 Using features: {feature_cols}")

#     # ================================================================
#     # Train and Evaluate
#     # ================================================================
#     if method == "kmeans":
#         model, labels = train_kmeans(X, len(GESTURE_LABELS))
#         save_model(model, os.path.join(run_dir, "kmeans_6feature_model.pkl"))
#         title = "KMeans"

#     elif method == "dbscan":
#         # You may want to tune eps/min_samples for your dataset
#         model, labels = train_dbscan(X, eps=0.12, min_samples=5)
#         save_model(model, os.path.join(run_dir, "dbscan_6feature_model.pkl"))
#         title = "DBSCAN"

#     elif method == "gmm":
#         model, labels = train_gmm(X, len(GESTURE_LABELS))
#         save_model(model, os.path.join(run_dir, "gmm_6feature_model.pkl"))
#         title = "GMM"

#     else:
#         raise ValueError("Invalid method. Choose 'kmeans', 'dbscan', or 'gmm'.")

#     print(f"\n✅ Training complete using {method.upper()}.")
#     print("Cluster IDs found:", np.unique(labels))

#     # ================================================================
#     # Map Clusters to Gestures
#     # ================================================================
#     unique_labels = np.unique(labels)
#     label_map = {lbl: GESTURE_LABELS[i % len(GESTURE_LABELS)] for i, lbl in enumerate(unique_labels)}

#     print("\n🧩 Cluster → Gesture Mapping:")
#     for k, v in label_map.items():
#         print(f"  Cluster {k} → {v}")

#     map_path = os.path.join(run_dir, f"{method}_6feature_label_map.pkl")
#     joblib.dump(label_map, map_path)
#     print(f"💾 Mapping saved to: {map_path}")

#     # Also save a small CSV summarizing counts per cluster
#     counts = {}
#     for lbl in unique_labels:
#         counts[int(lbl)] = int((labels == lbl).sum())
#     summary_df = pd.DataFrame.from_dict(counts, orient='index', columns=['count']).sort_index()
#     summary_df.index.name = 'cluster'
#     summary_df['gesture_label'] = summary_df.index.map(lambda x: label_map.get(x, ""))
#     summary_csv_path = os.path.join(run_dir, f"{method}_cluster_summary_{ts}.csv")
#     summary_df.to_csv(summary_csv_path)
#     print(f"💾 Cluster summary CSV saved: {summary_csv_path}")

#     # ================================================================
#     # Evaluate Clustering Quality (global and per-cluster)
#     # ================================================================
#     valid_mask = labels != -1
#     if len(np.unique(labels[valid_mask])) > 1:
#         global_score = silhouette_score(X[valid_mask], labels[valid_mask])
#         print(f"📈 Silhouette Score ({method.upper()}, 6 features): {global_score:.4f}")
#     else:
#         global_score = None
#         print("‚ö†Ô∏è Silhouette Score not computed (need ‚â•2 clusters excluding noise).")

#     # Per-cluster silhouette (and save sample-level silhouette values)
#     per_cluster_mean = {}
#     sample_silhouette_vals = None

#     if len(np.unique(labels[valid_mask])) > 1:
#         sample_silhouette_vals = silhouette_samples(X[valid_mask], labels[valid_mask])
#         clusters = np.unique(labels[valid_mask])
#         for c in clusters:
#             cluster_vals = sample_silhouette_vals[labels[valid_mask] == c]
#             if len(cluster_vals) > 0:
#                 per_cluster_mean[c] = float(cluster_vals.mean())
#             else:
#                 per_cluster_mean[c] = float('nan')

#         print("\n🔎 Per-Cluster Silhouette Scores:")
#         for c, m in per_cluster_mean.items():
#             print(f"  Cluster {c}: mean silhouette = {m:.4f} (n={int((labels[valid_mask] == c).sum())})")
#     else:
#         print("⚠️ Per-cluster silhouette not computed (need ≥2 clusters excluding noise).")

#     # Save per-cluster means (as npy and csv)
#     pc_npy = os.path.join(run_dir, f"{method}_per_cluster_silhouette_means.npy")
#     np.save(pc_npy, per_cluster_mean)
#     pc_csv = os.path.join(run_dir, f"{method}_per_cluster_silhouette_means_{ts}.csv")
#     pd.DataFrame.from_dict(per_cluster_mean, orient='index', columns=['mean_silhouette']).to_csv(pc_csv)
#     print(f"💾 Per-cluster silhouette means saved: {pc_npy} and {pc_csv}")

#     # Save sample-level silhouette values aligned to original indices (where valid_mask True)
#     if sample_silhouette_vals is not None:
#         sample_sil_array = np.full(shape=(labels.shape[0],), fill_value=np.nan)
#         # put silhouette values back into original positions
#         sample_sil_array[valid_mask] = sample_silhouette_vals
#         sample_sil_path = os.path.join(run_dir, f"{method}_sample_silhouette_vals_{ts}.npy")
#         np.save(sample_sil_path, sample_sil_array)
#         print(f"💾 Sample-level silhouette values saved: {sample_sil_path}")

#     # ================================================================
#     # Plotting: silhouette plot, per-cluster bar, PCA 3D silhouette heatmap
#     # ================================================================
#     if len(np.unique(labels[valid_mask])) > 1:
#         plot_silhouette(X, labels, method, run_dir)
#         plot_per_cluster_bar(per_cluster_mean, method, run_dir, label_map)
#         plot_pca_3d_silhouette(X, labels, sample_silhouette_vals, method, run_dir, label_map)
#     else:
#         print("⚠️ Skipping silhouette and PCA plots - need ≥2 clusters (excluding noise).")

#     # ================================================================
#     # 6D -> 3D visualization (original style)
#     # ================================================================
#     viz_path = os.path.join(run_dir, f"{method}_6d_to_3d_viz_{ts}.png")
#     visualize_6d_in_3d(X, labels, label_map, feature_cols, title=f"{method.upper()} (6D -> 3D)", save_path=viz_path)

#     # ================================================================
#     # Final save of model and labels (if not already saved)
#     # ================================================================
#     labels_path = os.path.join(run_dir, f"{method}_6feature_labels_{ts}.npy")
#     np.save(labels_path, labels)
#     print(f"💾 Saved cluster labels: {labels_path}")

#     # Save a small run-metadata JSON for reproducibility
#     try:
#         import json
#         meta = {
#             "timestamp": ts,
#             "method": method,
#             "feature_cols": feature_cols,
#             "n_samples": int(X.shape[0]),
#             "n_features": int(X.shape[1]),
#             "global_silhouette": float(global_score) if global_score is not None else None,
#             "cluster_counts": counts
#         }
#         meta_path = os.path.join(run_dir, f"run_metadata_{ts}.json")
#         with open(meta_path, "w") as f:
#             json.dump(meta, f, indent=2)
#         print(f"💾 Run metadata saved: {meta_path}")
#     except Exception as e:
#         print("⚠️ Could not save run metadata JSON:", e)

#     print("\n🎉 All done. Plots and models saved inside:", run_dir)
