# Dendritic Spine Clustering

Glossary: 
- Group — a division of spines defined by experiment design, e.g., division of spines into experimental and control groups;
- Class — a group of spines that meet specific criteria, e.g., stubby, mushroom, thin and filopodia groupings. In clustering results classes are also called groups, because classification is a variation of spine grouping;
- Cluster — a homogeneous group of spines in the data based on their morphometric features.

1. Set `dataset_path`, `show_reduction_method` and `manual_classification` (optional).

In [None]:
from spine_metrics import SpineMetricDataset
from notebook_widgets import SpineMeshDataset, intersection_ratios_mean_distance, create_dir
from spine_segmentation import apply_scale
from spine_fitter import SpineGrouping
from spine_clusterization import SpineClusterizer
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import silhouette_score
from typing import Optional
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)


# --- user settings ---------------------------------------------------------
dataset_path = "example dataset"
scale = (1, 1, 1)
show_reduction_method = "pca"

# Load the spine meshes, then rescale them with `scale`.
spine_dataset = SpineMeshDataset().load(dataset_path)
spine_dataset.apply_scale(scale)

# Load the manual classification (or any other labeling) into
# `manual_classification`. Keep exactly one `load` line active and comment out
# the one for the file you do not use. If you have no labeling at all, comment
# out every line of this paragraph and set `manual_classification = None`.
manual_classification = SpineGrouping().load(
    f"{dataset_path}/manual_classification/manual_classification_merged_reduced.json"
)
# manual_classification = SpineGrouping().load(f"{dataset_path}/labeling_by_dirs.json")
manual_classification = manual_classification.get_spines_subset(spine_dataset.spine_names)

# Load the precomputed metrics table.
spine_metrics = SpineMetricDataset().load(f"{dataset_path}/metrics.csv")
# Comment out the line below if you don't have a manual classification or labeling.
spine_metrics = spine_metrics.get_spines_subset(manual_classification.samples)

# Metric subsets used by the two clustering experiments below.
classic = spine_metrics.get_metrics_subset(
    [
        "OpenAngle",
        "CVD",
        "JunctionArea",
        "AverageDistance",
        "Length",
        "Area",
        "Volume",
        "ConvexHullVolume",
        "ConvexHullRatio",
        "LengthVolumeRatio",
        "LengthAreaRatio",
    ]
)
chord = spine_metrics.get_metrics_subset(["ChordDistribution"])

# Export folders: the parent "clustering" folder is created first, then one
# sub-folder per metric subset.
classic_save_path = f"{dataset_path}/clustering/classic"
chord_save_path = f"{dataset_path}/clustering/chord"
create_dir(f"{dataset_path}/clustering")
create_dir(classic_save_path)
create_dir(chord_save_path)

2. Functions for calculation of elbow score and silhouette metrics.

In [None]:
# elbow method
def kmeans_elbow_score(clusterizer: SpineClusterizer) -> float:
    """Return the within-cluster sum of squared distances for a clustering.

    For every group in ``clusterizer.grouping.groups`` the centroid (mean of
    the member vectors) is computed, and the squared Euclidean distances from
    each member to that centroid are accumulated over all groups. This is the
    quantity usually plotted against the number of clusters when applying the
    elbow method.

    Args:
        clusterizer: object exposing ``grouping.groups`` (a mapping whose
            values are collections of spine names) and
            ``fit_metrics.row_as_array(name)``. The latter is assumed to
            return a numeric vector of the same shape for every spine —
            TODO confirm against SpineMetricDataset.

    Returns:
        The total squared distance as a float; ``0.0`` when there are no
        groups or all groups are empty.
    """
    total = 0.0
    for group in clusterizer.grouping.groups.values():
        rows = [np.asarray(clusterizer.fit_metrics.row_as_array(spine_name), dtype=float)
                for spine_name in group]
        if not rows:
            # An empty cluster has no centroid and contributes nothing.
            continue
        points = np.stack(rows)
        # The centroid is the MEAN of the members; using the plain sum would
        # push the "center" away from the cluster by a factor of len(group).
        center = points.mean(axis=0)
        total += float(np.sum((points - center) ** 2))
    return total

def silhouette(clusterizer: SpineClusterizer, metric: Optional[callable] = None) -> float:
    """Compute the silhouette score of the clusterizer's current grouping.

    Args:
        clusterizer: object exposing ``grouping.groups``, ``fit_metrics`` and
            ``metric``.
        metric: optional pairwise distance ``metric(x1, x2)``. When omitted,
            ``clusterizer.metric`` is handed to ``silhouette_score``; when
            given, a full pairwise distance matrix is built with it and
            scored as ``"precomputed"``.

    Returns:
        The value returned by ``sklearn.metrics.silhouette_score``.
    """
    fit_metrics = clusterizer.fit_metrics
    points = []
    cluster_ids = []
    # Each group gets an integer label equal to its position in the groups mapping.
    for cluster_id, members in enumerate(clusterizer.grouping.groups.values()):
        for spine in members:
            points.append(fit_metrics.row_as_array(spine))
            cluster_ids.append(cluster_id)
    cluster_ids = np.array(cluster_ids)

    if metric is not None:
        # Row index follows the outer loop variable, column index the inner one.
        pairwise = np.array([[metric(inner, outer) for inner in points] for outer in points])
        return silhouette_score(pairwise, cluster_ids, metric="precomputed")
    return silhouette_score(points, cluster_ids, metric=clusterizer.metric)


## k-Means Classic Metrics

3. Clustering using classical metrics. In `score_func`, choose the elbow score, the silhouette score, or the max-divergence criterion.

In [None]:
from notebook_widgets import k_means_clustering_experiment_widget

# Grouping that the clustering results are compared against.
classification = manual_classification

# Passed through unchanged to the widget as `dim_reduction`.
dim_reduction = ""

# Scoring criterion: keep exactly one assignment active.
# The active one scores a clustering via intersection_ratios_mean_distance
# between `classification` and the clusterizer's grouping.
score_func = lambda clusterizer: intersection_ratios_mean_distance(classification, clusterizer.grouping, False)
# score_func = silhouette
# score_func = kmeans_elbow_score

display(
    k_means_clustering_experiment_widget(
        classic,
        spine_metrics,
        spine_dataset,
        score_func,
        max_num_of_clusters=17,
        classification=classification,
        save_folder=classic_save_path,
        dim_reduction=dim_reduction,
        show_method=show_reduction_method,
    )
)

4. View labeled groups or classes distribution.

In [None]:
from notebook_widgets import show_class_in_space

# Display the widget built by show_class_in_space for the manual
# classification over the classic metric subset.
display(show_class_in_space(manual_classification, classic))

## k-Means Chord Histograms

5. Clustering using chord distribution.

In [None]:
from notebook_widgets import k_means_clustering_experiment_widget

# Grouping that the clustering results are compared against.
classification = manual_classification

# Scoring criterion: keep exactly one assignment active. The elbow score is
# the one in effect for the chord experiment; the alternatives are left
# commented out so that no assignment is silently overwritten.
# score_func = lambda clusterizer: intersection_ratios_mean_distance(classification, clusterizer.grouping, False)
# score_func = silhouette
score_func = kmeans_elbow_score

# Passed through unchanged to the widget as `dim_reduction`.
dim_reduction = ""

display(k_means_clustering_experiment_widget(chord, spine_metrics, spine_dataset, score_func,
                                             max_num_of_clusters=17, classification=classification,
                                             save_folder=chord_save_path, dim_reduction=dim_reduction, show_method=show_reduction_method))

6. View labeled groups or classes distribution.

In [None]:
from notebook_widgets import show_class_in_space

# Display the widget built by show_class_in_space for the manual
# classification over the chord-distribution metric subset.
display(show_class_in_space(manual_classification, chord))

## View clustering

7. View each clustering result for the dataset.

In [None]:
from notebook_widgets import inspect_saved_groupings_widget

# Display the inspection widget for the "<dataset_path>/clustering" folder,
# i.e. the parent of the export folders used by the two k-means experiments.
# The mesh dataset, the full metrics table, both metric subsets and the
# manual classification are handed to the widget.
display(inspect_saved_groupings_widget(f"{dataset_path}/clustering", spine_dataset, spine_metrics,
                                       chord, classic, manual_classification))