### Задание со звездочкой 17. Dunn index
Реализуйте Dunn Index. Будет засчитываться не абы-какая реализация, а соответствующая по стилю и оформлению реализации метрик в sklearn: обязательны докстринги, валидаторы и все такое.

То, как реализованы другие метрики, можно посмотреть [тут](https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/metrics/cluster/_unsupervised.py#L195).

In [1]:
import numpy as np
from sklearn.utils._param_validation import validate_params
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.utils.validation import check_array

@validate_params({
    'X': [np.ndarray],
    'labels': [np.ndarray],
    'metric': [str],
})
def dunn_score(X, labels, metric='euclidean'):
    """
    Compute the Dunn Index for a clustering solution (based on the Wikipedia definition).

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The data points.

    labels : ndarray of shape (n_samples,)
        Cluster labels for each point.

    metric : str, default='euclidean'
        The metric to use for calculating distances.

    Returns
    -------
    dunn_index : float
        The Dunn Index for the given clustering. Higher values indicate better-defined clusters.

    Notes
    -----
    The Dunn Index is defined as the ratio between the minimum inter-cluster distance
    and the maximum intra-cluster distance:

        Dunn Index = min_inter_cluster_distance / max_intra_cluster_distance

    Reference: https://en.wikipedia.org/wiki/Dunn_index
    """
    X = check_array(X, ensure_min_samples=2, ensure_min_features=1)
    labels = check_array(labels, ensure_2d=False, dtype=int)

    if X.shape[0] != labels.shape[0]:
        raise ValueError("The number of samples in X and labels must match.")

    # Unique clusters
    clusters = np.unique(labels)
    if len(clusters) < 2:
        raise ValueError("The number of unique clusters must be at least 2.")

    # Compute centroids of each cluster
    centroids = np.array([X[labels == cluster].mean(axis=0) for cluster in clusters])

    # Compute max intra-cluster distance (to centroid)
    max_intra_cluster_distance = 0
    for cluster, centroid in zip(clusters, centroids):
        cluster_points = X[labels == cluster]
        intra_distances = pairwise_distances(cluster_points, [centroid], metric=metric)
        max_intra_cluster_distance = max(max_intra_cluster_distance, intra_distances.max())

    if max_intra_cluster_distance == 0:
        raise ValueError("All points in the same cluster are identical.")

    # Compute min inter-cluster distance (between centroids)
    inter_cluster_distances = pairwise_distances(centroids, metric=metric)
    np.fill_diagonal(inter_cluster_distances, np.inf)
    min_inter_cluster_distance = inter_cluster_distances.min()

    # Compute Dunn Index
    dunn_index = min_inter_cluster_distance / max_intra_cluster_distance

    return dunn_index


In [2]:
from sklearn.datasets import make_blobs

import torch
from torchmetrics.functional.clustering import dunn_index

# tensor Dunn index for comparison
def tensor_dunn_score(X, y):
    
    data = torch.tensor(X)
    labels = torch.tensor(y)
    
    return dunn_index(data, labels)


# 1. Simple verification on synthetic data
print("1. Simple verification on synthetic data:")
X, y = make_blobs(n_samples=10, centers=2, cluster_std=0.5, random_state=42)
print("Dunn Index:", dunn_score(X, y))
print("Dunn Index by torch:", tensor_dunn_score(X, y))

# 2. Verification with one cluster (must be ValueError)
print("\n2. Verification with one cluster (must be ValueError):")
try:
    X_single, y_single = X, np.zeros(X.shape[0])
    dunn_score(X_single, y_single)
except ValueError as e:
    print("Caught expected error for single cluster:", e)

# 3. Verification for bad clusters
print("\n3. Verification for for poorly-separated clusters:")
X_bad, y_bad = make_blobs(n_samples=10, centers=2, cluster_std=5.0, random_state=42)
print("Dunn Index:", dunn_score(X_bad, y_bad))
print("Dunn Index by torch:", tensor_dunn_score(X_bad, y_bad))

# 4. Verification for identical points
print("\n4. Verification for identical points:")
X_identical = np.array([[1, 1], [1, 1], [5, 5], [5, 5]])
y_identical = np.array([0, 0, 1, 1])
try:
    print("Dunn Index:", dunn_score(X_identical, y_identical))
except ValueError as e:
    print("Caught expected error for identical points:", e)

# 5. Manhattan mertic test
print("\n5. Manhattan mertic test:")
X_custom, y_custom = make_blobs(n_samples=10, centers=2, cluster_std=1.0, random_state=42)
print("Dunn Index:", dunn_score(X_custom, y_custom, metric='manhattan'))
print("Dunn Index by torch:", tensor_dunn_score(X_custom, y_custom))


1. Simple verification on synthetic data:
Dunn Index: 10.303433516719945
Dunn Index by torch: tensor(10.3034, dtype=torch.float64)

2. Verification with one cluster (must be ValueError):
Caught expected error for single cluster: The number of unique clusters must be at least 2.

3. Verification for for poorly-separated clusters:
Dunn Index: 1.0376130343703116
Dunn Index by torch: tensor(1.0376, dtype=torch.float64)

4. Verification for identical points:
Caught expected error for identical points: All points in the same cluster are identical.

5. Manhattan mertic test:
Dunn Index: 5.6687513821301545
Dunn Index by torch: tensor(5.1316, dtype=torch.float64)
