In [53]:
using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// A group of one-dimensional data points produced by a clustering pass.
/// </summary>
public class Cluster
{
    /// <summary>The points currently assigned to this cluster.</summary>
    public List<double> DataPoints { get; set; } = new List<double>();

    /// <summary>Arithmetic mean of <see cref="DataPoints"/>, or 0 when the cluster is empty.</summary>
    public double Average => DataPoints.Count > 0 ? DataPoints.Average() : 0;

    /// <summary>Number of points in the cluster (exposed as a double; type kept for existing callers).</summary>
    public double Count => DataPoints.Count;

    /// <summary>
    /// Computes the median of the supplied values.
    /// </summary>
    /// <param name="sortedData">
    /// The values to take the median of. Despite the name, the list does not need to be
    /// pre-sorted, and it is left unmodified.
    /// </param>
    /// <returns>
    /// The middle value for an odd count, the mean of the two middle values for an even
    /// count, or 0 for an empty list (the same convention <see cref="Average"/> uses).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="sortedData"/> is null.</exception>
    public static double CalculateMedian(List<double> sortedData)
    {
        if (sortedData == null)
        {
            throw new ArgumentNullException(nameof(sortedData));
        }

        if (sortedData.Count == 0)
        {
            // Without this guard the even-count branch would index [-1] and throw.
            return 0;
        }

        // Sort a private copy so the caller's list keeps its original order.
        var ordered = new List<double>(sortedData);
        ordered.Sort();

        int mid = ordered.Count / 2;
        return ordered.Count % 2 != 0
            ? ordered[mid]
            : (ordered[mid - 1] + ordered[mid]) / 2.0;
    }
}

/// <summary>
/// Outcome of a clustering run: the clusters that were formed and the points
/// that were set aside as anomalies.
/// </summary>
public class ClusteringResult
{
    /// <summary>Creates a result with empty cluster and anomaly lists.</summary>
    public ClusteringResult()
    {
        Clusters = new List<Cluster>();
        Anomalies = new List<double>();
    }

    /// <summary>The clusters produced by the run.</summary>
    public List<Cluster> Clusters { get; set; }

    /// <summary>The data points that were not placed in any cluster.</summary>
    public List<double> Anomalies { get; set; }
}

/// <summary>
/// One-dimensional k-means and k-medians clustering with distance-based anomaly detection.
/// </summary>
public class Clustering
{
    // Number of standard deviations added to the mean when deriving the k-medians
    // anomaly threshold. The printed label is built from this value so the two cannot drift.
    // NOTE(review): earlier comments/labels said 3σ while the code used 2; confirm the intended value.
    private const double AnomalySigmaMultiplier = 2.0;

    /// <summary>
    /// Clusters the data with k-means (centroid = cluster mean). A point whose distance to its
    /// nearest centroid exceeds <paramref name="anomalyThreshold"/> is reported as an anomaly
    /// instead of being assigned to a cluster.
    /// </summary>
    /// <param name="data">The values to cluster.</param>
    /// <param name="k">Number of clusters; must be between 1 and the number of data points.</param>
    /// <param name="anomalyThreshold">Maximum allowed distance from the nearest centroid.</param>
    /// <param name="maxIterations">Upper bound on assignment/update passes.</param>
    /// <returns>The clusters and anomalies from the final pass.</returns>
    /// <exception cref="ArgumentException">The data is empty or <paramref name="k"/> is out of range.</exception>
    public static ClusteringResult KMeansClusteringWithAnomalyDetection(IEnumerable<double> data, int k, double anomalyThreshold, int maxIterations = 100)
    {
        var dataPoints = MaterializeAndValidate(data, k);
        return RunClustering(dataPoints, k, anomalyThreshold, maxIterations, points => points.Average());
    }

    /// <summary>
    /// Clusters the data with k-medians (centroid = cluster median). The anomaly threshold is
    /// derived from the data as mean + <see cref="AnomalySigmaMultiplier"/> population standard
    /// deviations and is written to the console before clustering starts.
    /// </summary>
    /// <param name="data">The values to cluster.</param>
    /// <param name="k">Number of clusters; must be between 1 and the number of data points.</param>
    /// <param name="maxIterations">Upper bound on assignment/update passes.</param>
    /// <returns>The clusters and anomalies from the final pass.</returns>
    /// <exception cref="ArgumentException">The data is empty or <paramref name="k"/> is out of range.</exception>
    public static ClusteringResult KMediansClusteringWithAnomalyDetection(IEnumerable<double> data, int k, int maxIterations = 100)
    {
        var dataPoints = MaterializeAndValidate(data, k);

        // Derive the anomaly threshold from the data distribution (population standard deviation).
        // NOTE(review): this is a value on the data scale but is compared against a
        // point-to-centroid distance below; confirm that is the intended semantics.
        double mean = dataPoints.Average();
        double stdDev = Math.Sqrt(dataPoints.Select(x => Math.Pow(x - mean, 2)).Average());
        double anomalyThreshold = mean + AnomalySigmaMultiplier * stdDev;

        Console.WriteLine($"Anomaly Threshold (Mean + {AnomalySigmaMultiplier}σ): {anomalyThreshold:F2}");

        return RunClustering(dataPoints, k, anomalyThreshold, maxIterations, Cluster.CalculateMedian);
    }

    /// <summary>
    /// Writes each cluster (points, count, average) and the anomaly list to the console.
    /// </summary>
    /// <param name="result">The clustering result to print.</param>
    public static void DisplayClustersAndAnomaliesTextually(ClusteringResult result)
    {
        for (int i = 0; i < result.Clusters.Count; i++)
        {
            Console.WriteLine($"Cluster {i + 1}:");
            Console.WriteLine($" - Data Points: {string.Join(", ", result.Clusters[i].DataPoints)}");
            Console.WriteLine($" - Count: {result.Clusters[i].Count}");
            Console.WriteLine($" - Average: {result.Clusters[i].Average:F2}");
            Console.WriteLine();
        }

        Console.WriteLine("Anomalies:");
        if (result.Anomalies.Count > 0)
        {
            Console.WriteLine($" - {string.Join(", ", result.Anomalies)}");
        }
        else
        {
            Console.WriteLine(" - None");
        }
    }

    // Copies the input into a list and rejects empty data or an out-of-range cluster count.
    private static List<double> MaterializeAndValidate(IEnumerable<double> data, int k)
    {
        var dataPoints = data.ToList();
        if (dataPoints.Count == 0 || k <= 0 || k > dataPoints.Count)
        {
            throw new ArgumentException("Invalid data or cluster count");
        }

        return dataPoints;
    }

    // Shared assignment/update loop; computeCentroid decides whether this is k-means or k-medians.
    private static ClusteringResult RunClustering(
        List<double> dataPoints,
        int k,
        double anomalyThreshold,
        int maxIterations,
        Func<List<double>, double> computeCentroid)
    {
        // Seed the centroids with k data points chosen at random.
        var random = new Random();
        var centroids = dataPoints.OrderBy(_ => random.Next()).Take(k).ToList();
        var clusters = new List<Cluster>();
        var anomalies = new List<double>();

        for (int iteration = 0; iteration < maxIterations; iteration++)
        {
            // Every pass re-assigns every point, so both outputs start empty. Without
            // resetting the anomaly list, a flagged point would be appended once per pass.
            clusters = Enumerable.Range(0, k).Select(_ => new Cluster()).ToList();
            anomalies = new List<double>();

            foreach (var point in dataPoints)
            {
                int nearest = FindNearestCentroidIndex(centroids, point);
                if (Math.Abs(point - centroids[nearest]) > anomalyThreshold)
                {
                    anomalies.Add(point);
                }
                else
                {
                    clusters[nearest].DataPoints.Add(point);
                }
            }

            // An empty cluster keeps its previous centroid instead of jumping to an arbitrary 0.
            var previous = centroids;
            var newCentroids = clusters
                .Select((cluster, index) => cluster.DataPoints.Count > 0
                    ? computeCentroid(cluster.DataPoints)
                    : previous[index])
                .ToList();

            // Converged: another pass would produce the same assignment.
            if (centroids.SequenceEqual(newCentroids))
            {
                break;
            }

            centroids = newCentroids;
        }

        return new ClusteringResult
        {
            Clusters = clusters,
            Anomalies = anomalies
        };
    }

    // Returns the index of the centroid closest to the point; ties go to the lowest index.
    private static int FindNearestCentroidIndex(List<double> centroids, double point)
    {
        int bestIndex = 0;
        double bestDistance = Math.Abs(point - centroids[0]);

        for (int i = 1; i < centroids.Count; i++)
        {
            double distance = Math.Abs(point - centroids[i]);
            if (distance < bestDistance)
            {
                bestDistance = distance;
                bestIndex = i;
            }
        }

        return bestIndex;
    }
}


In [52]:
// Demo cell: run both clustering variants on a small sample and print the results.
var data = new List<double> { 1, 1, 4, 4, 9 };
int k = data.Count / 2; // Integer division: 5 / 2 gives k = 2, presumably to allow for a bimodal distribution.
double anomalyThreshold = 10.0; // Points farther than this from their nearest centroid are reported as anomalies (k-means only).

// k-means with the fixed threshold above.
var result = Clustering.KMeansClusteringWithAnomalyDetection(data, k, anomalyThreshold);
Clustering.DisplayClustersAndAnomaliesTextually(result);

// k-medians computes its own anomaly threshold from the data and prints it before clustering.
result = Clustering.KMediansClusteringWithAnomalyDetection(data, k);
Clustering.DisplayClustersAndAnomaliesTextually(result);

Cluster 1:
 - Data Points: 1, 1
 - Count: 2
 - Average: 1.00

Cluster 2:
 - Data Points: 4, 4, 9
 - Count: 3
 - Average: 5.67

Anomalies:
 - None
Anomaly Threshold (Mean + 3σ): 12.58
Cluster 1:
 - Data Points: 4, 4, 9
 - Count: 3
 - Average: 5.67

Cluster 2:
 - Data Points: 1, 1
 - Count: 2
 - Average: 1.00

Anomalies:
 - None
