In [21]:
import numpy as np
import pandas as pd
from collections import Counter  
from sklearn.cluster import KMeans  


In [39]:
def get_cluster_parameter(data, labels, print_=False):
    cluster_counts = Counter(labels)
    n_clusters = len(cluster_counts)
    total_data_points = len(data)
    cluster_data_arr = pd.DataFrame()
    cluster_min_arr = np.zeros(n_clusters)
    cluster_max_arr = np.zeros(n_clusters)
    cluster_mean_arr = np.zeros(n_clusters)
    cluster_diff_arr = np.zeros(n_clusters)
    cluster_percentage_arr = np.zeros(n_clusters)
    cluster_std_arr = np.zeros(n_clusters)
    
    for i, cluster in enumerate(cluster_counts):
        # Get the data points in the cluster
        cluster_data = pd.DataFrame({i: data[labels == cluster].flatten()})
        if cluster_data_arr.empty:
            cluster_data_arr = cluster_data
        else:
            cluster_data_arr = pd.concat([cluster_data_arr, cluster_data], axis=1)
        cluster_percentage_arr[i] = (cluster_counts[cluster] / total_data_points) * 100
        # Calculate the mean of the data points in the cluster
        cluster_mean_arr[i] = np.nanmean(cluster_data)
        
        # Calculate the minimum and maximum of the data points in the cluster
        cluster_min_arr[i] = np.nanmin(cluster_data)
        cluster_max_arr[i] = np.nanmax(cluster_data)
        
        # calc standard deviation
        cluster_std_arr[i] = np.nanstd(cluster_data)
        
        # Calculate the difference between the maximum and minimum
        cluster_diff_arr[i] = cluster_max_arr[i] - cluster_min_arr[i]
        # Iterate over the clusters
    if print_:
        for i, cluster in enumerate(cluster_counts):
            print('Cluster:', cluster)
            print('Mean:', round(cluster_mean_arr[i], 2))
            print('Min:', cluster_min_arr[i])
            print('Max:', cluster_max_arr[i])
            print('Difference between max and min:', cluster_diff_arr[i])
            print('Percentage:', cluster_percentage_arr[i], '%\n')
        print('--------------------------------------')
    return cluster_data_arr, cluster_min_arr, cluster_max_arr, cluster_mean_arr, cluster_diff_arr, cluster_percentage_arr, cluster_std_arr

In [55]:
def find_cluster(data, n_clusters, print_=False, min_diff=0):
    """
    Find Clusters in an array, determine the range (2*std) of the cluster with the highest percentage of data points and return an array of angles in this range.
    
    Parameter:
        - data (ndarray): The array to find clusters in.
        - n_clusters (int): The number of clusters to find.
        - print_ (boolean): If True, print the Parameter of each cluster.
        - min_diff (float): The minimum difference between the means of two clusters. If the difference is smaller than this value, the number of clusters is reduced by one.
    Returns:
        - cluster_data_arr (2darray): The data points in each cluster.
        - cluster_min_arr (1darray): The minimum of each cluster.
        - cluster_max_arr (1darray): The maximum of each cluster.
        - cluster_mean_arr (1darray): The mean of each cluster.
        - cluster_diff_arr (1darray): The difference between the maximum and minimum of each cluster.
        - cluster_percentage_arr (1darray): The percentage of data points in each cluster.
        - cluster_std_arr (1darray): The standard deviation of each cluster.
    """
    
    # Flatten the 2D array
    data = data.flatten().reshape(-1, 1)
    
    # Fit the KMeans algorithm to the data
    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(data)

    # Count the number of elements in each cluster
    labels = kmeans.labels_
    cluster_counts = Counter(labels)
    
 
    # get cluster parameter
    cluster_data_arr, cluster_min_arr, cluster_max_arr, cluster_mean_arr, cluster_diff_arr, cluster_percentage_arr, cluster_std_arr = get_cluster_parameter(data, labels, print_=True)

    if min_diff > 0:
        # Calculate the absolute difference between each mean and all other means
        diff_matrix = np.abs(cluster_mean_arr[:, None] - cluster_mean_arr)

        # Get the upper triangle of the matrix excluding the diagonal
        
        diff_matrix = np.triu(diff_matrix, k=1)
        diff_matrix[np.tril_indices(diff_matrix.shape[0])] = np.nan
        close_clusters = np.argwhere(diff_matrix < min_diff)
        
        if n_clusters > 1 and len(close_clusters) > 0:
            if print_:
                print(f'clusters are too close together, reducing number of clusters from {n_clusters} to {n_clusters-1}')
            find_cluster(data, n_clusters-1, print_=print_, min_diff=min_diff)
        else:
            return cluster_data_arr, cluster_min_arr, cluster_max_arr, cluster_mean_arr, cluster_diff_arr, cluster_percentage_arr, cluster_std_arr
    return cluster_data_arr, cluster_min_arr, cluster_max_arr, cluster_mean_arr, cluster_diff_arr, cluster_percentage_arr, cluster_std_arr
        

        

    

In [56]:
data = pd.read_csv('anglemap_test.csv', header=None).to_numpy()
t = find_cluster(data, 3, min_diff=0)


Cluster: 2
Mean: 150.71
Min: 146.0
Max: 152.0
Difference between max and min: 6.0
Percentage: 54.20560747663551 %

Cluster: 0
Mean: 154.44
Min: 153.0
Max: 156.0
Difference between max and min: 3.0
Percentage: 36.44859813084112 %

Cluster: 1
Mean: 138.9
Min: 133.0
Max: 144.0
Difference between max and min: 11.0
Percentage: 9.345794392523365 %

--------------------------------------


In [57]:
t = find_cluster(data, 3, print_=True, min_diff=10)

Cluster: 2
Mean: 150.71
Min: 146.0
Max: 152.0
Difference between max and min: 6.0
Percentage: 54.20560747663551 %

Cluster: 0
Mean: 154.44
Min: 153.0
Max: 156.0
Difference between max and min: 3.0
Percentage: 36.44859813084112 %

Cluster: 1
Mean: 138.9
Min: 133.0
Max: 144.0
Difference between max and min: 11.0
Percentage: 9.345794392523365 %

--------------------------------------
clusters are too close together, reducing number of clusters from 3 to 2
Cluster: 0
Mean: 152.4
Min: 148.0
Max: 156.0
Difference between max and min: 8.0
Percentage: 87.85046728971963 %

Cluster: 1
Mean: 140.54
Min: 133.0
Max: 146.0
Difference between max and min: 13.0
Percentage: 12.149532710280374 %

--------------------------------------
