In [1]:
# This notebook calculates the nearest edge-to-edge distance between clusters.
# Required input: the 'DataCalled' file(s) in the 'evaluated_by_????' folder produced by the neural network clustering algorithm (the code below reads every '.csv' file found in that folder).

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from scipy.stats import norm

In [None]:
# Folder that holds the input files; only entries ending in '.csv' are processed
folder_path = '/Users/geokir/Documents/Insulin cluster analysis/Control/4_evaluated_by_87B144'

# Empty results table: each cluster, its nearest neighbouring cluster and the gap between them
result_df = pd.DataFrame(columns=['ClusterID', 'NearestClusterID', 'EdgeDistance'])

# Count the input files in the folder.
# Note: the check is on the '.csv' extension even though the message below calls them Excel files.
file_count = len([entry for entry in os.listdir(folder_path) if entry.endswith('.csv')])

# Report how many input files were found
print("Number of Excel files in the folder:", file_count)

# Accumulators for the distances gathered while looping over the files
nearest_distances = []
all_edge_distances = []

def _nearest_cluster_edges(data):
    """Find, for every cluster in one table, the closest other cluster.

    The edge distance between two clusters is the smallest Euclidean distance
    between any point of one cluster and any point of the other.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns 'clusterID', 'x [nm]' and 'y [nm]'.

    Returns
    -------
    list of dict
        One record per cluster with the keys 'ClusterID', 'NearestClusterID'
        and 'EdgeDistance'. A cluster that has no other cluster to compare
        against keeps NearestClusterID None and EdgeDistance inf.
    """
    # Extract each cluster's coordinates once instead of once per cluster pair
    coordinates = {
        cluster_id: group[['x [nm]', 'y [nm]']].values
        for cluster_id, group in data.groupby('clusterID')
    }

    records = []
    for cluster_id, cluster_coordinates in coordinates.items():
        min_distance_edges = float('inf')
        nearest_cluster_id = None

        for other_cluster_id, other_cluster_coordinates in coordinates.items():
            if other_cluster_id == cluster_id:
                continue

            # Calculate the distance between cluster edges
            distances_edges = cdist(cluster_coordinates, other_cluster_coordinates, metric='euclidean')
            min_dist_edges = np.min(distances_edges)

            # Strict '<' keeps the first cluster found when several are equally close
            if min_dist_edges < min_distance_edges:
                min_distance_edges = min_dist_edges
                nearest_cluster_id = other_cluster_id

        records.append({
            'ClusterID': cluster_id,
            'NearestClusterID': nearest_cluster_id,
            'EdgeDistance': min_distance_edges,
        })

    return records


# Loop through all the CSV files in the folder. The listing is sorted because
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the results reproducible between runs.
# NOTE(review): the file name is not stored in the results, so rows coming from
# different files cannot be told apart if cluster IDs repeat across files.
cluster_records = []
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.csv'):
        continue

    # Read the CSV file into a DataFrame
    file_path = os.path.join(folder_path, filename)
    data = pd.read_csv(file_path)

    # Exclude clusterID 0
    data = data[data['clusterID'] != 0]

    # Calculate the nearest edge distances between the clusters of this file
    file_records = _nearest_cluster_edges(data)
    cluster_records.extend(file_records)

    # A distance of inf means the cluster had no neighbour (e.g. the file holds
    # a single cluster). Such entries stay in the results table but are kept out
    # of the list used for the histogram, the fit and the mean, which cannot
    # handle non-finite values.
    all_edge_distances.extend(
        record['EdgeDistance']
        for record in file_records
        if np.isfinite(record['EdgeDistance'])
    )

# Build the results table in one step: DataFrame.append was removed in pandas 2.0
# and growing a DataFrame row by row copies the whole table on every iteration.
result_df = pd.DataFrame(cluster_records, columns=result_df.columns)
            
            
            
            
# Save the result DataFrame to an Excel workbook inside the analysed folder.
# The path is derived from folder_path so the results always land next to the
# data they were computed from, even when folder_path is changed for another run.
result_file_path = os.path.join(folder_path, 'Distance_between_clusters.xlsx')
result_df.to_excel(result_file_path, index=False)

# Keep only finite distances: an inf entry (a cluster without any neighbour)
# cannot be binned by the histogram or used in the Gaussian fit.
edge_distances = np.asarray(all_edge_distances, dtype=float)
edge_distances = edge_distances[np.isfinite(edge_distances)]

# Save the plot in the analysed folder, derived from folder_path so it follows
# any change of the input folder.
save_folder_path = folder_path
plot_filename = 'edge_distances_plot.png'
save_path = os.path.join(save_folder_path, plot_filename)

if edge_distances.size == 0:
    # Nothing to draw or fit; report it instead of failing inside the plotting calls
    print("No finite edge distances available; skipping the plot.")
else:
    # Plot the edge distances as a histogram (normalised to a probability density)
    plt.hist(edge_distances, bins='auto', density=True, alpha=0.5, color='lightgrey', edgecolor='black')

    # Fit a Gaussian distribution to the edge distances and overlay its density
    mu, std = norm.fit(edge_distances)
    x = np.linspace(edge_distances.min(), edge_distances.max(), 100)
    y = norm.pdf(x, mu, std)
    plt.plot(x, y, 'r-', linewidth=1)

    # Start the x axis at 0
    plt.xlim(0)

    # Set the plot title and labels
    plt.title('Edge Distances Distribution')
    plt.xlabel('Distance between clusters (nm)')
    plt.ylabel('Probability Density')

    # Save the plot as an image file before showing it
    plt.savefig(save_path)

    # Show the plot
    plt.show()

# Mean edge distance over the finite values only: a single inf entry (a cluster
# without any neighbour) would otherwise turn the whole mean into inf.
finite_edge_distances = [distance for distance in all_edge_distances if np.isfinite(distance)]
# With no usable distances the mean is undefined, reported as nan
mean_edge_distance = np.mean(finite_edge_distances) if finite_edge_distances else float('nan')
print("Mean Edge Distance (nm):", mean_edge_distance)