[REFACTOR] Black and Isort #24

Merged · 1 commit · Mar 26, 2023
104 changes: 51 additions & 53 deletions clustintime/Clustering.py
@@ -4,25 +4,26 @@
Clustering library for clustintime
"""
import os
import random

import infomap
import networkx as nx # creation, manipulation and study of the structure, dynamics and functions of complex networks
import nibabel as nib
import numpy as np
import pandas as pd
import random

import clustintime.Visualization as vis
import clustintime.Processing as proc
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.cluster import OPTICS
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import MeanShift
from sklearn.cluster import AgglomerativeClustering
from networkx.algorithms import community
from community import community_louvain
from networkx.algorithms import community
from sklearn.cluster import (
DBSCAN,
OPTICS,
AffinityPropagation,
AgglomerativeClustering,
KMeans,
MeanShift,
)

import clustintime.Processing as proc
import clustintime.Visualization as vis
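The reordering above is consistent with isort's grouping rules when kept compatible with Black: standard-library, third-party, and local imports in separate alphabetised groups, with the sklearn names collapsed into one parenthesised import. As a hedged sketch (assuming isort >= 5 and a recent Black release; the versions pinned by the project are not shown in this diff), the same result could be reproduced through the tools' Python APIs:

```python
import black
import isort

with open("clustintime/Clustering.py") as fh:
    src = fh.read()

src = isort.code(src, profile="black")          # regroup and alphabetise the imports
src = black.format_str(src, mode=black.Mode())  # re-wrap lines, normalise spacing and quotes
print(src)
```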


def generate_maps(labels, directory, data, masker, prefix):
@@ -56,14 +57,15 @@ def generate_maps(labels, directory, data, masker, prefix):

for map_idx in range(len(unique)):
mean_img = np.mean(data[labels == map_idx + 1], axis=0)
if mean_img.min()/mean_img.max() < 0.9:
mean_img = mean_img/mean_img.max()
if mean_img.min() / mean_img.max() < 0.9:
mean_img = mean_img / mean_img.max()
mean_img_3d = masker.inverse_transform(
mean_img
) # Transform the averaged image into a 3D image

nib.save(
mean_img_3d, os.path.join(directory, f"{directory}/{prefix}_cluster_{map_idx+1}.nii.gz")
mean_img_3d,
os.path.join(directory, f"{directory}/{prefix}_cluster_{map_idx+1}.nii.gz"),
)
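A toy illustration of the rescaling rule in this hunk, with random arrays standing in for the masked fMRI data (the masker and the NIfTI saving step are omitted; all names here are illustrative only):

```python
import numpy as np

data = np.random.rand(10, 100)                      # 10 timepoints x 100 voxels, already masked
labels = np.array([1, 1, 2, 2, 1, 2, 1, 2, 1, 2])   # one cluster label per timepoint

mean_img = np.mean(data[labels == 1], axis=0)       # average map of cluster 1
if mean_img.min() / mean_img.max() < 0.9:           # same condition as in generate_maps
    mean_img = mean_img / mean_img.max()            # rescale so the peak value is 1
```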


@@ -94,12 +96,13 @@ def findCommunities(G):
nx.set_node_attributes(G, values=communities, name="community")
return communities
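A short usage sketch for findCommunities on a toy graph (the karate-club graph stands in for project data):

```python
import networkx as nx
from clustintime.Clustering import findCommunities

G = nx.karate_club_graph()        # small example graph
communities = findCommunities(G)  # {node: community_id}, also stored as the "community" node attribute
print(len(set(communities.values())), "communities found")
```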


def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
"""
This algorithm samples the data and clusters it with a defined algorithm. With the results of each cluster, it creates a consensus matrix.
This algorithm samples the data and clusters it with a defined algorithm. With the results of each cluster, it creates a consensus matrix.
The consensus matrix is then clustered a hundred times. If the results are the same in every run, those labels are returned.
If the results are not unanimous, the algorithm will return to the sampling step.
If the results are not unanimous, the algorithm will return to the sampling step.

Parameters
----------
corr_map : matrix
@@ -121,38 +124,38 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
I_sum = np.zeros([npoints, npoints])
while 1:
for i in range(100):
sampling = np.sort(random.sample(range(npoints), round(npoints*0.6)))
I = pd.DataFrame([0]*npoints)
sampling = np.sort(random.sample(range(npoints), round(npoints * 0.6)))
I = pd.DataFrame([0] * npoints)
I[0][sampling] = 1
I_sum = I_sum + np.dot(I,np.transpose(I))
data_sampled = corr_map[sampling,:][:,sampling]
I_sum = I_sum + np.dot(I, np.transpose(I))
data_sampled = corr_map[sampling, :][:, sampling]
if algorithm == Info_Map or algorithm == Greedy_Mod or algorithm == Louvain:
corr_mat, idx = algorithm(data_sampled, indexes, thr, nscans)
corr_mat, idx = algorithm(data_sampled, indexes, thr, nscans)
idx = np.transpose(
pd.DataFrame([idx, sampling])
) # Create a vector that combines the previous indexes and the labels
else:
idx = algorithm(data_sampled, indexes, nscans, n_clusters)
) # Create a vector that combines the previous indexes and the labels
else:
idx = algorithm(data_sampled, indexes, nscans, n_clusters)
idx = np.transpose(
pd.DataFrame([idx, sampling])
) # Create a vector that combines the previous indexes and the labels
) # Create a vector that combines the previous indexes and the labels
idx = idx.set_index(1)
idx = idx[np.logical_not(np.isnan(idx[0]))]
labels = np.array([0]*npoints)
labels = np.array([0] * npoints)
labels[sampling] = idx[0]
M = proc.compute_connectivity_matrix(npoints, labels)
M = proc.compute_connectivity_matrix(npoints, labels)
M_sum = M_sum + M
Consensus = np.divide(M_sum,I_sum)
Consensus = np.divide(M_sum, I_sum)
Consensus[Consensus < proc.find_threshold_bfs(Consensus)] = 0
final_labels = algorithm(Consensus, indexes, 0, nscans)
thr = proc.find_threshold_bfs(Consensus)
Consensus[Consensus <= thr] = 0
aux = proc.compute_connectivity_matrix(npoints, final_labels[1])
aux = proc.compute_connectivity_matrix(npoints, final_labels[1])
boolean = True
for i in range(100):
labels = algorithm(corr_map = Consensus,indexes=indexes,thr=thr, nscans=npoints)
labels = algorithm(corr_map=Consensus, indexes=indexes, thr=thr, nscans=npoints)
connect = proc.compute_connectivity_matrix(npoints, labels[1])

if np.array_equal(aux, connect) == False:
boolean = False
break
@@ -161,7 +164,7 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
return labels[1]
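The bookkeeping inside the loop above reduces to the ratio of two accumulators: M_sum counts how often two timepoints end up in the same cluster, and I_sum counts how often they are sampled together. A toy illustration of that ratio with two invented sampling rounds (the boolean co-membership matrix below is a stand-in for proc.compute_connectivity_matrix):

```python
import numpy as np

npoints = 5
M_sum = np.zeros((npoints, npoints))  # co-clustering counts
I_sum = np.zeros((npoints, npoints))  # co-sampling counts

rounds = [([0, 1, 2], [1, 1, 2]),     # (sampled timepoints, their cluster labels)
          ([1, 2, 4], [1, 2, 2])]
for sampling, labels in rounds:
    indicator = np.zeros(npoints)
    indicator[sampling] = 1
    I_sum += np.outer(indicator, indicator)                        # who was sampled together
    lab = np.zeros(npoints)
    lab[sampling] = labels
    M_sum += (lab[:, None] == lab[None, :]) & (lab[:, None] != 0)  # who was clustered together

consensus_matrix = np.divide(M_sum, I_sum, out=np.zeros_like(M_sum), where=I_sum != 0)
```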


def K_Means(corr_map, indexes, nscans, n_clusters, seed = 0):
def K_Means(corr_map, indexes, nscans, n_clusters, seed=0):
"""
K-Means uses a pre-established number of centroids and iterations defined by the user.
The algorithm places the centroids at random locations (real or imaginary points that represent the centre of each cluster) and then allocates each data point to the nearest cluster.
@@ -192,7 +195,7 @@ def K_Means(corr_map, indexes, nscans, n_clusters, seed = 0):

print(" ")

KM = KMeans(n_clusters=n_clusters, random_state = seed)
KM = KMeans(n_clusters=n_clusters, random_state=seed)
labels = KM.fit_predict(corr_map)

labels = np.transpose(
@@ -205,14 +208,16 @@ def K_Means(corr_map, indexes, nscans, n_clusters, seed = 0):
# assign to each timepoint their label

for i in labels.index:
i=int(i)
i = int(i)
final_labels[i] = labels[0][i] + 1
print("K-Means applied!")

return final_labels
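A hedged usage sketch for this wrapper with a random correlation matrix in place of real data (the exact form expected for indexes is an assumption inferred from how the labels are re-indexed above):

```python
import numpy as np
from clustintime.Clustering import K_Means

rng = np.random.default_rng(0)
timeseries = rng.standard_normal((40, 500))   # 40 timepoints x 500 voxels
corr_map = np.corrcoef(timeseries)            # 40 x 40 correlation matrix
final_labels = K_Means(corr_map, indexes=np.arange(40), nscans=40, n_clusters=3)
print(final_labels)                           # one label (1..3) per timepoint
```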


def Agglomerative_Clustering(corr_map, indexes, nscans, n_clusters, affinity = 'euclidean', linkage = 'ward'):
def Agglomerative_Clustering(
corr_map, indexes, nscans, n_clusters, affinity="euclidean", linkage="ward"
):
"""
Agglomerative Clustering recursively merges the pair of clusters that minimally increases a given linkage distance.

@@ -227,8 +232,8 @@ def Agglomerative_Clustering(corr_map, indexes, nscans, n_clusters, affinity = '
n_clusters : int
Number of clusters.
affinity : str, optional
Metric used to compute the linkage. Can be `euclidean`, `l1`, `l2`, `manhattan`, `cosine`, or `precomputed`.
If linkage is `ward`, only `euclidean` is accepted.
Metric used to compute the linkage. Can be `euclidean`, `l1`, `l2`, `manhattan`, `cosine`, or `precomputed`.
If linkage is `ward`, only `euclidean` is accepted.
If `precomputed`, a distance matrix (instead of a similarity matrix) is needed as input for the fit method.
The default is `euclidean`
linkage : str, optional:
@@ -247,7 +252,7 @@ def Agglomerative_Clustering(corr_map, indexes, nscans, n_clusters, affinity = '

print(" ")

AG = AgglomerativeClustering(n_clusters = n_clusters, affinity = affinity, linkage = linkage)
AG = AgglomerativeClustering(n_clusters=n_clusters, affinity=affinity, linkage=linkage)
labels = AG.fit_predict(corr_map)

labels = np.transpose(
@@ -260,14 +265,13 @@ def Agglomerative_Clustering(corr_map, indexes, nscans, n_clusters, affinity = '
# assign to each timepoint their label

for i in labels.index:
i=int(i)
i = int(i)
final_labels[i] = labels[0][i] + 1
print("Agglomerative Clustering applied!")

return final_labels
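As the docstring notes, ward linkage only accepts the euclidean metric. A minimal sklearn-level sketch of the call made inside this wrapper; recent scikit-learn releases renamed the affinity argument to metric, so this form assumes a version contemporary with the PR:

```python
import numpy as np
from sklearn.cluster import AgglomerativeClustering

corr_map = np.corrcoef(np.random.default_rng(1).standard_normal((40, 500)))
AG = AgglomerativeClustering(n_clusters=3, affinity="euclidean", linkage="ward")
labels = AG.fit_predict(corr_map)  # one cluster id (0, 1 or 2) per timepoint
```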



def Info_Map(corr_map, indexes, thr, nscans):
"""
InfoMap uses information theory to find communities. In particular, it employs Huffman coding to describe the flow of information within a graph: each community receives a codeword, and each node receives a codeword within its community.
@@ -298,7 +302,6 @@ def Info_Map(corr_map, indexes, thr, nscans):
print("Applying InfoMap... ")
print(" ")


corr_map = proc.thr_index(corr_map, thr)
corr_smooth_binary = corr_map != 0 # Find all the voxels with correlation

@@ -322,6 +325,7 @@

return corr_smooth_binary, final_labels
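A toy illustration of the threshold-and-binarise step visible in this hunk. Using np.percentile as a stand-in for proc.thr_index is an assumption about that helper's behaviour, and from_numpy_array replaces the older from_numpy_matrix call used in the file:

```python
import networkx as nx
import numpy as np

corr_map = np.corrcoef(np.random.default_rng(2).standard_normal((20, 100)))
cutoff = np.percentile(np.abs(corr_map), 95)      # percentile threshold (stand-in for proc.thr_index)
corr_map[np.abs(corr_map) < cutoff] = 0
corr_smooth_binary = (corr_map != 0).astype(int)  # keep only supra-threshold correlations
G = nx.from_numpy_array(corr_smooth_binary)       # graph whose communities InfoMap will extract
```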


def Louvain(corr_map, indexes, thr, nscans):
"""
Louvain's algorithm maximises modularity and implements an extra step to ensure community properties in the network.
@@ -336,7 +340,7 @@ def Louvain(corr_map, indexes, thr, nscans):
Percentile threshold for the binarization.
nscans : int
Number of scans.

Returns
-------
corr_map : matrix
@@ -349,12 +353,10 @@ def Louvain(corr_map, indexes, thr, nscans):
print("Applying Louvain... ")
print(" ")



# compute the best partition

corr_map = proc.thr_index(corr_map, thr)

corr_smooth_binary = corr_map != 0 # Find all the voxels with correlation

G = nx.from_numpy_matrix(corr_smooth_binary) # Again the binary
@@ -375,9 +377,9 @@ def Louvain(corr_map, indexes, thr, nscans):

print("Louvain applied")


return corr_smooth_binary, final_labels
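A minimal sketch of the partition call this wrapper presumably builds on (community_louvain.best_partition is the python-louvain function imported at the top of the module; toy graph, not project data):

```python
import networkx as nx
from community import community_louvain

G = nx.karate_club_graph()
partition = community_louvain.best_partition(G)  # {node: community_id}
print(len(set(partition.values())), "communities")
```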


def Greedy_Mod(corr_map, indexes, thr, nscans):
"""
Greedy modularity maximisation builds communities by repeatedly merging the pair of communities that most increases modularity.
@@ -405,12 +407,10 @@ def Greedy_Mod(corr_map, indexes, thr, nscans):
print("Applying Greedy Modularity... ")
print(" ")



# compute the best partition

corr_map = proc.thr_index(corr_map, thr)

corr_smooth_binary = corr_map != 0 # Find all the voxels with correlation

G = nx.from_numpy_matrix(corr_smooth_binary) # Again the binary
@@ -431,6 +431,4 @@ def Greedy_Mod(corr_map, indexes, thr, nscans):

print("Greedy Modularity applied")


return corr_smooth_binary, final_labels
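Greedy_Mod presumably relies on networkx's greedy modularity maximisation (the module imports community from networkx.algorithms); a hedged sketch of that call on a toy graph:

```python
import networkx as nx
from networkx.algorithms import community

G = nx.karate_club_graph()
comms = community.greedy_modularity_communities(G)  # list of frozensets of nodes
labels = {node: idx + 1 for idx, nodes in enumerate(comms) for node in nodes}
print(labels)
```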
