Merge pull request #25 from Cristina-Tobias/fix-flake8-errors
[REFACTOR] Flake8 repo
Cristina-Tobias committed Mar 26, 2023
2 parents 18e039a + 95b39d1 commit 9eb1d45
Showing 4 changed files with 24 additions and 20 deletions.
32 changes: 17 additions & 15 deletions clustintime/Clustering.py
@@ -14,16 +14,11 @@
from community import community_louvain
from networkx.algorithms import community
from sklearn.cluster import (
DBSCAN,
OPTICS,
AffinityPropagation,
AgglomerativeClustering,
KMeans,
MeanShift,
)

import clustintime.Processing as proc
import clustintime.Visualization as vis


def generate_maps(labels, directory, data, masker, prefix):
@@ -99,8 +94,10 @@ def findCommunities(G):

def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
"""
- This algorithm samples the data and clusters it with a defined algorithm. With the results of each cluster, it creates a consensus matrix.
- The consensus matrix is then clustered a hundred times. If the results are the same in every run, that will be the returned labels.
+ This algorithm samples the data and clusters it with a defined algorithm. With the results of each cluster,
+ it creates a consensus matrix.
+ The consensus matrix is then clustered a hundred times. If the results are the same in every run, that will
+ be the returned labels.
If the results are not unanimous, the algorithm will return to the sampling step.
Parameters
@@ -125,9 +122,9 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
while 1:
for i in range(100):
sampling = np.sort(random.sample(range(npoints), round(npoints * 0.6)))
- I = pd.DataFrame([0] * npoints)
- I[0][sampling] = 1
- I_sum = I_sum + np.dot(I, np.transpose(I))
+ filter_matrix = pd.DataFrame([0] * npoints)
+ filter_matrix[0][sampling] = 1
+ I_sum = I_sum + np.dot(filter_matrix, np.transpose(filter_matrix))
data_sampled = corr_map[sampling, :][:, sampling]
if algorithm == Info_Map or algorithm == Greedy_Mod or algorithm == Louvain:
corr_mat, idx = algorithm(data_sampled, indexes, thr, nscans)
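For readers skimming the diff, here is a minimal, self-contained sketch of the indicator-matrix bookkeeping that this hunk renames (`I` becomes `filter_matrix`); the array size is illustrative and everything except `filter_matrix` and `I_sum` is invented for the example:

import random

import numpy as np
import pandas as pd

npoints = 10
I_sum = np.zeros((npoints, npoints))

# Sample 60% of the time-points, as consensus() does on each iteration
sampling = np.sort(random.sample(range(npoints), round(npoints * 0.6)))

# Column vector of 0s with 1s at the sampled time-points
filter_matrix = pd.DataFrame([0] * npoints)
filter_matrix[0][sampling] = 1

# Outer product: entry (i, j) counts how often i and j were sampled together
I_sum = I_sum + np.dot(filter_matrix, np.transpose(filter_matrix))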
@@ -156,7 +153,7 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
labels = algorithm(corr_map=Consensus, indexes=indexes, thr=thr, nscans=npoints)
connect = proc.compute_connectivity_matrix(npoints, labels[1])

- if np.array_equal(aux, connect) == False:
+ if not np.array_equal(aux, connect):
boolean = False
break
if boolean:
@@ -167,7 +164,8 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
def K_Means(corr_map, indexes, nscans, n_clusters, seed=0):
"""
K-Means uses a pre-stablished number of centroids and iterations defined by the user.
- The algorithms places the centroids at random locations (real or imaginary, that represent the centre of the cluster) and then allocates each data point to the nearest cluster.
+ The algorithms places the centroids at random locations (real or imaginary, that represent the centre
+ of the cluster) and then allocates each data point to the nearest cluster.
Afterwards, it will optimise the position of those centroids in the number of iterations defined.
Parameters
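As a side note, the K-Means routine described above can be exercised directly through scikit-learn, which Clustering.py already imports; a minimal sketch with toy data (the array and parameter values are illustrative, not clustintime defaults):

import numpy as np
from sklearn.cluster import KMeans

corr_map = np.random.rand(20, 20)  # stand-in for a correlation map
kmeans = KMeans(n_clusters=3, random_state=0)
labels = kmeans.fit_predict(corr_map)  # one cluster index per time-point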
@@ -237,7 +235,8 @@ def Agglomerative_Clustering(
If `precomputed`, a distance matrix (instead of a similarity matrix) is needed as input for the fit method.
The default is `euclidean`
linkage : str, optional:
- Linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. The algorithm will merge the pairs of cluster that minimize this criterion.
+ Linkage criterion to use. The linkage criterion determines which distance to use between sets of observation.
+ The algorithm will merge the pairs of cluster that minimize this criterion.
The options are `ward`, `complete`, `average`, `single`
Returns
@@ -274,8 +273,11 @@

def Info_Map(corr_map, indexes, thr, nscans):
"""
- InfoMap uses information theory to find communities. In particular, it employs the Huffman code to understand the flow of information within a graph. This code assigns a prefix to each node, then a prefix to each community.
- When a random walker enters a network, the probability that it transitions between two nodes is given by its Markov transition matrix. Nonetheless, once the walker find itself inside a region, it is relatively improbable that it transitions onto another.
+ InfoMap uses information theory to find communities. In particular, it employs the Huffman code to understand
+ the flow of information within a graph. This code assigns a prefix to each node, then a prefix to each community.
+ When a random walker enters a network, the probability that it transitions between two nodes is given by its Markov
+ transition matrix. Nonetheless, once the walker find itself inside a region, it is relatively improbable that it
+ transitions onto another.
InfoMap uses a random walker and applies the aforementioned theories to to find regions and nodes belonging to them.
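For a hands-on feel of the idea, here is a minimal community-detection run with the standalone `infomap` package from mapequation.org; whether clustintime's Info_Map wraps this binding is not visible in the diff, and the toy edge list is made up:

from infomap import Infomap

im = Infomap("--two-level --silent")
# Two triangles joined by a single bridge edge
for a, b in [(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5), (3, 5)]:
    im.add_link(a, b)
im.run()
for node in im.tree:
    if node.is_leaf:
        print(node.node_id, node.module_id)  # node and its community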
6 changes: 4 additions & 2 deletions clustintime/Processing.py
@@ -65,7 +65,8 @@ def bfs(graph, source, dest):

def RSS_peaks(corr_map, near):
"""
- Calculates the RSS of the correlation maps and returns the indexes of the time-points with the highest scores and the time-points nearby.
+ Calculates the RSS of the correlation maps and returns the indexes of the time-points with
+ the highest scores and the time-points nearby.
Parameters
----------
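As a rough illustration of the RSS score described above (the exact peak-picking and the `near` expansion live in the collapsed body of RSS_peaks, so this is only an assumed sketch):

import numpy as np

corr_map = np.random.rand(50, 50)  # stand-in for a correlation map
rss = np.sqrt(np.sum(corr_map ** 2, axis=1))  # one RSS value per time-point
peaks = np.where(rss > rss.mean())[0]  # illustrative threshold only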
@@ -167,7 +168,8 @@ def correlation_with_window(data, window_length):
(temp, data[timepoint + window_idx + 1, :])
) # The data is concatenated for all the rows in the window
else:
- # The last rows will be concatenated (since there are less rows than the specified length once the loop finishes, you can exit it)
+ # The last rows will be concatenated (since there are less rows than the specified length
+ # once the loop finishes, you can exit it)
for window_idx in range(window_length):
if (timepoint + window_idx + 1) < (data.shape[0]):
temp = np.concatenate((temp, data[timepoint + window_idx + 1, :]))
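Pulled out of the diff context, the windowing loop above amounts to something like the following hypothetical helper (`windowed_rows` is a name invented for this sketch):

import numpy as np

def windowed_rows(data, window_length):
    """Concatenate each row with the rows inside its window."""
    rows = []
    for timepoint in range(data.shape[0]):
        temp = data[timepoint, :]
        for window_idx in range(window_length):
            # Near the end there are fewer rows left than window_length,
            # so only concatenate the rows that still exist
            if (timepoint + window_idx + 1) < data.shape[0]:
                temp = np.concatenate((temp, data[timepoint + window_idx + 1, :]))
        rows.append(temp)
    return rows  # a list, because the trailing rows are shorter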
4 changes: 2 additions & 2 deletions clustintime/clustintime.py
@@ -135,7 +135,7 @@ def clustintime(
data[data < 0] = 0

# Create data
- if timings_file != None:
+ if timings_file is not None:
# Load timings
# 1D files are a txt file with the times in which the events occur. They are divided by the repetition_time.
task = {}
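The comment above describes 1D timing files; loading one would look roughly like this (the file name and repetition time are invented for the example):

import numpy as np

repetition_time = 2.0
onsets = np.loadtxt("task_onsets.1D")  # event times in seconds, one per line
task_scans = (onsets / repetition_time).astype(int)  # scan indices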
@@ -154,7 +154,7 @@ def clustintime(
nscans = corr_map.shape[0]
indexes = range(corr_map.shape[0])

- if processing != None:
+ if processing is not None:
corr_map, indexes = proc.preprocess(
corr_map=corr_map,
analysis=processing,
2 changes: 1 addition & 1 deletion setup.cfg
@@ -7,7 +7,7 @@ tag_prefix =
parentdir_prefix =

[flake8]
- max-line-length = 99
+ max-line-length = 120
exclude=*build/
ignore = E203,E402,W503
per-file-ignores =
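Since flake8 reads the [flake8] section of setup.cfg automatically, running plain `flake8` from the repository root picks up the relaxed 120-character limit without any extra flags.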
