Merge pull request #25 from Cristina-Tobias/fix-flake8-errors
[REFACTOR] Flake8 repo
Cristina-Tobias committed Mar 26, 2023
2 parents 18e039a + 95b39d1 commit 9eb1d45
Showing 4 changed files with 24 additions and 20 deletions.
32 changes: 17 additions & 15 deletions clustintime/Clustering.py
@@ -14,16 +14,11 @@
from community import community_louvain
from networkx.algorithms import community
from sklearn.cluster import (
DBSCAN,
OPTICS,
AffinityPropagation,
AgglomerativeClustering,
KMeans,
MeanShift,
)

import clustintime.Processing as proc
import clustintime.Visualization as vis


def generate_maps(labels, directory, data, masker, prefix):
@@ -99,8 +94,10 @@ def findCommunities(G):

def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
"""
- This algorithm samples the data and clusters it with a defined algorithm. With the results of each cluster, it creates a consensus matrix.
- The consensus matrix is then clustered a hundred times. If the results are the same in every run, that will be the returned labels.
+ This algorithm samples the data and clusters it with a defined algorithm. With the results of each cluster,
+ it creates a consensus matrix.
+ The consensus matrix is then clustered a hundred times. If the results are the same in every run, that will
+ be the returned labels.
If the results are not unanimous, the algorithm will return to the sampling step.
Parameters
@@ -125,9 +122,9 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
while 1:
for i in range(100):
sampling = np.sort(random.sample(range(npoints), round(npoints * 0.6)))
- I = pd.DataFrame([0] * npoints)
- I[0][sampling] = 1
- I_sum = I_sum + np.dot(I, np.transpose(I))
+ filter_matrix = pd.DataFrame([0] * npoints)
+ filter_matrix[0][sampling] = 1
+ I_sum = I_sum + np.dot(filter_matrix, np.transpose(filter_matrix))
data_sampled = corr_map[sampling, :][:, sampling]
if algorithm == Info_Map or algorithm == Greedy_Mod or algorithm == Louvain:
corr_mat, idx = algorithm(data_sampled, indexes, thr, nscans)
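For readers skimming the diff, here is a minimal, self-contained sketch of the indicator-matrix bookkeeping that this hunk renames (`I` becomes `filter_matrix`); the array size is illustrative and everything except `filter_matrix` and `I_sum` is invented for the example:

import random

import numpy as np
import pandas as pd

npoints = 10
I_sum = np.zeros((npoints, npoints))

# Sample 60% of the time-points, as consensus() does on each iteration
sampling = np.sort(random.sample(range(npoints), round(npoints * 0.6)))

# Column vector of 0s with 1s at the sampled time-points
filter_matrix = pd.DataFrame([0] * npoints)
filter_matrix[0][sampling] = 1

# Outer product: entry (i, j) counts how often i and j were sampled together
I_sum = I_sum + np.dot(filter_matrix, np.transpose(filter_matrix))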
@@ -156,7 +153,7 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
labels = algorithm(corr_map=Consensus, indexes=indexes, thr=thr, nscans=npoints)
connect = proc.compute_connectivity_matrix(npoints, labels[1])

- if np.array_equal(aux, connect) == False:
+ if not np.array_equal(aux, connect):
boolean = False
break
if boolean:
@@ -167,7 +164,8 @@ def consensus(corr_map, indexes, nscans, n_clusters, algorithm, thr):
def K_Means(corr_map, indexes, nscans, n_clusters, seed=0):
"""
K-Means uses a pre-stablished number of centroids and iterations defined by the user.
- The algorithms places the centroids at random locations (real or imaginary, that represent the centre of the cluster) and then allocates each data point to the nearest cluster.
+ The algorithms places the centroids at random locations (real or imaginary, that represent the centre
+ of the cluster) and then allocates each data point to the nearest cluster.
Afterwards, it will optimise the position of those centroids in the number of iterations defined.
Parameters
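As a side note, the K-Means routine described above can be exercised directly through scikit-learn, which Clustering.py already imports; a minimal sketch with toy data (the array and parameter values are illustrative, not clustintime defaults):

import numpy as np
from sklearn.cluster import KMeans

corr_map = np.random.rand(20, 20)  # stand-in for a correlation map
kmeans = KMeans(n_clusters=3, random_state=0)
labels = kmeans.fit_predict(corr_map)  # one cluster index per time-point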
@@ -237,7 +235,8 @@ def Agglomerative_Clustering(
If `precomputed`, a distance matrix (instead of a similarity matrix) is needed as input for the fit method.
The default is `euclidean`
linkage : str, optional:
- Linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. The algorithm will merge the pairs of cluster that minimize this criterion.
+ Linkage criterion to use. The linkage criterion determines which distance to use between sets of observation.
+ The algorithm will merge the pairs of cluster that minimize this criterion.
The options are `ward`, `complete`, `average`, `single`
Returns
@@ -274,8 +273,11 @@

def Info_Map(corr_map, indexes, thr, nscans):
"""
- InfoMap uses information theory to find communities. In particular, it employs the Huffman code to understand the flow of information within a graph. This code assigns a prefix to each node, then a prefix to each community.
- When a random walker enters a network, the probability that it transitions between two nodes is given by its Markov transition matrix. Nonetheless, once the walker find itself inside a region, it is relatively improbable that it transitions onto another.
+ InfoMap uses information theory to find communities. In particular, it employs the Huffman code to understand
+ the flow of information within a graph. This code assigns a prefix to each node, then a prefix to each community.
+ When a random walker enters a network, the probability that it transitions between two nodes is given by its Markov
+ transition matrix. Nonetheless, once the walker find itself inside a region, it is relatively improbable that it
+ transitions onto another.
InfoMap uses a random walker and applies the aforementioned theories to to find regions and nodes belonging to them.
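For a hands-on feel of the idea, here is a minimal community-detection run with the standalone `infomap` package from mapequation.org; whether clustintime's Info_Map wraps this binding is not visible in the diff, and the toy edge list is made up:

from infomap import Infomap

im = Infomap("--two-level --silent")
# Two triangles joined by a single bridge edge
for a, b in [(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5), (3, 5)]:
    im.add_link(a, b)
im.run()
for node in im.tree:
    if node.is_leaf:
        print(node.node_id, node.module_id)  # node and its community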
6 changes: 4 additions & 2 deletions clustintime/Processing.py
@@ -65,7 +65,8 @@ def bfs(graph, source, dest):

def RSS_peaks(corr_map, near):
"""
- Calculates the RSS of the correlation maps and returns the indexes of the time-points with the highest scores and the time-points nearby.
+ Calculates the RSS of the correlation maps and returns the indexes of the time-points with
+ the highest scores and the time-points nearby.
Parameters
----------
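As a rough illustration of the RSS score described above (the exact peak-picking and the `near` expansion live in the collapsed body of RSS_peaks, so this is only an assumed sketch):

import numpy as np

corr_map = np.random.rand(50, 50)  # stand-in for a correlation map
rss = np.sqrt(np.sum(corr_map ** 2, axis=1))  # one RSS value per time-point
peaks = np.where(rss > rss.mean())[0]  # illustrative threshold only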
@@ -167,7 +168,8 @@ def correlation_with_window(data, window_length):
(temp, data[timepoint + window_idx + 1, :])
) # The data is concatenated for all the rows in the window
else:
- # The last rows will be concatenated (since there are less rows than the specified length once the loop finishes, you can exit it)
+ # The last rows will be concatenated (since there are less rows than the specified length
+ # once the loop finishes, you can exit it)
for window_idx in range(window_length):
if (timepoint + window_idx + 1) < (data.shape[0]):
temp = np.concatenate((temp, data[timepoint + window_idx + 1, :]))
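Pulled out of the diff context, the windowing loop above amounts to something like the following hypothetical helper (`windowed_rows` is a name invented for this sketch):

import numpy as np

def windowed_rows(data, window_length):
    """Concatenate each row with the rows inside its window."""
    rows = []
    for timepoint in range(data.shape[0]):
        temp = data[timepoint, :]
        for window_idx in range(window_length):
            # Near the end there are fewer rows left than window_length,
            # so only concatenate the rows that still exist
            if (timepoint + window_idx + 1) < data.shape[0]:
                temp = np.concatenate((temp, data[timepoint + window_idx + 1, :]))
        rows.append(temp)
    return rows  # a list, because the trailing rows are shorter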
4 changes: 2 additions & 2 deletions clustintime/clustintime.py
@@ -135,7 +135,7 @@ def clustintime(
data[data < 0] = 0

# Create data
- if timings_file != None:
+ if timings_file is not None:
# Load timings
# 1D files are a txt file with the times in which the events occur. They are divided by the repetition_time.
task = {}
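The comment above describes 1D timing files; loading one would look roughly like this (the file name and repetition time are invented for the example):

import numpy as np

repetition_time = 2.0
onsets = np.loadtxt("task_onsets.1D")  # event times in seconds, one per line
task_scans = (onsets / repetition_time).astype(int)  # scan indices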
@@ -154,7 +154,7 @@ def clustintime(
nscans = corr_map.shape[0]
indexes = range(corr_map.shape[0])

- if processing != None:
+ if processing is not None:
corr_map, indexes = proc.preprocess(
corr_map=corr_map,
analysis=processing,
2 changes: 1 addition & 1 deletion setup.cfg
@@ -7,7 +7,7 @@ tag_prefix =
parentdir_prefix =

[flake8]
- max-line-length = 99
+ max-line-length = 120
exclude=*build/
ignore = E203,E402,W503
per-file-ignores =
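Since flake8 reads the [flake8] section of setup.cfg automatically, running plain `flake8` from the repository root picks up the relaxed 120-character limit without any extra flags.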
