# LockhartLab/LLTK

Switch branches/tags
Nothing to show
Fetching contributors…
Cannot retrieve contributors at this time
executable file 69 lines (54 sloc) 2.69 KB
 # cluster.py clusters a set of coordinates using Daura et al.'s algorithm # Written by Chris Lockhart in Python3 import numpy as np # Info: We are clustering structures using the algorithm from Mark (1999) # Angew. Chem. Int. Ed. 38 # Input: You must supply a distance_matrix of shape (N,N), where N is the # number of structures considered. In practice, this distance matrix # should be symmetrical, i.e., the distance from i to j is the same as # j to i. We do not explicitly check this. The cutoff is a float that # defines which structures are connected in the algorithm. The integer # min_cluster_size allows you to limit the output based on cluster size. # Output: The list of clusters. This list is in order save for the first # structure index, which is the centroid. def cluster(distance_matrix,cutoff,min_cluster_size=1): # Handle input distance_matrix=np.array(distance_matrix,dtype=float) cutoff=float(cutoff) min_cluster_size=int(min_cluster_size) # Must be of size (N,N) if distance_matrix.shape[0] != distance_matrix.shape[1]: raise AttributeError('matrix must be of size (N,N)') # List of available structures available=np.arange(distance_matrix.shape[0],dtype=int) # List to store clustering results result=[] # Generate clusters while len(available) != 0: # Blank variables as a placeholder cluster=np.array([],dtype=int) centroid=-1 size=0 # For every structure i, consider it as a centroid for i in available: # Compute the number of other structures connected to i index=(distance_matrix[i,available] size: cluster=cluster_i centroid=i size=size_i # Remove all our structures in cluster from available list available=np.array([i for i in available if i not in cluster],dtype=int) # Remove centroid from our cluster and add it to the front cluster=np.array([i for i in cluster if i != centroid],dtype=int) cluster=np.insert(cluster,0,centroid) # Save our cluster result.append(cluster) # Termination conditions if size < min_cluster_size: break # Return data back to user (as NumPy array, why not) return np.array(result)