Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 69 lines (54 sloc) 2.69 KB
# cluster.py clusters a set of coordinates using Daura et al.'s algorithm
# Written by Chris Lockhart in Python3
import numpy as np
# Info: We are clustering structures using the algorithm from Mark (1999)
# Angew. Chem. Int. Ed. 38
# Input: You must supply a distance_matrix of shape (N,N), where N is the
# number of structures considered. In practice, this distance matrix
# should be symmetrical, i.e., the distance from i to j is the same as
# j to i. We do not explicitly check this. The cutoff is a float that
# defines which structures are connected in the algorithm. The integer
# min_cluster_size allows you to limit the output based on cluster size.
# Output: The list of clusters. This list is in order save for the first
# structure index, which is the centroid.
def cluster(distance_matrix,cutoff,min_cluster_size=1):
# Handle input
distance_matrix=np.array(distance_matrix,dtype=float)
cutoff=float(cutoff)
min_cluster_size=int(min_cluster_size)
# Must be of size (N,N)
if distance_matrix.shape[0] != distance_matrix.shape[1]:
raise AttributeError('matrix must be of size (N,N)')
# List of available structures
available=np.arange(distance_matrix.shape[0],dtype=int)
# List to store clustering results
result=[]
# Generate clusters
while len(available) != 0:
# Blank variables as a placeholder
cluster=np.array([],dtype=int)
centroid=-1
size=0
# For every structure i, consider it as a centroid
for i in available:
# Compute the number of other structures connected to i
index=(distance_matrix[i,available]<cutoff).nonzero()[0]
cluster_i=available[index]
size_i=len(cluster_i)
# If the size of this cluster is the greatest, save it
if size_i > size:
cluster=cluster_i
centroid=i
size=size_i
# Remove all our structures in cluster from available list
available=np.array([i for i in available
if i not in cluster],dtype=int)
# Remove centroid from our cluster and add it to the front
cluster=np.array([i for i in cluster if i != centroid],dtype=int)
cluster=np.insert(cluster,0,centroid)
# Save our cluster
result.append(cluster)
# Termination conditions
if size < min_cluster_size: break
# Return data back to user (as NumPy array, why not)
return np.array(result)