In [20]:
import os
import numpy as np

# kmeans clustering algorithm
# data = set of data points
# k = number of clusters
# c = initial list of centroids (if provided)
#
def kmeans(data, k, c):
    centroids = []

    centroids = randomize_centroids(data, centroids, k)  

    old_centroids = [[] for i in range(k)] 

    iterations = 0
    while not (has_converged(centroids, old_centroids, iterations)):
        iterations += 1

        clusters = [[] for i in range(k)]

        # assign data points to clusters
        clusters = euclidean_dist(data, centroids, clusters)

        # recalculate centroids
        index = 0
        for cluster in clusters:
            old_centroids[index] = centroids[index]
            centroids[index] = np.mean(cluster, axis=0).tolist()
            index += 1
    print("The total number of data instances is: " + str(len(data)))
    print("The total number of iterations necessary is: " + str(iterations))
    print("The means of each cluster are: " + str(centroids))
    print("The clusters are as follows:")
    for cluster in clusters:
        print("Cluster with a size of " + str(len(cluster)) + " starts here:")
        print(np.array(cluster).tolist())
        print("Cluster ends here.")

    return

# Calculates euclidean distance between
# a data point and all the available cluster
# centroids.      
def euclidean_dist(data, centroids, clusters):
    for instance in data:  
        # Find which centroid is the closest
        # to the given data point.
        mu_index = min([(i[0], np.linalg.norm(instance-centroids[i[0]])) \
                            for i in enumerate(centroids)], key=lambda t:t[1])[0]
        try:
            clusters[mu_index].append(instance)
        except KeyError:
            clusters[mu_index] = [instance]

    # If any cluster is empty then assign one point
    # from data set randomly so as to not have empty
    # clusters and 0 means.        
    for cluster in clusters:
        if not cluster:
            cluster.append(data[np.random.randint(0, len(data), size=1)].flatten().tolist())

    return clusters


# randomize initial centroids
def randomize_centroids(data, centroids, k):
    for cluster in range(0, k):
        centroids.append(data[np.random.randint(0, len(data), size=1)].flatten().tolist())
    return centroids


# check if clusters have converged    
def has_converged(centroids, old_centroids, iterations):
    MAX_ITERATIONS = 1000
    if iterations > MAX_ITERATIONS:
        return True
    return old_centroids == centroids

In [21]:
import numpy as np

__all__ = ['generate_test_data']

def _gradient(x, y,z):
    return (x * x - 1.0) * 4.0 * x + 0.5, (4.0 * y * y - 7.0) * y, (4.0 * z * z - 7.0) * z

def _bd(x0, y0, z0, length, dt=0.005):
    coeff_A = dt
    coeff_B = np.sqrt(2.0 * dt)
    x = [x0]
    y = [y0]
    z = [z0]
    for _i in range(1, length):
        dx, dy, dz = _gradient(x[-1], y[-1],z[-1])
        x.append(x[-1] - coeff_A * dx + coeff_B * np.random.normal())
        y.append(y[-1] - coeff_A * dy + coeff_B * np.random.normal())
        z.append(z[-1] - coeff_A * dz + coeff_B * np.random.normal())
    return np.array([[_x, _y,_z] for _x, _y, _z in zip(x, y, z)], dtype=np.float64)

def generate_test_data(traj_length=20000, num_trajs=5):
    r"""
    This functions handles the test data generation via Brownian dynamics simulations with
    randomized starting configurations.

    Parameters
    ----------
    traj_length : int, optional, default=20000
        Length of a single trajectory.
    num_trajs : int, optional, default=5
        Number of independent trajectories.

    Returns
    -------
    trajs : list of numpy.ndarray(shape=(traj_length, 2), dtype=numpy.float64) objects
        Time series of configurations of the toy model.
    """
    trajs = []
    for _i in range(num_trajs):
        trajs.append(_bd(3.0 * np.random.rand() - 1.5, 3.0 * np.random.rand() - 1.5,3.0 * np.random.rand() - 1.5, traj_length))
    return trajs



In [22]:
_gradient(3, 4,6) 

(96.5, 228.0, 822.0)

In [23]:
def _gradient(x, y,z):
    return (x * x - 1.0) * 4.0 * x + 0.5, (4.0 * y * y - 7.0) * y, (4.0 * z * z - 7.0) * z

In [131]:
a=generate_test_data(traj_length=2000, num_trajs=5)



In [25]:
centroids, clusters = kmeans(a[0], 8, 5)
print(centroids)


The total number of data instances is: 10
The total number of iterations necessary is: 5
The means of each cluster are: [[-1.1123898903001397, 0.5898322768966007, 1.7937576960149157], [-0.9643707052983688, 0.7145257062945702, 1.5895287058376462], [-0.9005508628641243, 0.670332617769541, 1.6352515314071436], [-0.775777139361843, 0.6126253303247244, 1.443774591889969], [-0.8486911365443668, 0.8453750086441679, 1.4889677352510884], [-1.0153248652852886, 0.5445114572629689, 1.6776141514752716], [-1.0891151772545837, 0.7340095283086837, 1.7473675431927242], [-1.0131819099790738, 0.7413575586997249, 1.674785642322036]]
The clusters are as follows:
Cluster with a size of 2 starts here:
[[-1.0810673409700675, 0.5468347997344518, 1.8650449861432667], [-1.143712439630212, 0.6328297540587494, 1.7224704058865645]]
Cluster ends here.
Cluster with a size of 2 starts here:
[[-0.9772190370876125, 0.7096358152925288, 1.6005471907137487], [-0.9515223735091252, 0.7194155972966116, 1.5785102209615436]]
Cl

TypeError: 'NoneType' object is not iterable

In [87]:
import os
import numpy as np

# kmeans clustering algorithm
# data = set of data points
# k = number of clusters
# c = initial list of centroids (if provided)
#
def kmeans2(data, k, c):
    centroids = []

    centroids = randomize_centroids(data, centroids, k)  

    old_centroids = [[] for i in range(k)] 

    iterations = 0
    while not (has_converged(centroids, old_centroids, iterations)):
        iterations += 1

        clusters = [[] for i in range(k)]

        # assign data points to clusters
        clusters = euclidean_dist(data, centroids, clusters)

        # recalculate centroids
        index = 0
        for cluster in clusters:
            old_centroids[index] = centroids[index]
            centroids[index] = np.mean(cluster, axis=0).tolist()
            index += 1


    

    return centroids,clusters

In [88]:
cen, clus = kmeans2(a[2], 8, 5)

clus

[[array([-1.08971298, -0.59135358, -0.18794611]),
  array([-1.07736783, -0.66283481, -0.2368212 ])],
 [array([-0.8758094 , -0.91700496, -0.17199934]),
  array([-0.94743867, -0.98447704, -0.21855727]),
  array([-0.84671993, -1.03124792, -0.0827932 ]),
  array([-0.82223148, -1.00513172, -0.02908447])],
 [array([-0.98346831, -0.8408927 , -0.03970446])],
 [array([-0.90614779, -0.52345638, -0.37111519]),
  array([-0.99022955, -0.51806868, -0.35896074]),
  array([-0.97206626, -0.51031183, -0.30174158])],
 [array([-0.9632593 , -0.62987616, -0.08668948])],
 [array([-0.91190765, -0.46728917, -0.19604152])],
 [array([-0.67005988, -1.30338953, -0.17402751])],
 [array([-0.79810367, -1.05552731, -0.11813857]),
  array([-0.87962587, -1.14096728, -0.09082794])]]

In [68]:
v=np.array([ 0.86940909,  0.3294346 , -1.37870144])

In [93]:
clus[1]


[array([-0.8758094 , -0.91700496, -0.17199934]),
 array([-0.94743867, -0.98447704, -0.21855727]),
 array([-0.84671993, -1.03124792, -0.0827932 ]),
 array([-0.82223148, -1.00513172, -0.02908447])]

In [158]:
vs=clus[0][0]

In [96]:
L=[0 for i in range(len(a[2]))]
L


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [113]:
def cloust_list(L, clus_0, a_0, w):
    for i in range(len (clus_0)): 
        for k in range(len (a_0)):
            if  all(a_0[k]==clus_0[i]):
                L[k]=w
    return L








In [121]:
cloust_list(L, clus[5],a[2],5)



[0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [120]:
clus[5]

[array([-0.91190765, -0.46728917, -0.19604152])]

In [118]:
a[2]

array([[-0.9632593 , -0.62987616, -0.08668948],
       [-0.91190765, -0.46728917, -0.19604152],
       [-0.90614779, -0.52345638, -0.37111519],
       [-0.99022955, -0.51806868, -0.35896074],
       [-0.97206626, -0.51031183, -0.30174158],
       [-1.08971298, -0.59135358, -0.18794611],
       [-1.07736783, -0.66283481, -0.2368212 ],
       [-0.98346831, -0.8408927 , -0.03970446],
       [-0.8758094 , -0.91700496, -0.17199934],
       [-0.94743867, -0.98447704, -0.21855727],
       [-0.84671993, -1.03124792, -0.0827932 ],
       [-0.82223148, -1.00513172, -0.02908447],
       [-0.79810367, -1.05552731, -0.11813857],
       [-0.87962587, -1.14096728, -0.09082794],
       [-0.67005988, -1.30338953, -0.17402751]])

In [122]:
clus

[[array([-1.08971298, -0.59135358, -0.18794611]),
  array([-1.07736783, -0.66283481, -0.2368212 ])],
 [array([-0.8758094 , -0.91700496, -0.17199934]),
  array([-0.94743867, -0.98447704, -0.21855727]),
  array([-0.84671993, -1.03124792, -0.0827932 ]),
  array([-0.82223148, -1.00513172, -0.02908447])],
 [array([-0.98346831, -0.8408927 , -0.03970446])],
 [array([-0.90614779, -0.52345638, -0.37111519]),
  array([-0.99022955, -0.51806868, -0.35896074]),
  array([-0.97206626, -0.51031183, -0.30174158])],
 [array([-0.9632593 , -0.62987616, -0.08668948])],
 [array([-0.91190765, -0.46728917, -0.19604152])],
 [array([-0.67005988, -1.30338953, -0.17402751])],
 [array([-0.79810367, -1.05552731, -0.11813857]),
  array([-0.87962587, -1.14096728, -0.09082794])]]

In [128]:
def Clustering(trag,nclus):
    cen, clus = kmeans2(trag, nclus, 5)
    L=[0 for i in range(len(a[2]))]
    for  b in range(nclus):
        cloust_list(L, clus[b],trag,b)
    return L
    

In [112]:
cloust_list(L, clus[2],a[0],2)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [110]:
cloust_list(L, clus[3],a[0],3)

[0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 7, 7, 0]