In [4]:
# import the libraries
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objs as go
import plotly.offline as py
from plotly import tools
from plotly.subplots import make_subplots
from sklearn.cluster import KMeans

In [26]:
def Cluster(segment=0, holes=1000, titletext=None ,Ellbow=False, Plotly=True, Save=True, n_clusters=7):
    """Cluster one timestep of hole positions with k-means and plot the result.

    Reads ``"<holes>.xyz"`` from the working directory, keeps the x/y/z columns,
    drops rows with missing coordinates, and selects rows
    ``[segment*holes, (segment+1)*holes)`` as the points of the requested timestep.

    Parameters
    ----------
    segment : int
        Index of the timestep (block of ``holes`` consecutive rows) to cluster.
    holes : int
        Number of points per timestep; also selects the input file ``<holes>.xyz``.
    titletext : str or None
        Plot title and PNG file stem. Defaults to
        ``"<holes> holes at timestep <segment>"``.
    Ellbow : bool
        If true, show an elbow plot (WCSS vs. number of clusters, k = 1..10,
        capped at the number of points available).
    Plotly : bool
        If true, show an interactive plotly 3D scatter coloured by cluster label.
    Save : bool
        If true, write a matplotlib 3D scatter to ``fig/<titletext>.png``.
    n_clusters : int
        Number of k-means clusters for the final model (default 7).

    Raises
    ------
    ValueError
        If the requested segment contains no rows.
    """
    if titletext is None:
        titletext = str(holes) + " holes at timestep " + str(segment)

    df = (
        pd.read_table(
            str(holes) + ".xyz",
            sep=" ",
            names=["type", "x", "y", "z"],
            usecols=["x", "y", "z"],
            skip_blank_lines=True,
        )
        .dropna(axis=0)
        .reset_index(drop=True)
    )
    offset = segment * holes
    df = df.iloc[offset:offset + holes]
    # two-dimensional (n_points, 3) coordinate matrix
    data = df[["x", "y", "z"]].values

    if data.shape[0] == 0:
        raise ValueError(
            "segment " + str(segment) + " of " + str(holes) + ".xyz contains no rows"
        )

    # Optional elbow plot to help choose the number of clusters.
    if Ellbow:
        # KMeans needs n_samples >= n_clusters, so cap the scan for small segments.
        max_k = min(10, data.shape[0])
        candidate_ks = range(1, max_k + 1)
        WCSS = []  # within-cluster sum of squares (model.inertia_) per k
        for k in candidate_ks:
            # Same seed / n_init as the final model so the scan is reproducible.
            elbow_model = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0)
            elbow_model.fit(data)
            WCSS.append(elbow_model.inertia_)
        plt.figure(figsize=(7, 7))
        plt.plot(candidate_ks, WCSS, linewidth=4, markersize=12, marker='o', color='green')
        plt.xticks(np.arange(max_k + 1))
        plt.xlabel("Number of clusters")
        plt.ylabel("WCSS")
        plt.show()

    # The default of 7 clusters follows the original author's note that the
    # elbow occurred at 7 for this data.
    model = KMeans(n_clusters=n_clusters, init="k-means++", max_iter=300, n_init=10, random_state=0)
    y_clusters = model.fit_predict(data)

    # 3d scatterplot using matplotlib, saved to disk
    if Save:
        # One colour per cluster; cycles if n_clusters exceeds the palette.
        palette = ['blue', 'orange', 'green', '#D12B60', 'purple', 'gray', 'brown']
        fig = plt.figure(figsize=(15, 15))
        ax = fig.add_subplot(111, projection='3d')
        # Draw every fitted cluster (previously only clusters 0-5 were drawn).
        for k in range(n_clusters):
            members = data[y_clusters == k]
            ax.scatter(
                members[:, 0], members[:, 1], members[:, 2],
                s=40, color=palette[k % len(palette)], label="cluster " + str(k),
            )

        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')
        ax.legend()
        ax.set_title(titletext)
        out_path = "fig/" + titletext + ".png"
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        fig.savefig(out_path)
        # Close explicitly: this function is called in loops.
        plt.close(fig)

    if Plotly:
        # 3d scatterplot using plotly
        Scene = dict(xaxis=dict(title='X'), yaxis=dict(title='Y'), zaxis=dict(title='Z'))

        # model.labels_ holds the same cluster assignments as y_clusters
        labels = model.labels_
        trace = go.Scatter3d(
            x=data[:, 0], y=data[:, 1], z=data[:, 2], mode='markers',
            marker=dict(color=labels, size=10, line=dict(color='black', width=10)),
        )
        layout = go.Layout(margin=dict(l=0, r=0), scene=Scene, height=800, width=800)
        fig = go.Figure(data=[trace], layout=layout)
        fig.update_layout(title=dict(text=titletext, font=dict(size=25)))
        fig.show()

In [27]:
# Time series for the 100-hole file: every 5th timestep from 0 to 45, PNG only
for timestep in range(0, 50, 5):
    Cluster(segment=timestep, holes=100, Plotly=False)

In [28]:
# Different hole counts: first timestep of each input file, PNG only
for hole_count in (10, 100, 300, 500, 1000):
    Cluster(segment=0, holes=hole_count, Plotly=False)

In [29]:
# Time series for the 1000-hole file: timesteps 0 through 6, PNG plus interactive plot
for timestep in range(7):
    Cluster(segment=timestep, holes=1000)