In [1]:
import time 
import warnings 

import numpy as np
import pandas as pd

from sklearn import cluster
from sklearn.metrics import silhouette_score

from sklearn.preprocessing import StandardScaler

import plotly.plotly as py
import plotly.graph_objs as go

In [7]:
def clusteringParams(X,para1,para2,algorithm):
    """Fit ``algorithm`` on ``X`` and report its silhouette score and fit time.

    Parameters
    ----------
    X : array-like
        Samples to cluster. Passed unchanged to ``algorithm.fit`` and to
        ``silhouette_score``.
    para1, para2 :
        Unused. Kept only so existing positional calls keep working.
    algorithm :
        Object exposing ``fit(X)`` and, once fitted, a ``labels_`` attribute.

    Returns
    -------
    tuple
        ``(silhouette, seconds)``. ``silhouette`` is ``-1`` when the score is
        undefined for the produced labeling (fewer than two distinct labels,
        or as many distinct labels as samples). ``seconds`` is the elapsed
        time around the ``fit`` call.
    """
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    t0 = time.perf_counter()

    with warnings.catch_warnings():
        # Silence the two connectivity warnings so they do not flood the
        # output when the function is called repeatedly in a benchmark loop.
        warnings.filterwarnings(
            "ignore",
            message="the number of connected components of the " +
            "connectivity matrix is [0-9]{1,2}" +
            " > 1. Completing it to avoid stopping the tree early.",
            category=UserWarning)
        warnings.filterwarnings(
            "ignore",
            message="Graph is not fully connected, spectral embedding" +
            " may not work as expected.",
            category=UserWarning)
        algorithm.fit(X)
    t1 = time.perf_counter()

    labels = algorithm.labels_
    n_labels = len(np.unique(labels))

    # silhouette_score only accepts 2 <= n_labels <= n_samples - 1; outside
    # that range report the worst possible score instead of raising.
    if 1 < n_labels < len(labels):
        silhouette = silhouette_score(X, labels=labels)
    else:
        silhouette = -1

    return silhouette, (t1-t0)

In [4]:

# Read the user model table from a CSV file in the working directory.
userModel = pd.read_csv(
    'fully_study_model.csv',
    sep=',',
    encoding='utf-8',
)

# Standardize the columns with scikit-learn's StandardScaler.
# NOTE(review): this assumes every column of the CSV is numeric - confirm.
scaler = StandardScaler()
X = scaler.fit_transform(userModel)

In [5]:

# Fixed seed for the estimators that use randomness, so that re-running the
# notebook on a fresh kernel fits the same models.
RANDOM_STATE = 42

# The four clustering estimators to benchmark. Their order must match the
# order of the legend labels used when plotting the timings.
algorithm1=cluster.DBSCAN(eps=1.0, min_samples=9)
algorithm2=cluster.SpectralClustering(n_clusters=2,eigen_tol=0.4,random_state=RANDOM_STATE)
algorithm3=cluster.AgglomerativeClustering(n_clusters=3)
algorithm4=cluster.MiniBatchKMeans(n_clusters=3,batch_size=3,random_state=RANDOM_STATE)

algorithms=[algorithm1,algorithm2,algorithm3,algorithm4]


In [8]:
# Benchmark: fit every algorithm on growing prefixes of the data (10%..100%
# of the rows) and record how long each fit takes.

# The sample sizes are the same for every algorithm, so compute them once.
# int() keeps the slice bound an integer regardless of what round() returns.
pointsX=[int(round(X.shape[0]*dataPercent/100)) for dataPercent in range(10,101,10)]

# algotime[i][j] is the fit time of algorithms[i] on the first pointsX[j] rows.
algotime=[]
for algorithm in algorithms:
    tmpAlgoTime=[]
    for size in pointsX:
        # Slice the standardized matrix X; the raw userModel frame was being
        # clustered before, which left the scaling step without any effect.
        tmpX=X[0:size]
        # Only the elapsed time is needed here; the silhouette is discarded.
        _silhouette, wtime = clusteringParams(tmpX,0,0,algorithm)
        tmpAlgoTime.append(wtime)
    algotime.append(tmpAlgoTime)


In [9]:
# algotime holds one list of timings per algorithm; after transposing, each
# column is one algorithm and each row is one sample size.
df = pd.DataFrame(algotime).T

In [10]:

# Legend labels, indexed by the column number of ``df``.
names = ['DBSCAN','Spectral Clustering','Agglomerative clustering','K-means']

# One line-and-marker trace per column of ``df``: x is the list of sample
# sizes, y is that column's timings.
data = [
    go.Scatter(
        x=pointsX,
        y=df[column],
        mode='markers+lines',
        marker={'size': 5},
        name=names[column],
    )
    for column in range(df.shape[1])
]

# Axis titles are in Russian: "Amount of training data" (x) and
# "Time spent on training (in seconds)" (y).
x_axis = dict(
    title='Количество данных для обучения',
    ticklen=5,
    showticklabels=True,
)
y_axis = dict(
    title='Время, потраченное на обучение (в секундах)',
    ticklen=5,
    zeroline=True,
    showticklabels=True,
)
layout = go.Layout(
    xaxis=x_axis,
    yaxis=y_axis,
    legend=dict(x=0.01, y=1),
)

fig = go.Figure(data=data, layout=layout)
# NOTE(review): ``py`` is plotly's online plotting module and
# world_readable=True asks for a publicly viewable plot - confirm that
# publishing these timings is intended.
py.iplot(fig, world_readable=True, filename='speed')
