In [1]:
import pandas as pd
import numpy as np 
from SOM_plus_clustering.som import SOM
from SOM_plus_clustering.som import kmeans
from sklearn import preprocessing
from sklearn.metrics import silhouette_score

In [2]:
df = pd.read_csv("iris_data.csv")
df.drop("Iris-setosa", axis=1, inplace=True)
df.head()

Unnamed: 0,5.1,3.5,1.4,0.2
0,4.9,3.0,1.4,0.2
1,4.7,3.2,1.3,0.2
2,4.6,3.1,1.5,0.2
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4


In [3]:
X = df.values

In [4]:
def get_ss(X, y):
    try: 
        return silhouette_score(X,y)
    except:
        return None

In [12]:
def experiment(total_trial = 10, max_iteration = 100, lr = 1, nr = 5, epoch = 1, total_cluster = 3):
    list_df = list()
    for i in range(total_trial):
        
        model1 = kmeans(n_clusters=total_cluster, method="random")
        model1.fit(X, epochs=epoch)
        ss1 = get_ss(X,model1.predict(X))
        
        model2 = kmeans(n_clusters=total_cluster, method="kde")
        model2.fit(X, epochs=epoch)
        ss2 = get_ss(X,model2.predict(X))
        
        model3 = kmeans(n_clusters=total_cluster, method="kmeans++")
        model3.fit(X, epochs=epoch)
        ss3 = get_ss(X,model3.predict(X))
        
        model4 = SOM(m = total_cluster, n = total_cluster, dim = X.shape[1], initiate_method = "random", max_iter = max_iteration, learning_rate = lr, neighbour_rad = nr)
        model4.fit(X, epoch=epoch)
        ss4 = get_ss(X,model4.predict(X))
        
        model5 = SOM(m = total_cluster, n = total_cluster, dim = X.shape[1], initiate_method = "kmeans", max_iter = max_iteration, learning_rate = lr, neighbour_rad = nr)
        model5.fit(X,epoch=epoch)
        ss5 = get_ss(X,model5.predict(X))
        
        model6 = SOM(m = total_cluster, n = total_cluster, dim = X.shape[1], initiate_method = "kde_kmeans", max_iter = max_iteration, learning_rate = lr, neighbour_rad = nr)
        model6.fit(X, epoch=epoch)
        ss6 = get_ss(X,model6.predict(X))
        
        model7 = SOM(m = 1, n = total_cluster, dim = X.shape[1], initiate_method = "kmeans++", max_iter = max_iteration, learning_rate = lr, neighbour_rad = nr)
        model7.fit(X, epoch=epoch)
        ss7 = get_ss(X,model7.predict(X))
        
        model8 = SOM(m = 1, n = total_cluster, dim = X.shape[1], initiate_method = "SOM++", max_iter = max_iteration, learning_rate = lr, neighbour_rad = nr)
        model8.fit(X, epoch=epoch)
        ss8 = get_ss(X,model8.predict(X))
        
        model9 = SOM(m = 1, n = total_cluster, dim = X.shape[1], initiate_method = "kde", max_iter = max_iteration, learning_rate = lr, neighbour_rad = nr)
        model9.fit(X, epoch=epoch)
        ss9 = get_ss(X,model9.predict(X))
        
        list_shs = [ss1, ss2, ss3, ss4, ss5, ss6, ss7, ss8, ss9]
        list_df.append(list_shs)
    df_save = pd.DataFrame(list_df, columns=["random kmeans", "kde kmeans", "kmeans++", "random SOM", "kmeans SOM", "kde kmeans SOM", "kmeans++ SOM", "SOM++", "kde SOM"])
    return df_save

In [16]:
exp_df = experiment(total_trial = 30, 
                    max_iteration = 50000, 
                    lr = 0.7, 
                    nr = 1, 
                    epoch = 1, 
                    total_cluster = 3)
exp_df

Unnamed: 0,random kmeans,kde kmeans,kmeans++,random SOM,kmeans SOM,kde kmeans SOM,kmeans++ SOM,SOM++,kde SOM
0,0.683825,0.661844,0.547249,0.113681,0.477293,0.473689,0.535864,0.514727,0.210254
1,0.679097,0.679097,0.547932,0.684774,0.555201,0.248771,0.526601,0.534656,-0.012939
2,0.675293,,0.535476,0.423585,0.464505,0.181662,0.540706,0.488235,0.041007
3,,0.681306,0.550427,0.174482,0.27085,0.226645,0.504371,0.505449,0.503187
4,,0.683825,0.538548,0.410826,0.684774,0.250834,0.509962,0.52841,0.639503
5,0.684774,0.675293,0.516907,0.13981,0.430809,0.290756,0.513067,0.468425,0.684774
6,0.684774,0.679097,0.510902,-0.237342,0.302554,0.513134,0.529659,0.483305,
7,0.684774,0.684774,0.546878,0.33841,0.346516,0.458479,0.375282,0.528484,0.574432
8,0.640328,0.679097,0.536568,0.029934,0.66889,0.334324,0.48514,0.548561,0.684774
9,0.683825,0.675293,0.525465,0.219278,0.359946,0.253796,0.515904,0.515904,0.415947


In [14]:
exp_df.to_csv("silhouette_score_data.csv", index=False)