In [60]:
import openml
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder #For encoding categorical variables
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans,AgglomerativeClustering,DBSCAN,OPTICS,AffinityPropagation,MeanShift,SpectralClustering,Birch
from sklearn.mixture import GaussianMixture
from sklearn.metrics import accuracy_score,f1_score,adjusted_rand_score,silhouette_score
from joblib import Parallel,delayed
import time
import scipy

# Download Datasets Using openml

In [61]:
iris = openml.datasets.get_dataset("iris")
iris_df, iris_label, categorical_indicator, attribute_names = iris.get_data(
    target=iris.default_target_attribute, dataset_format="dataframe"
)
iris_df["class"]=iris_label
iris_x=iris_df.iloc[:,:4]
iris_df

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [62]:
wine = openml.datasets.get_dataset("wine")
wine_df, wine_label, categorical_indicator, attribute_names = wine.get_data(
    target= wine.default_target_attribute, dataset_format="dataframe"
)
wine_df["class"]=wine_label
wine_x=wine_df.iloc[:,:13]
wine_df

Unnamed: 0,Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280%2FOD315_of_diluted_wines,Proline,class
0,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065,1
1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050,1
2,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185,1
3,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480,1
4,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740,3
174,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750,3
175,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835,3
176,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840,3


# Identify Data Types

In [63]:
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   sepallength  150 non-null    float64 
 1   sepalwidth   150 non-null    float64 
 2   petallength  150 non-null    float64 
 3   petalwidth   150 non-null    float64 
 4   class        150 non-null    category
dtypes: category(1), float64(4)
memory usage: 5.1 KB


In [64]:
wine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column                          Non-Null Count  Dtype   
---  ------                          --------------  -----   
 0   Alcohol                         178 non-null    float64 
 1   Malic_acid                      178 non-null    float64 
 2   Ash                             178 non-null    float64 
 3   Alcalinity_of_ash               178 non-null    float64 
 4   Magnesium                       178 non-null    uint8   
 5   Total_phenols                   178 non-null    float64 
 6   Flavanoids                      178 non-null    float64 
 7   Nonflavanoid_phenols            178 non-null    float64 
 8   Proanthocyanins                 178 non-null    float64 
 9   Color_intensity                 178 non-null    float64 
 10  Hue                             178 non-null    float64 
 11  OD280%2FOD315_of_diluted_wines  178 non-null    float64 
 12  Proline               

# Transform categorical variable to numeric

In [65]:
le=LabelEncoder()

iris_y=le.fit_transform(iris_label)
wine_y=le.fit_transform(wine_label)

# Min-max normalise

In [66]:
scaler = MinMaxScaler()

iris_x_scaled=scaler.fit_transform(iris_x)
wine_x_scaled=scaler.fit_transform(wine_x)


# DataSets

In [67]:
Dataset = ["Iris","Wine"]

# Algorithms

In [83]:
def algorithm(x, y, algo, parameters):
    start_time = time.time()
    if algo=="kmeans":
        model=KMeans(n_clusters=parameters[0], max_iter=parameters[1], n_init=parameters[2])
    elif algo=="aglomerative":
        model= AgglomerativeClustering(n_clusters=parameters[0], linkage=parameters[1])
    elif algo=="dbscan":
        model= DBSCAN(eps = parameters[0], min_samples = parameters[1])
    elif algo == "optics":
        model=OPTICS(min_samples=parameters[0])
    elif algo=="ap":
        model= AffinityPropagation(preference=parameters[0], damping=parameters[1])
    elif algo=="meanshift":
        model=MeanShift(bandwidth =parameters[0])
    elif algo =="spectral":
        model=SpectralClustering(n_clusters =parameters[0], affinity=parameters[1],eigen_solver=parameters[2])
    elif algo == "gm":
        model=GaussianMixture(n_components=parameters[0], covariance_type=parameters[1])
   
        
    y_predict = model.fit_predict(x)
    f1score = f1_score(y, y_predict, average = 'weighted')
    ars = adjusted_rand_score(y, y_predict)
    n_clusters = len(set(y_predict)) - (1 if -1 in y_predict else 0)
    if n_clusters>=2 and n_clusters<=100:
        sscore = silhouette_score(x, y_predict, metric="euclidean")
    else:
        sscore = print("None")
    execution_time = time.time() - start_time
    return y_predict, f1score, ars, sscore, execution_time

# KMeans

In [84]:
n_clusters=[2,3,4,5,6,7,8,9,10,20,21,22,23,24,25]
max_iter=[200,300,400]
n_init=[5,10,15]
    
kmean_parameters = pd.DataFrame({"n_clusters":[] , 
                                 "max_iter":[] , 
                                 "n_init":[]}).astype(int)

for i in n_clusters:
        for  ite in max_iter:
            for n in n_init:
                kmean_parameters = kmean_parameters.append({"n_clusters" : i, 
                                                            "max_iter" : ite, 
                                                            "n_init" : n},ignore_index=True)
                
                
                
kmean_parameters

Unnamed: 0,n_clusters,max_iter,n_init
0,2,200,5
1,2,200,10
2,2,200,15
3,2,300,5
4,2,300,10
...,...,...,...
130,25,300,10
131,25,300,15
132,25,400,5
133,25,400,10


In [70]:
# parameters={"n_clusters":[2,3,4,5,6,7,8,9,10,20,21,22,23,24,25],"max_iter":[200,300,400],"n_init":[5,10,15]}
# df=pd.DataFrame(parameters)
# df = pd.DataFrame({k:pd.Series(v) for k,v in parameters.items()})
# for i,ite,n in parameters:
#         for  ite in max_iter:
#             for n in n_init:
#                 kmean_parameters =pd.DataFrame({"[n_clusters,max_iter,n_init]":[i,ite,n]},ignore_index=True)
                
                
                
# kmean_parameters

In [71]:
final_iris_kmeans = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "kmeans", kmean_parameters.iloc[i]) for i in range(0, len(kmean_parameters)))
final_wine_kmeans = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "kmeans", kmean_parameters.iloc[i]) for i in range(0, len(kmean_parameters)))

In [None]:
final_kmeans_df=pd.DataFrame({
    "Dataset":[],
    "[n_clusters,max_iter,n_init]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(kmean_parameters)):
    final_kmeans_df =final_kmeans_df.append({
        "Dataset":Dataset[0],
        "[n_clusters,max_iter,n_init]":kmean_parameters.iloc[i].to_list(),
        "f1 score":final_iris_kmeans[i][1],
        "Adjusted Random Score":final_iris_kmeans[i][2],
        "Silhouette Score":final_iris_kmeans[i][3],
        "Execution Time":final_iris_kmeans[i][4],
        },ignore_index=True)
    
for i in range(0, len(kmean_parameters)):
    final_kmeans_df =final_kmeans_df.append({
        "Dataset":Dataset[0],
        "[n_clusters,max_iter,n_init]":kmean_parameters.iloc[i].to_list(),
        "f1 score":final_wine_kmeans[i][1],
        "Adjusted Random Score":final_wine_kmeans[i][2],
        "Silhouette Score":final_wine_kmeans[i][3],
        "Execution Time":final_wine_kmeans[i][4],
        "Dataset":Dataset[1]
        },ignore_index=True)
    
final_kmeans_df

In [None]:
final_kmeans_df.to_csv('K-Means_Data.csv')

# Agglomerative Clustering

In [None]:
n_clusters =[2,3,4,5,6,7,8,9,10,11,12,13,14,15]
linkage = ["ward", "complete", "average", "single"]

agglomerative_parameters = pd.DataFrame({
    "n_clusters":[] ,
    "linkage":[]}).astype(int)
  
for i in n_clusters:
        for  n in linkage:
                agglomerative_parameters = agglomerative_parameters.append({
                    "n_clusters" : i, 
                    "linkage" : n },
                    ignore_index=True)
                
agglomerative_parameters

In [None]:
final_iris_aggromilative = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "aglomerative", agglomerative_parameters.iloc[i]) for i in range(0, len(agglomerative_parameters)))
final_wine_aggromilative = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "aglomerative", agglomerative_parameters.iloc[i]) for i in range(0, len(agglomerative_parameters)))

In [None]:
final_aggromilative_df=pd.DataFrame({
    "Dataset":[],
    "[n_clusters,linkage]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(agglomerative_parameters)):
    final_aggromilative_df =final_aggromilative_df.append({
        "Dataset":Dataset[0],
        "[n_clusters,linkage]":agglomerative_parameters.iloc[i].to_list(),
        "f1 score":final_iris_aggromilative[i][1],
        "Adjusted Random Score":final_iris_aggromilative[i][2],
        "Silhouette Score":final_iris_aggromilative[i][3],
        "Execution Time":final_iris_aggromilative[i][4],
        },ignore_index=True)
    
for i in range(0, len(agglomerative_parameters)):
    final_aggromilative_df =final_aggromilative_df.append({
        "Dataset":Dataset[1],
        "[n_clusters,linkage]":agglomerative_parameters.iloc[i].to_list(),
        "f1 score":final_wine_aggromilative[i][1],
        "Adjusted Random Score":final_wine_aggromilative[i][2],
        "Silhouette Score":final_wine_aggromilative[i][3],
        "Execution Time":final_wine_aggromilative[i][4],
        },ignore_index=True)
    
final_aggromilative_df

In [None]:
final_aggromilative_df.to_csv('Aggromilative_Data.csv')

# DBScan Clustering

In [None]:
eps = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
min_samples = [2,3,4,5,6,7,8]

dbscan_parameters = pd.DataFrame({
    "eps":[],
    "min_samples" :[]
}).astype(int)

for i in eps:
    for n in min_samples:
        dbscan_parameters=dbscan_parameters.append({
            "eps" : i,
            "min_samples": n
        },ignore_index=True)
        
dbscan_parameters

In [None]:
final_iris_dbscan = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "dbscan", dbscan_parameters.iloc[i]) for i in range(0, len(dbscan_parameters)))
final_wine_dbscan = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "dbscan", dbscan_parameters.iloc[i]) for i in range(0, len(dbscan_parameters)))

In [None]:
final_dbscan_df=pd.DataFrame({
    "Dataset":[],
    "[eps,min_samples]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(dbscan_parameters)):
    final_dbscan_df =final_dbscan_df.append({
        "Dataset":Dataset[0],
        "[eps,min_samples]":dbscan_parameters.iloc[i].to_list(),
        "f1 score":final_iris_dbscan[i][1],
        "Adjusted Random Score":final_iris_dbscan[i][2],
        "Silhouette Score":final_iris_dbscan[i][3],
        "Execution Time":final_iris_dbscan[i][4],
        },ignore_index=True)

for i in range(0, len(dbscan_parameters)):
    final_dbscan_df =final_dbscan_df.append({
        "Dataset":Dataset[1],
        "[eps,min_samples]":dbscan_parameters.iloc[i].to_list(),
        "f1 score":final_wine_dbscan[i][1],
        "Adjusted Random Score":final_wine_dbscan[i][2],
        "Silhouette Score":final_wine_dbscan[i][3],
        "Execution Time":final_wine_dbscan[i][4],
        },ignore_index=True)
    
final_dbscan_df  

In [None]:
final_dbscan_df.to_csv('DBScan_Data.csv')

In [None]:
# n_clusters = len(set(y_dbscan)) - (1 if -1 in y_dbscan else 0)
# n_noise = list(y_dbscan).count(-1)

# print('Estimated number of clusters: %d' % n_clusters)
# print('Estimated number of noise points: %d' % n_noise)

Unlike k-means, DBSCAN will figure out the number of clusters. 
DBSCAN works by determining whether the minimum number of points are close enough to 
one another to be considered part of a single cluster. DBSCAN is very sensitive to scale since 
epsilon is a fixed value for the maximum distance between two points.

# Optics Clustering

In [88]:
min_samples = [2,3,4,5,6,7,8]

optics_parameters = pd.DataFrame({
    "min_samples" :[]
}).astype(int)

for n in min_samples:
    optics_parameters = optics_parameters.append({
    "min_samples": n},ignore_index=True)
        
optics_parameters

Unnamed: 0,min_samples
0,2
1,3
2,4
3,5
4,6
5,7
6,8


In [89]:
final_iris_optics = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "optics", optics_parameters.iloc[i]) for i in range(0, len(optics_parameters)))
final_wine_optics = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "optics", optics_parameters.iloc[i]) for i in range(0, len(optics_parameters)))

In [90]:
final_optics_df=pd.DataFrame({
    "Dataset":[],
    "[min_samples]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [91]:
for i in range(0, len(optics_parameters)):
    final_optics_df =final_optics_df.append({
        "Dataset":Dataset[0],
        "[eps,min_samples]":optics_parameters.iloc[i].to_list(),
        "f1 score":final_iris_optics[i][1],
        "Adjusted Random Score":final_iris_optics[i][2],
        "Silhouette Score":final_iris_optics[i][3],
        "Execution Time":final_iris_optics[i][4],
        },ignore_index=True)

for i in range(0, len(optics_parameters)):
    final_optics_df =final_optics_df.append({
        "Dataset":Dataset[1],
        "[eps,min_samples]":optics_parameters.iloc[i].to_list(),
        "f1 score":final_wine_optics[i][1],
        "Adjusted Random Score":final_wine_optics[i][2],
        "Silhouette Score":final_wine_optics[i][3],
        "Execution Time":final_wine_optics[i][4],
        },ignore_index=True)
    
final_dbscan_df  

Unnamed: 0,Dataset,"[eps,min_samples]",f1 score,Adjusted Random Score,Silhouette Score,Execution Time
0,Iris,"[0.1, 2.0]",0.505427,0.426260,0.230234,0.015957
1,Iris,"[0.1, 3.0]",0.505427,0.428689,0.202901,0.040890
2,Iris,"[0.1, 4.0]",0.473502,0.393767,0.134221,0.013962
3,Iris,"[0.1, 5.0]",0.406043,0.441407,0.057464,0.011969
4,Iris,"[0.1, 6.0]",0.461289,0.366987,0.057155,0.007977
...,...,...,...,...,...,...
121,Wine,"[0.9, 4.0]",0.165031,0.000000,,0.015620
122,Wine,"[0.9, 5.0]",0.165031,0.000000,,0.015620
123,Wine,"[0.9, 6.0]",0.165031,0.000000,,0.015620
124,Wine,"[0.9, 7.0]",0.165031,0.000000,,0.015620


# Gaussian mixtures Clustering

In [None]:
n_components=[1,2,3,4,5,6,7,8,9,10]
covariance_type=["full", "tied","diag", "spherical"]

gm_parameters=pd.DataFrame({
    "n_components":[],
    "covariance_type":[]
}).astype(int)

for i in n_components:
    for n in covariance_type:
        gm_parameters=gm_parameters.append({
            "n_components":i,
            "covariance_type":n
        },ignore_index=True)
        
gm_parameters

In [None]:
final_iris_gm = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "gm", gm_parameters.iloc[i]) for i in range(0, len(gm_parameters)))
final_wine_gm = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "gm", gm_parameters.iloc[i]) for i in range(0, len(gm_parameters)))

In [None]:
final_gm_df=pd.DataFrame({
    "Dataset":[],
    "[n_components,covariance_type]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(gm_parameters)):
    final_gm_df =final_gm_df.append({
        "Dataset":Dataset[0],
        "[n_components,covariance_type]":gm_parameters.iloc[i].to_list(),
        "f1 score":final_iris_gm[i][1],
        "Adjusted Random Score":final_iris_gm[i][2],
        "Silhouette Score":final_iris_gm[i][3],
        "Execution Time":final_iris_gm[i][4],
        },ignore_index=True)

for i in range(0, len(gm_parameters)):
    final_gm_df =final_gm_df.append({
        "Dataset":Dataset[1],
        "[n_components,covariance_type]":gm_parameters.iloc[i].to_list(),
        "f1 score":final_wine_gm[i][1],
        "Adjusted Random Score":final_wine_gm[i][2],
        "Silhouette Score":final_wine_gm[i][3],
        "Execution Time":final_wine_gm[i][4],
        },ignore_index=True)
    
final_gm_df  

In [None]:
final_gm_df.to_csv('Gaussian_Mixtures_Data.csv')

# Affinity propagation

In [None]:
preference = [-1,-3,-5,-7,-9,-11,-13]
damping = [0.5,0.7,0.9]

ap_parameters = pd.DataFrame({
    "preference":[],
    "damping" :[]
}).astype(int)

for i in preference:
    for n in damping:
        ap_parameters=ap_parameters.append({
            "preference":i,
            "damping" :n
        },ignore_index=True)
        
ap_parameters

In [None]:
final_iris_ap = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "ap", ap_parameters.iloc[i]) for i in range(0, len(ap_parameters)))
final_wine_ap = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "ap", ap_parameters.iloc[i]) for i in range(0, len(ap_parameters)))

In [None]:
final_ap_df=pd.DataFrame({
    "Dataset":[],
    "[preference, damping]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(ap_parameters)):
    final_ap_df =final_ap_df.append({
        "Dataset":Dataset[0],
        "[preference, damping]":ap_parameters.iloc[i].to_list(),
        "f1 score":final_iris_ap[i][1],
        "Adjusted Random Score":final_iris_ap[i][2],
        "Silhouette Score":final_iris_ap[i][3],
        "Execution Time":final_iris_ap[i][4],
        },ignore_index=True)
    
for i in range(0, len(ap_parameters)):
    final_ap_df =final_ap_df.append({
        "Dataset":Dataset[1],
        "[preference, damping]":ap_parameters.iloc[i].to_list(),
        "f1 score":final_wine_ap[i][1],
        "Adjusted Random Score":final_wine_ap[i][2],
        "Silhouette Score":final_wine_ap[i][3],
        "Execution Time":final_wine_ap[i][4],
        },ignore_index=True)
    
    
final_ap_df 

In [None]:
final_ap_df.to_csv('Affinity_Propagation_Data.csv')

# Mean-shift 

In [None]:
bandwidth = [0.2,0.3,0.4,0.5,0.6,0.7,0.8]

meanshift_parameters = pd.DataFrame(bandwidth,columns=["bandwidth"])


# for i in bandwidth:
#         meanshift_parameters=meanshift_parameters.append({
#             "bandwidth":i,
#         },ignore_index=True)
        
meanshift_parameters

In [None]:
final_iris_meanshift = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "meanshift",meanshift_parameters.iloc[i]) for i in range(0, len(meanshift_parameters)))
final_wine_meanshift = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "meanshift",meanshift_parameters.iloc[i]) for i in range(0, len(meanshift_parameters)))

In [None]:
final_meanshift_df=pd.DataFrame({
    "Dataset":[],
    "[bandwidth]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(meanshift_parameters)):
    final_meanshift_df =final_meanshift_df.append({
        "Dataset":Dataset[0],
        "[bandwidth]":meanshift_parameters.iloc[i].to_list(),
        "f1 score":final_iris_meanshift[i][1],
        "Adjusted Random Score":final_iris_meanshift[i][2],
        "Silhouette Score":final_iris_meanshift[i][3],
        "Execution Time":final_iris_meanshift[i][4],
        },ignore_index=True)
    
for i in range(0, len(meanshift_parameters)):
    final_meanshift_df =final_meanshift_df.append({
        "Dataset":Dataset[1],
        "[bandwidth]":meanshift_parameters.iloc[i].to_list(),
        "f1 score":final_wine_meanshift[i][1],
        "Adjusted Random Score":final_wine_meanshift[i][2],
        "Silhouette Score":final_wine_meanshift[i][3],
        "Execution Time":final_wine_meanshift[i][4],
        },ignore_index=True)
    
    
final_meanshift_df 

In [None]:
final_meanshift_df.to_csv('Meanshift_Data.csv')

# Spectral Clustering

In [None]:
n_clusters=[2,3,4,5,6,7,8]
affinity=["nearest_neighbors","rbf"]
eigen_solver=["arpack"]

    
spectral_parameters = pd.DataFrame({"n_clusters":[] , 
                                 "affinity":[] , 
                                 "eigen_solver":[]}).astype(int)

for i in n_clusters:
        for  af in affinity:
            for n in eigen_solver:
                spectral_parameters = spectral_parameters.append({"n_clusters" : i, 
                                                            "affinity" : af, 
                                                            "eigen_solver" : n},ignore_index=True)
                
                
spectral_parameters

In [None]:
final_iris_spectral = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, "spectral", spectral_parameters.iloc[i]) for i in range(0, len(spectral_parameters)))
final_wine_spectral = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "spectral", spectral_parameters.iloc[i]) for i in range(0, len(spectral_parameters)))

In [None]:
final_spectral_df=pd.DataFrame({
    "Dataset":[],
    "[n_clusters, affinity]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(spectral_parameters)):
    final_spectral_df =final_spectral_df.append({
        "Dataset":Dataset[0],
        "[n_clusters, affinity]":spectral_parameters.iloc[i].to_list(),
        "f1 score":final_iris_spectral[i][1],
        "Adjusted Random Score":final_iris_spectral[i][2],
        "Silhouette Score":final_iris_spectral[i][3],
        "Execution Time":final_iris_spectral[i][4],
        },ignore_index=True)
    
for i in range(0, len(spectral_parameters)):
    final_spectral_df =final_spectral_df.append({
        "Dataset":Dataset[1],
        "[n_clusters, affinity]":spectral_parameters.iloc[i].to_list(),
        "f1 score":final_wine_spectral[i][1],
        "Adjusted Random Score":final_wine_spectral[i][2],
        "Silhouette Score":final_wine_spectral[i][3],
        "Execution Time":final_wine_spectral[i][4],
        },ignore_index=True)
    
    
final_spectral_df 

In [None]:
final_spectral_df .to_csv('Spectral_Data.csv')

# BIRCH (Balanced Iterative Reducing and Clustering using Hierarchies )

In [None]:
def algorithm(x, y, parameters):
    start_time = time.time()
    model= Birch(n_clusters=3,threshold=parameters[0])  
    y_predict = model.fit_predict(x)
    f1score = f1_score(y, y_predict, average = 'weighted')
    ars = adjusted_rand_score(y, y_predict)
#   sscore = silhouette_score(x, y_predict, metric="euclidean")
    execution_time = time.time() - start_time
    return y_predict, f1score, ars, execution_time

In [None]:
threshold=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8]
    
birch_parameters = pd.DataFrame(threshold,columns=["threshold"])
            
birch_parameters

In [None]:
final_iris_birch = Parallel(n_jobs=-1)(delayed(algorithm)(iris_x_scaled, iris_y, birch_parameters.iloc[i]) for i in range(0, len(birch_parameters)))
# final_wine_birch = Parallel(n_jobs=-1)(delayed(algorithm)(wine_x_scaled, wine_y, "birch", birch_parameters.iloc[i]) for i in range(0, len(birch_parameters)))

In [None]:
final_spectral_df=pd.DataFrame({
    "Dataset":[],
    "[n_clusters, affinity]":[],
    "f1 score":[],
    "Adjusted Random Score":[],
    "Silhouette Score":[],
    "Execution Time":[],
})

In [None]:
for i in range(0, len(spectral_parameters)):
    final_spectral_df =final_spectral_df.append({
        "Dataset":Dataset[0],
        "[n_clusters, affinity]":spectral_parameters.iloc[i].to_list(),
        "f1 score":final_iris_spectral[i][1],
        "Adjusted Random Score":final_iris_spectral[i][2],
        "Silhouette Score":final_iris_spectral[i][3],
        "Execution Time":final_iris_spectral[i][4],
        },ignore_index=True)
    
for i in range(0, len(spectral_parameters)):
    final_spectral_df =final_spectral_df.append({
        "Dataset":Dataset[1],
        "[n_clusters, affinity]":spectral_parameters.iloc[i].to_list(),
        "f1 score":final_wine_spectral[i][1],
        "Adjusted Random Score":final_wine_spectral[i][2],
        "Silhouette Score":final_wine_spectral[i][3],
        "Execution Time":final_wine_spectral[i][4],
        },ignore_index=True)
    
    
final_spectral_df 

# Ward hierarchical

In [None]:
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 0, 1, 1, 2]
metrics.rand_score(labels_true, labels_pred)

In [None]:
ap =MeanShift(bandwidth=0.75)
y_predict = ap.fit_predict(wine_x_scaled)

y_predict

In [None]:
model= Birch(threshold=0.1, n_clusters =3)
y_predict = model.fit_predict(iris_x_scaled)
y_predict