In [83]:
# Install necessary libraries
# !pip install pycaret



In [115]:
# Import necessary libraries
import pandas as pd
import numpy as np
from pycaret.clustering import *
from sklearn.preprocessing import MinMaxScaler, PowerTransformer
from sklearn.decomposition import PCA
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score


In [91]:
# Load Dataset
from sklearn.datasets import load_iris
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Define the clustering setup
clu_setup = setup(df)

Unnamed: 0,Description,Value
0,Session id,533
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


In [162]:
def evaluate_model(model, data):
    labels = model.fit_predict(data)
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)
    return {'Silhouette': silhouette, 'Calinski-Harabasz': calinski_harabasz, 'Davies-Bouldin': davies_bouldin}


In [178]:
clusters = [3, 4, 5]
preprocessing_methods = ['No Data Processing', 'Normalization', 'Transform', 'T+N', 'T+N+PCA']
kmeans_results_df = pd.DataFrame(columns=['Preprocessing Method', 'Parameters/No. of Clusters', 'Silhouette', 'Calinski-Harabasz', 'Davies-Bouldin'])
hierarchical_results_df = pd.DataFrame(columns=['Preprocessing Method', 'Parameters/No. of Clusters', 'Silhouette', 'Calinski-Harabasz', 'Davies-Bouldin'])
spectral_results_df = pd.DataFrame(columns=['Preprocessing Method', 'Parameters/No. of Clusters', 'Silhouette', 'Calinski-Harabasz', 'Davies-Bouldin'])


In [179]:
for technique in ['K-Means', 'Hierarchical', 'Spectral']:
    if technique == 'K-Means':
        print("USING K-MEANS CLUSTERING")
        model_type = 'kmeans'
        results_df = kmeans_results_df
    elif technique == 'Hierarchical':
        print("\n\n USING HIERARCHICAL CLUSTERINGg")
        model_type = 'hclust'
        results_df = hierarchical_results_df
    elif technique == 'Spectral':
        print("\n\n USING SPECTRAL CLUSTERING")
        model_type = 'spectral'
        results_df = spectral_results_df

    results_df = pd.DataFrame(columns=[
        'Clustering Technique',
        'Preprocessing Method',
        'Parameters/No. of Clusters',
        'Silhouette',
        'Calinski-Harabasz',
        'Davies-Bouldin'
    ])

    for parameter in preprocessing_methods:
        for cluster in clusters:
            if 'Normalization' in parameter:
                df_processed = pd.DataFrame(MinMaxScaler().fit_transform(df), columns=df.columns)
            elif 'Transform' in parameter:
                df_processed = pd.DataFrame(PowerTransformer().fit_transform(df), columns=df.columns)
            elif 'PCA' in parameter:
                df_processed = pd.DataFrame(PCA().fit_transform(df), columns=df.columns)
            else:
                df_processed = df.copy()
            if model_type == 'spectral':
                model = SpectralClustering(n_clusters=cluster, random_state=42)
            else:
                model = create_model(model_type, num_clusters=cluster, verbose=False)


            metrics = evaluate_model(model, df_processed)

            results_df = pd.concat([results_df, pd.DataFrame({
            'Clustering Technique' :[ technique],
            'Preprocessing Method': [parameter],
            'Parameters/No. of Clusters': [cluster],
            'Silhouette': [metrics['Silhouette']],
            'Calinski-Harabasz': [metrics['Calinski-Harabasz']],
            'Davies-Bouldin': [metrics['Davies-Bouldin']],

        })], ignore_index=True)

    results_df = results_df.reset_index(drop=True)
    transposed_df = results_df.transpose()
    print(transposed_df)


    if technique == 'K-Means':
        kmeans_results_df=results_df
    elif technique == 'Hierarchical':
        hierarchical_results_df=results_df
    elif technique == 'Spectral':
        spectral_results_df=results_df


USING K-MEANS CLUSTERING
                                            0                   1   \
Clustering Technique                   K-Means             K-Means   
Preprocessing Method        No Data Processing  No Data Processing   
Parameters/No. of Clusters                   3                   4   
Silhouette                            0.552819            0.498051   
Calinski-Harabasz                   561.627757          530.765808   
Davies-Bouldin                        0.661972            0.780307   

                                            2              3              4   \
Clustering Technique                   K-Means        K-Means        K-Means   
Preprocessing Method        No Data Processing  Normalization  Normalization   
Parameters/No. of Clusters                   5              3              4   
Silhouette                            0.488749       0.504769       0.445065   
Calinski-Harabasz                   495.541488     359.845074     314.472999   
Davi

In [170]:

separator_df = pd.DataFrame([[]])

combined_transposed_df = pd.concat([kmeans_results_df.transpose(),
                                    separator_df,
                                    hierarchical_results_df.transpose(),
                                    separator_df,
                                    spectral_results_df.transpose()],
                                   axis=0)

combined_transposed_df.to_csv("combined_results_transposed.csv", index=True)

print("Combined transposed results saved to combined_results_transposed.csv")

Combined transposed results saved to combined_results_transposed.csv
