In [25]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from tabulate import tabulate
import warnings
warnings.filterwarnings("ignore")

patients = load_diabetes()
X = patients.data

results_data = []

# Different values of N (number of clusters)
N_values = [3, 4, 5]

for N in N_values:
    preprocessing_techniques = {
        "No Preprocessing": X,
        "Normalization": MinMaxScaler().fit_transform(X),
        "PCA": PCA(n_components=2).fit_transform(X),
        "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
        "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
    }

    for technique, data in preprocessing_techniques.items():
        # Perform KMeans clustering
        clustering = KMeans(n_clusters=N, n_init='auto')
        labels = clustering.fit_predict(data)

        # Calculate evaluation metrics
        silhouette = silhouette_score(data, labels)
        calinski_harabasz = calinski_harabasz_score(data, labels)
        davies_bouldin = davies_bouldin_score(data, labels)

        # Append results to the list, displaying N only once per technique
        if technique == "No Preprocessing":
            results_data.append(["N = {}".format(N), technique, silhouette, calinski_harabasz, davies_bouldin])
        else:
            results_data.append(["", technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["N (Clusters)", "Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print("K-Means Clustering:")
print(tabulate(results_data, headers=headers, tablefmt='pretty'))


K-Means Clustering:
+--------------+---------------------------------+---------------------+-------------------------+----------------------+
| N (Clusters) |     Preprocessing Technique     |  Silhouette Score   | Calinski-Harabasz Score | Davies-Bouldin Score |
+--------------+---------------------------------+---------------------+-------------------------+----------------------+
|    N = 3     |        No Preprocessing         | 0.16211078341044485 |    112.5766549221183    |  1.842076807333007   |
|              |          Normalization          | 0.32959164656263246 |   286.55469010842927    |  1.3037254268673946  |
|              |               PCA               | 0.3350601267863381  |    363.5439327167556    |  1.026234398985492   |
|              |    Transform + Normalization    | 0.3430639009542098  |   290.97900516259386    |  0.9775690716294371  |
|              | Transform + Normalization + PCA |  0.586342856668023  |    791.5211559479443    |  0.5496171964526538  |
|   

In [23]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from tabulate import tabulate
import warnings
warnings.filterwarnings("ignore")

patients = load_diabetes()
X = patients.data

results_data = []

# Different values of N (number of clusters)
N_values = [3, 4, 5]

for N in N_values:
    preprocessing_techniques = {
        "No Preprocessing": X,
        "Normalization": MinMaxScaler().fit_transform(X),
        "PCA": PCA(n_components=2).fit_transform(X),
        "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
        "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
    }

    for technique, data in preprocessing_techniques.items():
        # Perform Agglomerative Hierarchical Clustering
        clustering = AgglomerativeClustering(n_clusters=N)
        labels = clustering.fit_predict(data)

        # Calculate evaluation metrics
        silhouette = silhouette_score(data, labels)
        calinski_harabasz = calinski_harabasz_score(data, labels)
        davies_bouldin = davies_bouldin_score(data, labels)

        # Append results to the list, displaying N only once per technique
        if technique == "No Preprocessing":
            results_data.append(["N = {}".format(N), technique, silhouette, calinski_harabasz, davies_bouldin])
        else:
            results_data.append(["", technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["N (Clusters)", "Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print("Agglomerative Hierarchical Clustering:")
print(tabulate(results_data, headers=headers, tablefmt='pretty'))


Agglomerative Hierarchical Clustering:
+--------------+---------------------------------+---------------------+-------------------------+----------------------+
| N (Clusters) |     Preprocessing Technique     |  Silhouette Score   | Calinski-Harabasz Score | Davies-Bouldin Score |
+--------------+---------------------------------+---------------------+-------------------------+----------------------+
|    N = 3     |        No Preprocessing         | 0.12496473908632375 |    95.75864313388199    |  2.1321806623880435  |
|              |          Normalization          | 0.3215407058889175  |    276.313467192541     |  1.3645430219385073  |
|              |               PCA               | 0.32326304313858956 |    323.8473474024072    |  1.0960993170328484  |
|              |    Transform + Normalization    | 0.31514302808860056 |    284.7483560778795    |  0.9938219629022732  |
|              | Transform + Normalization + PCA | 0.6127367955903301  |    957.8771844227998    |  0.50771

In [22]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from tabulate import tabulate
import warnings
warnings.filterwarnings("ignore")

patients = load_diabetes()
X = patients.data

results_data = []

# Different values of N (number of clusters)
N_values = [3, 4, 5]

for N in N_values:
    preprocessing_techniques = {
        "No Preprocessing": X,
        "Normalization": MinMaxScaler().fit_transform(X),
        "PCA": PCA(n_components=2).fit_transform(X),
        "Transform + Normalization": MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X)),
        "Transform + Normalization + PCA": PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
    }

    for technique, data in preprocessing_techniques.items():
        # Perform Spectral Clustering
        clustering = SpectralClustering(n_clusters=N)
        labels = clustering.fit_predict(data)

        # Calculate evaluation metrics
        silhouette = silhouette_score(data, labels)
        calinski_harabasz = calinski_harabasz_score(data, labels)
        davies_bouldin = davies_bouldin_score(data, labels)

        # Append results to the list, displaying N only once per technique
        if technique == "No Preprocessing":
            results_data.append(["N = {}".format(N), technique, silhouette, calinski_harabasz, davies_bouldin])
        else:
            results_data.append(["", technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["N (Clusters)", "Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print("Spectral Clustering:")
print(tabulate(results_data, headers=headers, tablefmt='pretty'))


Spectral Clustering:
+--------------+---------------------------------+---------------------+-------------------------+----------------------+
| N (Clusters) |     Preprocessing Technique     |  Silhouette Score   | Calinski-Harabasz Score | Davies-Bouldin Score |
+--------------+---------------------------------+---------------------+-------------------------+----------------------+
|    N = 3     |        No Preprocessing         | 0.16998514769554465 |    103.9348596680327    |  1.9967880269977991  |
|              |          Normalization          | 0.33059697454392634 |   286.52246817477413    |  1.308370292997756   |
|              |               PCA               | 0.3467699661228943  |    300.5302310417969    |  1.1048984477376986  |
|              |    Transform + Normalization    | 0.35873528574351016 |   295.81327216961665    |  0.9806970568688942  |
|              | Transform + Normalization + PCA | 0.6119680572562323  |    958.4790241732301    |  0.5107102255320352  |
|  

In [33]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from tabulate import tabulate
import warnings
warnings.filterwarnings("ignore")

patients = load_diabetes()
X = patients.data

results_data = []

# Different values of epsilon and minimum samples for DBSCAN
epsilon_values = [0.5, 1.0, 1.5]
min_samples_values = [5, 10, 15]

for epsilon in epsilon_values:
    for min_samples in min_samples_values:
        for technique in ["No Preprocessing", "Normalization", "PCA", "Transform + Normalization", "Transform + Normalization + PCA"]:
            if technique == "No Preprocessing":
                data = X
            elif technique == "Normalization":
                data = MinMaxScaler().fit_transform(X)
            elif technique == "PCA":
                data = PCA(n_components=2).fit_transform(X)
            elif technique == "Transform + Normalization":
                data = MinMaxScaler().fit_transform(PCA(n_components=2).fit_transform(X))
            elif technique == "Transform + Normalization + PCA":
                data = PCA(n_components=2).fit_transform(MinMaxScaler().fit_transform(X))
            
            # Perform DBSCAN Clustering
            clustering = DBSCAN(eps=epsilon, min_samples=min_samples)
            labels = clustering.fit_predict(data)

            # Check if more than one cluster label is generated
            if len(np.unique(labels)) > 1:
                # Calculate evaluation metrics
                silhouette = silhouette_score(data, labels)
                calinski_harabasz = calinski_harabasz_score(data, labels)
                davies_bouldin = davies_bouldin_score(data, labels)

                # Append results to the list
                results_data.append([f"Epsilon = {epsilon}, Min Samples = {min_samples}", technique, silhouette, calinski_harabasz, davies_bouldin])

# Table headers
headers = ["Parameters", "Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Display the table
print("DBSCAN Clustering:")
print(tabulate(results_data, headers=headers, tablefmt='pretty'))


DBSCAN Clustering:
+---------------------------------+---------------------------------+---------------------+-------------------------+----------------------+
|           Parameters            |     Preprocessing Technique     |  Silhouette Score   | Calinski-Harabasz Score | Davies-Bouldin Score |
+---------------------------------+---------------------------------+---------------------+-------------------------+----------------------+
| Epsilon = 0.5, Min Samples = 5  |          Normalization          | 0.39800898088714065 |   205.01166530017693    |  2.360481962946987   |
| Epsilon = 0.5, Min Samples = 5  | Transform + Normalization + PCA | 0.6559197265088276  |   1006.5030144117595    |  0.5415783527733922  |
| Epsilon = 0.5, Min Samples = 10 |          Normalization          | 0.39800898088714065 |   205.01166530017693    |  2.360481962946987   |
| Epsilon = 0.5, Min Samples = 10 | Transform + Normalization + PCA | 0.6559197265088276  |   1006.5030144117595    |  0.54157835277339