<a href="https://colab.research.google.com/github/DeoraHarleen/Machine-Learning/blob/main/Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

def perform_clustering(X, method, preprocessing=None):
    if preprocessing == 'No data processing':
        X_processed = X
    elif preprocessing == 'Using normalization':
        X_processed = StandardScaler().fit_transform(X)
    elif preprocessing == 'Using transform':
        X_processed = X # You can replace this with any transformation you want to apply
    elif preprocessing == 'Using PCA':
        X_processed = PCA(n_components=2).fit_transform(X)
    elif preprocessing == 'Using T+N':
        X_processed = StandardScaler().fit_transform(X)
        X_processed = PCA(n_components=2).fit_transform(X_processed)
    elif preprocessing == 'Using T+N+PCA':
        X_processed = StandardScaler().fit_transform(X)
        X_processed = PCA(n_components=2).fit_transform(X_processed)
        # You can add more preprocessing steps here
    else:
        X_processed = X

    if method == 'KMeans':
        model = KMeans(n_clusters=3, random_state=42)
    elif method == 'Hierarchical':
        model = AgglomerativeClustering(n_clusters=3)
    else:
        return None

    labels = model.fit_predict(X_processed)
    silhouette = silhouette_score(X_processed, labels)
    calinski = calinski_harabasz_score(X_processed, labels)
    davies = davies_bouldin_score(X_processed, labels)

    return silhouette, calinski, davies

# Function to generate a table for a given clustering method
def generate_table(method):
    preprocessings = ['No data processing', 'Using normalization', 'Using transform', 'Using PCA', 'Using T+N', 'Using T+N+PCA']
    scores = ['Silhouette', 'Calinski-Harabasz', 'Davies-Bouldin']
    results = pd.DataFrame(columns=preprocessings, index=scores)
    for preprocessing in preprocessings:
        result = perform_clustering(X, method.split()[0], preprocessing)
        if result is not None:
            results[preprocessing] = result
    return results

# Generate tables for each clustering method
kmeans_table = generate_table('KMeans')
hierarchical_table = generate_table('Hierarchical')
kmeans_shift_table = generate_table('KMeans Shift')

# Print the tables
print("KMeans Clustering Results:")
print(kmeans_table)
print("\nHierarchical Clustering Results:")
print(hierarchical_table)
print("\nKMeans Shift Clustering Results:")
print(kmeans_shift_table)

# Save KMeans Clustering Results
kmeans_table.to_csv('kmeans_results.csv')

# Save Hierarchical Clustering Results
hierarchical_table.to_csv('hierarchical_results.csv')

# Save KMeans Shift Clustering Results
kmeans_shift_table.to_csv('kmeans_shift_results.csv')

print("Results saved successfully.")



KMeans Clustering Results:
                   No data processing  Using normalization  Using transform  \
Silhouette                   0.552819             0.459948         0.552819   
Calinski-Harabasz          561.627757           241.904402       561.627757   
Davies-Bouldin               0.661972             0.833595         0.661972   

                    Using PCA   Using T+N  Using T+N+PCA  
Silhouette           0.597676    0.509168       0.509168  
Calinski-Harabasz  693.708433  293.856516     293.856516  
Davies-Bouldin       0.564816    0.709931       0.709931  

Hierarchical Clustering Results:
                   No data processing  Using normalization  Using transform  \
Silhouette                   0.554324             0.446689         0.554324   
Calinski-Harabasz          558.058041           222.719164       558.058041   
Davies-Bouldin               0.656256             0.803467         0.656256   

                    Using PCA   Using T+N  Using T+N+PCA  
Silhouette

