# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Column labels for the headerless data file: the target ('class') comes
# first, followed by the 22 descriptive attributes.
column_names = [
    'class',
    'cap-shape', 'cap-surface', 'cap-color',
    'bruises', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root',
    'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    'veil-type', 'veil-color',
    'ring-number', 'ring-type',
    'spore-print-color', 'population', 'habitat',
]

# The file has no header row, so the labels above are applied positionally.
data = pd.read_csv('agaricus-lepiota.data', names=column_names, header=None)

# Clustering is unsupervised: drop the label column, then expand the
# remaining attributes into indicator columns.
data_encoded = pd.get_dummies(data.drop(columns='class'))

# Standardise the indicator columns before distance-based clustering.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_encoded)

def evaluate_kmeans(data, max_k=30):
    """Fit KMeans for every k in 2..max_k and score each clustering.

    Args:
        data: Feature matrix passed unchanged to ``KMeans.fit`` and to both
            scoring functions.
        max_k: Largest number of clusters to try (inclusive). Values below 2
            yield three empty lists.

    Returns:
        A tuple ``(inertia, silhouette, davies_bouldin)`` of lists, each
        holding one entry per k in ascending order starting at k=2.
    """
    inertia, silhouette, davies_bouldin = [], [], []

    for n_clusters in range(2, max_k + 1):
        # Fixed seed so repeated runs produce the same curves.
        model = KMeans(n_clusters=n_clusters, n_init='auto', random_state=42).fit(data)
        assignments = model.labels_

        inertia.append(model.inertia_)
        silhouette.append(silhouette_score(data, assignments))
        davies_bouldin.append(davies_bouldin_score(data, assignments))

    return inertia, silhouette, davies_bouldin

# Single source of truth for the largest k. It drives both the evaluation
# sweep and the x-axis, so the plotted series and K cannot drift apart
# (a length mismatch would make plt.plot raise).
MAX_K = 30

inertia, silhouette, davies_bouldin = evaluate_kmeans(data_scaled, max_k=MAX_K)

# evaluate_kmeans sweeps k from 2 up to and including max_k.
K = range(2, MAX_K + 1)

plt.figure(figsize=(16, 5))

# (series, panel title, y-axis label) for the three side-by-side panels.
metric_panels = [
    (inertia, 'Elbow Method (Inertia)', 'Inertia'),
    (silhouette, 'Silhouette Score', 'Silhouette Score'),
    (davies_bouldin, 'Davies-Bouldin Index', 'Davies-Bouldin Score'),
]

for panel_index, (scores, panel_title, y_label) in enumerate(metric_panels, start=1):
    plt.subplot(1, len(metric_panels), panel_index)
    plt.plot(K, scores, 'bo-')
    plt.title(panel_title)
    plt.xlabel('Number of Clusters (k)')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()

# Final model: 8 clusters, keeping the best of 10 initialisations.
kmeans = KMeans(n_clusters=8, n_init=10, random_state=42)
labels = kmeans.fit_predict(data_scaled)

# Project onto the first two principal components for visualisation only;
# the clustering itself was fitted on the full scaled feature matrix.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(data_scaled)

pca_df = pd.DataFrame(data=X_pca, columns=['PC1', 'PC2'])
pca_df['Cluster'] = labels

# One colour per cluster id (index in this list == cluster label).
custom_colors = ['#FF6347', '#4682B4', '#32CD32', '#FFD700', '#FF00FF',
                 '#C71585', '#40E0D0', '#FF4500']

plt.figure(figsize=(10, 6))
# Drawn with matplotlib rather than seaborn: `sns` is never imported in this
# file, so the previous sns.scatterplot call raised NameError.
for cluster_id in range(kmeans.n_clusters):
    members = pca_df[pca_df['Cluster'] == cluster_id]
    plt.scatter(
        members['PC1'],
        members['PC2'],
        s=100,
        alpha=0.7,
        # Modulo keeps this safe if n_clusters ever exceeds the palette size.
        color=custom_colors[cluster_id % len(custom_colors)],
        label=str(cluster_id),
    )
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(title='Cluster')
plt.title("PCA of Data with KMeans (8 Clusters) - Custom Colors")
plt.show()