In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Load the dataset. `pd` is pandas, imported with the other dependencies at the
# top of the notebook.
DATA_PATH = 'your_dataset.csv'  # placeholder path; point this at the real CSV
TARGET_COLUMN = 'target'

data = pd.read_csv(DATA_PATH)

# Keep only the feature columns for clustering. errors='ignore' makes the drop
# a no-op when the target column is not present in the file.
X = data.drop(columns=[TARGET_COLUMN], errors='ignore')


In [None]:
# Standardize the features: learn the scaling parameters from X, then apply
# them to the same data. `scaler` is kept so the fitted transform can be reused.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Sweep k = 2..10 on the scaled features, recording one inertia value and one
# silhouette score per k (list index i corresponds to k = i + 2).
inertia_values = []
silhouette_scores = []

for k in range(2, 11):
    # n_init is pinned explicitly: scikit-learn changed its default from 10 to
    # 'auto' in 1.4, so leaving it implicit gives version-dependent results
    # (and a FutureWarning on 1.2-1.3) even with a fixed random_state.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = kmeans.fit_predict(X_scaled)

    # Inertia: within-cluster sum of squared distances (lower is tighter).
    inertia_values.append(kmeans.inertia_)
    # Silhouette: mean silhouette coefficient over all samples.
    silhouette_scores.append(silhouette_score(X_scaled, labels))

# Two side-by-side diagnostics for choosing k on the scaled data:
# inertia (elbow method) on the left, silhouette score on the right.
k_range = range(2, 11)
fig, (ax_inertia, ax_silhouette) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: inertia vs. k
ax_inertia.plot(k_range, inertia_values, marker='o')
ax_inertia.set_xlabel('Number of Clusters (k)')
ax_inertia.set_ylabel('Inertia')
ax_inertia.set_title('Elbow Method')

# Right panel: silhouette score vs. k
ax_silhouette.plot(k_range, silhouette_scores, marker='o')
ax_silhouette.set_xlabel('Number of Clusters (k)')
ax_silhouette.set_ylabel('Silhouette Score')
ax_silhouette.set_title('Silhouette Analysis')

fig.tight_layout()
plt.show()


In [None]:
# Repeat the k = 2..10 sweep on the unscaled features so the effect of
# standardization can be compared against the scaled run.
inertia_raw = []
silhouette_raw = []

for k in range(2, 11):
    # Same explicit n_init as the scaled sweep: the scikit-learn default
    # changed from 10 to 'auto' in 1.4, so pinning it keeps both runs
    # comparable and version-independent.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = kmeans.fit_predict(X)
    inertia_raw.append(kmeans.inertia_)
    silhouette_raw.append(silhouette_score(X, labels))

# Compare scaled vs unscaled runs on both recorded metrics. Previously only
# inertia was drawn and silhouette_raw was computed but never shown.
k_range = range(2, 11)
fig, (ax_inertia, ax_silhouette) = plt.subplots(1, 2, figsize=(12, 5))

# Inertia comparison. Inertia is a sum of squared distances in the features'
# own units, so the two curves are on different scales: compare their shapes
# (where the elbow falls), not their absolute heights.
ax_inertia.plot(k_range, inertia_values, label='Scaled Inertia')
ax_inertia.plot(k_range, inertia_raw, label='Unscaled Inertia')
ax_inertia.set_xlabel('k')
ax_inertia.set_ylabel('Inertia')
ax_inertia.set_title('Effect of Scaling on Inertia')
ax_inertia.legend()

# Silhouette comparison. The score is bounded in [-1, 1], so the two curves
# can be read against each other directly.
ax_silhouette.plot(k_range, silhouette_scores, label='Scaled Silhouette')
ax_silhouette.plot(k_range, silhouette_raw, label='Unscaled Silhouette')
ax_silhouette.set_xlabel('k')
ax_silhouette.set_ylabel('Silhouette Score')
ax_silhouette.set_title('Effect of Scaling on Silhouette Score')
ax_silhouette.legend()

fig.tight_layout()
plt.show()
