# Task 8: K-Means Clustering
Objective: Perform unsupervised learning with K-Means clustering on the Mall Customers dataset.

In [None]:
import pandas as pd

# Read the Mall Customers CSV (expected in the working directory) into a
# DataFrame, then preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='Mall_Customers.csv')
df.head(n=5)

## Selecting Features for Clustering

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Restrict the data to the two columns used for clustering:
# annual income and spending score.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df.loc[:, feature_columns]
X.head(n=5)

## Elbow Method to Determine Optimal K

In [None]:
# Elbow method: fit K-Means for K = 1..10 and record each fit's
# within-cluster sum of squares (exposed by scikit-learn as `inertia_`).
wcss = []
K_range = range(1, 11)
for k in K_range:
    # n_init is pinned explicitly. scikit-learn 1.4 changed the default from
    # 10 restarts to 'auto' (a single run with k-means++), and 1.2/1.3 emit a
    # FutureWarning when it is left unset; pinning it keeps the curve
    # reproducible regardless of the installed version.
    kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Plot WCSS against K; the "elbow" is where the curve stops dropping sharply.
plt.figure(figsize=(6, 4))
plt.plot(K_range, wcss, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters (K)')
plt.ylabel('WCSS')
plt.grid(True)
plt.show()

## Fit K-Means with Optimal K and Visualize Clusters

In [None]:
# K is hard-coded to 5 (presumably read off the elbow plot; re-check it if
# the data or the feature set changes).
k_optimal = 5
# n_init is pinned explicitly so the final model uses the same number of
# restarts on every scikit-learn version (the default changed from 10 to
# 'auto' in 1.4, and 1.2/1.3 warn when it is left unset).
kmeans = KMeans(n_clusters=k_optimal, init='k-means++', n_init=10, random_state=42)
labels = kmeans.fit_predict(X)

# Store each row's cluster assignment on the DataFrame
df['Cluster'] = labels

# Silhouette score of the final labelling (ranges from -1 to 1; higher
# means tighter, better-separated clusters)
sil_score = silhouette_score(X, labels)
print(f"Silhouette Score: {sil_score:.4f}")

# Scatter plot: one series per cluster, so each gets its own colour and
# legend entry
plt.figure(figsize=(6, 4))
for cluster in range(k_optimal):
    cluster_points = X[df['Cluster'] == cluster]
    plt.scatter(cluster_points.iloc[:, 0], cluster_points.iloc[:, 1], label=f'Cluster {cluster}')

# Overlay the fitted centroids (column 0 = income, column 1 = spending score,
# matching the column order of X)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=200, c='black', marker='X', label='Centroids')
plt.title(f'K-Means Clustering (K={k_optimal})')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.grid(True)
plt.show()

## Save the Results

In [None]:
# Write the DataFrame (including the 'Cluster' column) to disk without the
# row index, then confirm on stdout.
df.to_csv(path_or_buf='mall_customers_clusters.csv', index=False)
print("Clustered dataset saved as mall_customers_clusters.csv")