# Task 8: Clustering with K-Means
## Objective: Perform unsupervised learning with K-Means clustering
### Tools: Scikit-learn, Pandas, Matplotlib, Seaborn

In [None]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import seaborn as sns
# Apply seaborn's default plot theme to every figure drawn below.
# set_theme() is the preferred spelling; set() is only a legacy alias for it.
sns.set_theme()

In [None]:
# Step 2: Load the dataset
# The CSV is fetched over HTTP, so this cell needs network access on every run.
# NOTE(review): confirm this URL still resolves; if it does not, point `url`
# at a local copy of the same file.
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mall_customers.csv'
df = pd.read_csv(url)

# Fail fast, right next to the load, if the two features selected for
# clustering in Step 3 are absent or contain gaps (KMeans rejects NaN input).
required_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
missing_columns = [col for col in required_columns if col not in df.columns]
assert not missing_columns, f'Dataset is missing expected columns: {missing_columns}'
assert df[required_columns].notna().all().all(), 'Clustering features contain missing values'

df.head()

In [None]:
# Step 3: Preprocess the data
# Keep only the two columns that the clustering steps below operate on.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df.loc[:, feature_columns]
X.head()

In [None]:
# Step 4: Use Elbow Method to find optimal number of clusters
# Fit one model per candidate k and record its inertia (within-cluster sum of
# squared distances); the "elbow" of the resulting curve suggests a good k.
inertia = []
K_range = range(1, 11)
for k in K_range:
    # n_init is pinned explicitly: the library default changed from 10 to
    # 'auto' in scikit-learn 1.4, so omitting it makes the curve depend on the
    # installed version (and emits a FutureWarning on 1.2-1.3).
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)

plt.plot(K_range, inertia, 'bo-')
plt.xticks(list(K_range))  # cluster counts are integers; avoid fractional ticks
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()

In [None]:
# Step 5: Fit KMeans with optimal number of clusters
# n_clusters=5 is presumably the elbow read off the Step 4 plot -- confirm
# against the rendered curve.
# n_init is pinned explicitly: the library default changed from 10 to 'auto'
# in scikit-learn 1.4, so omitting it lets the labels vary with the installed
# version even though random_state is fixed.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)

# Store each row's cluster label on the frame; the plotting and evaluation
# cells below read it from df['Cluster'].
df['Cluster'] = kmeans.fit_predict(X)
df.head()

In [None]:
# Step 6: Visualize clusters
# Scatter the two clustering features against each other, one colour per
# cluster label, on an explicitly created 8x6 inch figure.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster',
    palette='Set1',
    ax=ax,
)
ax.set_title('Customer Segments')
plt.show()

In [None]:
# Step 7: Evaluate with silhouette score
# Mean silhouette coefficient over all samples, computed from the clustering
# features and the labels stored on the frame; reported to two decimals.
cluster_labels = df['Cluster']
score = silhouette_score(X, cluster_labels)
print(f'Silhouette Score: {score:.2f}')