# Task 3: Unsupervised Learning – Customer Segmentation
## Objective:
Apply K-Means clustering to standardized customer features to segment customers into distinct groups, then visualize the resulting segments in two dimensions with PCA.

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


In [None]:
# Read the Mall Customer CSV from a remote URL into a DataFrame.
# NOTE(review): confirm this file is actually hosted at the path below; if it
# is not, point `url` at a local/uploaded copy of the dataset instead.
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mall_customers.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first rows to sanity-check the columns that were loaded.
df.head()

In [None]:
# Build the feature matrix used for clustering.
# 'CustomerID' and 'Genre' are removed as non-numeric / irrelevant columns;
# a missing one still raises KeyError so schema problems surface early.
X = df.drop(columns=['CustomerID', 'Genre'])

# The clustering cell further down writes its labels back into df['Cluster'].
# If this cell is re-run afterwards, that column would otherwise be scaled and
# clustered as if it were a customer feature, so drop it whenever it exists.
X = X.drop(columns=['Cluster'], errors='ignore')

# Standardize every remaining column (zero mean, unit variance) so that no
# single feature dominates the Euclidean distances K-Means relies on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Elbow method: fit K-Means for k = 1..10 on the scaled features and record
# each model's inertia, then look for the "elbow" where adding more clusters
# stops reducing inertia substantially.
k_values = range(1, 11)
inertia = []
for k in k_values:
    # n_init is set explicitly: the library default changed between
    # scikit-learn releases (10 restarts -> 'auto'), so leaving it implicit
    # makes the curve depend on the installed version and can emit a
    # FutureWarning. Ten restarts keeps each inertia value stable.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)

# Plot inertia against the number of clusters.
plt.plot(k_values, inertia, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.grid(True)
plt.show()

In [None]:
# Fit the final K-Means model. k=5 is used as an example value; adjust it to
# whatever the elbow plot suggests for the data at hand.
# n_init is pinned because its default differs across scikit-learn releases
# (10 restarts vs. 'auto'); fixing it, together with random_state, keeps the
# cluster assignments reproducible regardless of the installed version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Attach each customer's cluster label to the original DataFrame and preview.
df['Cluster'] = clusters
df.head()

In [None]:
# Project the scaled features onto their first two principal components so
# the cluster assignments can be inspected on a 2-D scatter plot.
pca = PCA(n_components=2)
pca_features = pca.fit_transform(X_scaled)

# Draw on an explicit Axes: one point per customer, colored by cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x=pca_features[:, 0],
    y=pca_features[:, 1],
    hue=clusters,
    palette='Set2',
    ax=ax,
)
ax.set_title('Customer Segments via PCA')
ax.set_xlabel('PCA 1')
ax.set_ylabel('PCA 2')
ax.grid(True)
plt.show()