# Customer Segmentation using KMeans and PCA

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

In [None]:
# Read the raw customer records (swap in the name of your own CSV file)
df = pd.read_csv(filepath_or_buffer="customer_data.csv")
# Discard, in place, every row that has at least one missing value
df.dropna(axis=0, how="any", inplace=True)

In [None]:
# Basic exploration: preview the first rows, the column schema, and summary statistics
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())

In [None]:
# Keep only the numeric columns, then standardize them with StandardScaler
X = df.select_dtypes(include="number")
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Elbow Method to determine optimal k
# Candidate cluster counts run from 1 to 10, capped at the number of rows
# because KMeans cannot fit more clusters than there are samples.
max_k = min(10, len(X_scaled))
k_values = range(1, max_k + 1)

wcss = []  # within-cluster sum of squares (KMeans inertia_) for each candidate k
for i in k_values:
    # n_init is set explicitly: scikit-learn changed its default between
    # releases, so relying on the default makes the curve version-dependent.
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

# Plot WCSS against k; the "elbow" is where adding clusters stops paying off
plt.figure(figsize=(8, 4))
plt.plot(k_values, wcss, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.grid(True)
plt.show()

In [None]:
# Apply KMeans Clustering with k = 4 (update based on Elbow result)
k = 4
# n_init is set explicitly: scikit-learn changed its default between releases,
# so relying on the default makes the resulting segments version-dependent.
kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
# fit_predict returns one cluster label per row of X_scaled; store it with the data
df['Cluster'] = kmeans.fit_predict(X_scaled)

In [None]:
# Project the standardized features onto their first two principal components
pca = PCA(n_components=2)
pca_components = pca.fit_transform(X_scaled)
# Transposing yields one row per component, so both columns unpack in one step
df['PCA1'], df['PCA2'] = pca_components.T

In [None]:
# Scatter the customers in PCA space, colored by their assigned cluster
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='PCA1', y='PCA2', hue='Cluster', palette='Set2', ax=ax)
ax.set_title('Customer Clusters via PCA')
ax.grid(True)
plt.show()

In [None]:
# Cluster summary: per-cluster mean of every numeric column
# numeric_only=True skips text/categorical columns; without it pandas 2.x
# raises TypeError when the frame contains any non-numeric column.
cluster_summary = df.groupby('Cluster').mean(numeric_only=True)
print(cluster_summary)

In [None]:
# Optional step: write the segmented customer table to CSV without the row index
df.to_csv(path_or_buf="segmented_customers.csv", index=False)