# Customer Segmentation using K-Means Clustering
## Credora Internship - Task 2

In [None]:
# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [None]:
# Step 2: Read the customer CSV into a DataFrame
csv_path = "Mall_Customers.csv"
data = pd.read_csv(csv_path)
# Kept as the final expression of the cell so the notebook displays the
# leading rows of the frame.
data.head()

In [None]:
# Step 3: Explore the Data
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called directly; wrapping it in print() would add a stray "None"
# line after the summary.
data.info()
# describe() returns a DataFrame of summary statistics, so it does need print().
print(data.describe())

In [None]:
# Step 4: Check for Missing Values
# Report the number of null entries in each column. This step only inspects
# the data; no rows are dropped and no values are imputed.
missing_per_column = data.isnull().sum()
print(missing_per_column)

In [None]:
# Step 5: Feature Selection
# Cluster on two columns only: annual income and spending score.
feature_columns = ["Annual Income (k$)", "Spending Score (1-100)"]
X = data[feature_columns]

In [None]:
# Step 6: Feature Scaling
# Fit a StandardScaler on the selected feature columns and transform them in
# one call. The fitted scaler is kept in `scaler` and the scaled features in
# `X_scaled`, which is the input to the K-Means fits in the later steps.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Step 7: Elbow Method to Determine Optimal Clusters
# Fit K-Means once per candidate cluster count and record each model's
# inertia, then plot inertia against k so the "elbow" can be read off the
# curve.
k_values = range(1, 11)  # candidate cluster counts, shared by the loop and the plot
inertia = []
for k in k_values:
    # n_init is set explicitly: its default changed from 10 to 'auto' in
    # scikit-learn 1.4, and leaving it unset raises a FutureWarning on
    # 1.2-1.3. Pinning it keeps the curve the same across library versions.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)

plt.figure(figsize=(8, 5))
plt.plot(k_values, inertia, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.grid(True)
plt.show()

In [None]:
# Step 8: Apply KMeans with Optimal k (e.g., k=5)
# The cluster count is named so it is easy to change after inspecting the
# elbow plot.
optimal_k = 5
# n_init is set explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4, and leaving it unset raises a FutureWarning on 1.2-1.3.
# Pinning it keeps the cluster assignment the same across library versions.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)
kmeans.fit(X_scaled)
# Store each row's cluster label alongside the original (unscaled) columns.
data['Cluster'] = kmeans.labels_

In [None]:
# Step 9: Visualize the Clusters
# Scatter the two clustering features in their original units, coloured by
# the cluster label stored in the 'Cluster' column.
plt.figure(figsize=(8, 5))
sns.scatterplot(
    data=data,
    x="Annual Income (k$)",
    y="Spending Score (1-100)",
    hue="Cluster",
    palette='Set1',
)
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.title('Customer Segments')
plt.legend(title='Cluster')
plt.show()

In [None]:
# Step 10: Analyze Each Cluster
# Print the per-cluster mean of every numeric column.
# numeric_only=True is passed explicitly because pandas >= 2.0 no longer
# drops non-numeric columns silently: if the frame holds any text column,
# a bare .mean() raises TypeError.
# NOTE(review): the CSV schema is not visible here - confirm whether it has
# non-numeric columns (and whether averaging an ID column is meaningful).
print(data.groupby('Cluster').mean(numeric_only=True))