# W07 - Mall Customers Clustering
**NIM: 0706022310053**
**Name: Collin Joseph**

---

## Part A: Data Understanding, Cleaning, Visualizations

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Source CSV for the mall customers dataset (fetched over HTTPS on every run).
url = "https://raw.githubusercontent.com/NathaliaMinoque/datasets/refs/heads/main/mall_customers.csv"

# Read the remote file into the working DataFrame used by the rest of the notebook.
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows.
df.head(5)


In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Print a per-column summary (non-null counts and dtypes) plus memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

### Check Missing Values & Duplicates

In [None]:

# Number of missing values in each column.
missing_per_column = df.isna().sum()

# Number of rows that exactly repeat an earlier row.
duplicate_row_count = df.duplicated().sum()

# Show both results together as a (Series, int) pair.
(missing_per_column, duplicate_row_count)


---
## Part B: Data Transformation

In [None]:

from sklearn.preprocessing import LabelEncoder, StandardScaler

# Replace the Gender labels with integer codes. LabelEncoder numbers the
# classes in sorted label order; the fitted encoder is kept in `le` so the
# codes can be mapped back with le.inverse_transform / le.classes_.
le = LabelEncoder()
le.fit(df['Gender'])
df['Gender'] = le.transform(df['Gender'])

# Numeric columns used for clustering.
features = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']

# Standardize those columns (zero mean, unit variance) on a copy, so the
# original-unit values stay available in `df`.
scaler = StandardScaler()
scaler.fit(df[features])
df_scaled = df.copy()
df_scaled[features] = scaler.transform(df[features])

df_scaled.head()


---
## Part C: Choosing K using Elbow Method

In [None]:

from sklearn.cluster import KMeans

# Elbow method: fit K-Means for each candidate K and record the within-cluster
# sum of squares (inertia). The K where the curve stops dropping sharply is a
# reasonable choice for the number of clusters.
X = df_scaled[features]
k_values = range(2, 11)

wcss = []
for k in k_values:
    # n_init is pinned explicitly: its default differs between scikit-learn
    # releases (10 before 1.4, 'auto' afterwards, i.e. a single run with
    # k-means++), which would make the curve depend on the installed version.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

fig, ax = plt.subplots()
# Markers and integer ticks make the individual K values easy to read off.
ax.plot(k_values, wcss, marker="o")
ax.set_xticks(list(k_values))
ax.set_xlabel("K")
ax.set_ylabel("WCSS")
ax.set_title("Elbow Method")
plt.show()


---
## Part D: Modeling K-Means

In [None]:

# Number of clusters, chosen manually after inspecting the elbow curve.
k_optimal = 5

# n_init is set explicitly so the final segmentation does not depend on the
# installed scikit-learn version (the default changed from 10 to 'auto' in
# 1.4); random_state fixes the centroid initialization for reproducibility.
kmeans = KMeans(n_clusters=k_optimal, n_init=10, random_state=42)

# Fit on the standardized features and store each row's cluster label.
df_scaled['Cluster'] = kmeans.fit_predict(X)

df_scaled.head()


In [None]:

# 2D view of the segmentation: standardized income vs. spending score,
# with each point coloured by its assigned cluster label.
fig, ax = plt.subplots()
ax.scatter(
    df_scaled['Annual Income (k$)'],
    df_scaled['Spending Score (1-100)'],
    c=df_scaled['Cluster'],
)
ax.set_xlabel("Annual Income (scaled)")
ax.set_ylabel("Spending Score (scaled)")
ax.set_title("Customer Segmentation")
plt.show()


---
## Part E: Cluster Profiling & Insights

In [None]:

# Profile each cluster in the ORIGINAL units (years, k$, 1-100 score).
# The feature columns in df_scaled are standardized, so averaging them would
# give z-scores that cannot be read as a persona. The numeric features are
# therefore taken from the unscaled `df`, while the cluster labels and the
# integer-encoded Gender come from `df_scaled` (both frames share one index).
profile_cols = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)', 'Gender']
cluster_profile = (
    df_scaled[['Cluster', 'Gender']]
    .join(df[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']])
    .groupby('Cluster')[profile_cols]
    .mean()
)

# Mean of the encoded Gender column is the share of rows carrying code 1.
cluster_profile


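### Personas and Managerial Implications

**TODO:** For each cluster, describe the customer persona (age, income, and spending profile taken from the table above) and state one managerial implication.
- Cluster 0: ...
- Cluster 1: ...
- etc.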