In [None]:
# Question 2: Feature Scaling
# Description: Demonstrate the importance of feature scaling using a K-Means clustering example.

In [None]:
# Feature Scaling - KMeans Clustering Example

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Build a reproducible synthetic dataset with two features on very different scales.
SEED = 42
N_SAMPLES = 100
np.random.seed(SEED)

# Annual income: normal draws with mean 50,000 and standard deviation 15,000
# (values are not clipped to any range).
income = np.random.normal(loc=50000, scale=15000, size=N_SAMPLES)
# Age: normal draws with mean 40 and standard deviation 10 (also not clipped).
age = np.random.normal(loc=40, scale=10, size=N_SAMPLES)

# Assemble both features column-wise into one DataFrame
df = pd.DataFrame(np.column_stack([income, age]), columns=['Income', 'Age'])

# ----------- Clustering WITHOUT Feature Scaling -----------
# Both columns are passed to K-Means in their original units, so the feature
# with the larger numeric spread (Income) carries most of the distance.
# n_init is set explicitly so the number of centroid initialisations does not
# depend on the installed scikit-learn version's default (the default changed
# between releases, and some releases warn when it is left unset).
kmeans_no_scaling = KMeans(n_clusters=3, n_init=10, random_state=42)
df['Cluster_No_Scaling'] = kmeans_no_scaling.fit_predict(df[['Income', 'Age']])

# ----------- Clustering WITH Feature Scaling -----------
# Standardise Income and Age with StandardScaler before clustering so that
# both features are on a comparable scale.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[['Income', 'Age']])
# Same hyper-parameters as the unscaled run, so the two results differ only
# because of the scaling step.
kmeans_scaled = KMeans(n_clusters=3, n_init=10, random_state=42)
df['Cluster_Scaled'] = kmeans_scaled.fit_predict(scaled_features)

# ----------- Plotting Results -----------
# Two side-by-side panels. Both plot the same points in their original units
# (Income on x, Age on y); only the cluster labels used for colouring differ.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (label column, panel title): left = without scaling, right = with scaling
panels = [
    ('Cluster_No_Scaling', 'K-Means without Feature Scaling'),
    ('Cluster_Scaled', 'K-Means with Feature Scaling'),
]

for ax, (label_col, panel_title) in zip(axes, panels):
    ax.scatter(df['Income'], df['Age'], c=df[label_col], cmap='viridis')
    ax.set_title(panel_title)
    ax.set_xlabel('Income')
    ax.set_ylabel('Age')

plt.tight_layout()
plt.show()
