# Implementation: Feature Scaling

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

# Apply seaborn's default plot theme to every figure drawn after this point.
sns.set_theme()

# Build a reproducible sample: 1000 draws from N(mean=50, std=10), followed by
# two extreme values (500 and -200) appended at the end to act as outliers.
np.random.seed(42)
outlier_values = [500, -200]
data = np.concatenate([np.random.normal(50, 10, 1000), outlier_values])
df = pd.DataFrame(data, columns=['Value'])

# Draw a wide, short boxplot of the raw values so the two injected outliers
# are visible relative to the bulk of the data.
plt.figure(figsize=(12, 4))
box_ax = sns.boxplot(data=df, x='Value')
box_ax.set_title('Original Data with Outliers')
plt.show()

## 1. Comparing Scalers

In [None]:
# One scaler of each kind; each is fitted independently on the same raw column.
std_scaler, minmax_scaler, robust_scaler = (
    StandardScaler(),
    MinMaxScaler(),
    RobustScaler(),
)

# Scalers expect 2-D input, hence the double-bracket selection of 'Value'.
raw_values = df[['Value']]
for column_name, scaler in (
    ('Standard', std_scaler),
    ('MinMax', minmax_scaler),
    ('Robust', robust_scaler),
):
    # Fit on the raw column and store the scaled result as a new column.
    df[column_name] = scaler.fit_transform(raw_values)

# Plot the density of each scaled column side by side so the resulting value
# ranges can be compared. All three columns are derived from the same 'Value'
# data, so differences between panels come from how each scaler centers and
# scales it.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# NOTE: `fill=True` replaces the `shade=True` keyword, which seaborn deprecated
# in v0.11 and has announced will become an error in v0.14.
sns.kdeplot(df['Standard'], ax=axes[0], fill=True)
axes[0].set_title('Standard Scaler (Outliers skew mean)')

sns.kdeplot(df['MinMax'], ax=axes[1], fill=True)
axes[1].set_title('MinMax Scaler (Squashed by outliers)')

sns.kdeplot(df['Robust'], ax=axes[2], fill=True)
axes[2].set_title('Robust Scaler (Preserves distribution shape)')

plt.show()