# Normalization vs Standardization Demo Notebook

In [None]:

# 1. Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [None]:

# 2. Load Sample Data
# Fetch the California housing dataset and expose it as a feature table `X`
# (one named column per feature) plus the regression target `y`.
data = fetch_california_housing()
X = pd.DataFrame(data["data"], columns=data["feature_names"])
y = data["target"]

# Preview the first rows so the raw, unscaled values are visible.
print("Sample Data:", X.head(), sep="\n")


In [None]:

# 3. Quick look at feature scales
# One box per raw feature on a shared y-axis, drawn before any scaling is applied.
scale_fig, scale_ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=X, ax=scale_ax)
scale_ax.set_title('Feature Scales Before Normalization/Standardization')
# Rotate the feature names so they do not overlap along the x-axis.
plt.setp(scale_ax.get_xticklabels(), rotation=90)
plt.show()


In [None]:

# 4. Normalization (Min-Max Scaling)
# Learn each feature's min/max from the full feature table, then rescale it.
scaler_norm = MinMaxScaler().fit(X)
X_normalized = scaler_norm.transform(X)


In [None]:

# 5. Standardization (Z-Score Scaling)
# Learn each feature's mean/std from the full feature table, then rescale it.
scaler_std = StandardScaler().fit(X)
X_standardized = scaler_std.transform(X)


In [None]:

# 6. Compare a Feature before and after
# Draw the same feature three times side by side: raw, Min-Max scaled, and
# z-score scaled, each as a 30-bin histogram.
feature_idx = 0  # MedInc

hist_panels = (
    ('Original: ' + X.columns[feature_idx], X.iloc[:, feature_idx]),
    ('Normalized', X_normalized[:, feature_idx]),
    ('Standardized', X_standardized[:, feature_idx]),
)

plt.figure(figsize=(15, 5))
for panel_pos, (panel_title, panel_values) in enumerate(hist_panels, start=1):
    # Subplot positions are 1-based: 1 = left, 2 = middle, 3 = right.
    plt.subplot(1, 3, panel_pos)
    plt.hist(panel_values, bins=30)
    plt.title(panel_title)

plt.show()


In [None]:

# 7. Train-Test Split
# Split the RAW features first and only then fit the scalers on the training
# rows. Fitting a scaler on the full dataset before splitting lets test-set
# statistics (min/max, mean/std) leak into the training features, which is a
# form of data leakage. The scalers fitted on all of X in the earlier cells
# are kept for the visual comparison only and are not used for modelling.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Both scaling variants are evaluated on the same rows, so they share one set
# of targets; the per-variant names are kept because later cells use them.
y_train_norm, y_test_norm = y_train, y_test
y_train_std, y_test_std = y_train, y_test

# Min-Max scaling: parameters learned from the training rows, reused on test.
# Test values may therefore fall slightly outside [0, 1].
split_scaler_norm = MinMaxScaler()
X_train_norm = split_scaler_norm.fit_transform(X_train_raw)
X_test_norm = split_scaler_norm.transform(X_test_raw)

# Z-score scaling: mean/std learned from the training rows, reused on test.
split_scaler_std = StandardScaler()
X_train_std = split_scaler_std.fit_transform(X_train_raw)
X_test_std = split_scaler_std.transform(X_test_raw)


In [None]:

# 8. Train Models
# Fit one linear regression per scaling variant and predict on its test split.
model_norm = LinearRegression().fit(X_train_norm, y_train_norm)
model_std = LinearRegression().fit(X_train_std, y_train_std)

y_pred_norm = model_norm.predict(X_test_norm)
y_pred_std = model_std.predict(X_test_std)


In [None]:

# 9. Evaluate
# Report test-set MSE and R2 for each scaling variant, in the same order and
# format for both.
evaluation_runs = (
    ("--- Normalized Data ---", y_test_norm, y_pred_norm),
    ("\n--- Standardized Data ---", y_test_std, y_pred_std),
)

for run_header, y_true_run, y_pred_run in evaluation_runs:
    print(run_header)
    print("MSE:", mean_squared_error(y_true_run, y_pred_run))
    print("R2:", r2_score(y_true_run, y_pred_run))


In [None]:

# 10. Quick Summary
# The closing text spells out which models actually benefit from scaling, so
# the matching scores printed by the evaluation cell are not read as a failed
# experiment: the LinearRegression model used here is ordinary least squares
# with an intercept, whose predictions do not depend on per-feature rescaling.
print("""
Summary:
- Normalization squashes features into [0,1].
- Standardization centers features to mean=0 and std=1.
- Both can improve scale-sensitive models (gradient-based, distance-based, regularized) by helping them converge faster and perform better.
- Plain least-squares LinearRegression is unaffected by per-feature rescaling, so the two sets of scores above match up to floating-point error.
""")
