In [1]:
# Feature Scaling - Standardization (Z-score Normalization)

# --- Step 1: Import Libraries ---
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# --- Step 2: Create Sample Dataset ---
data = {
    'Age': [18, 22, 25, 30, 35, 40, 45, 50],
    'Salary': [20000, 25000, 30000, 40000, 50000, 60000, 80000, 100000]
}

df = pd.DataFrame(data)
print("Original Dataset:\n", df)

# --- Step 3: Apply Standardization ---
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df)

scaled_df = pd.DataFrame(scaled_data, columns=df.columns)
print("\nStandardized Dataset:\n", scaled_df)

# --- Step 4: Compare Mean and Standard Deviation ---
print("\nOriginal Mean:\n", df.mean())
print("Original Std:\n", df.std())

print("\nStandardized Mean (approx 0):\n", scaled_df.mean())
print("Standardized Std (approx 1):\n", scaled_df.std())

# --- Step 5: Example - Standardizing New Data ---
new_data = [[28, 45000]]
new_scaled = scaler.transform(new_data)
print("\nStandardized New Data (Age=28, Salary=45000):", new_scaled)


Original Dataset:
    Age  Salary
0   18   20000
1   22   25000
2   25   30000
3   30   40000
4   35   50000
5   40   60000
6   45   80000
7   50  100000

Standardized Dataset:
         Age    Salary
0 -1.422152 -1.165676
1 -1.046046 -0.975361
2 -0.763966 -0.785047
3 -0.293833 -0.404418
4  0.176300 -0.023789
5  0.646433  0.356840
6  1.116566  1.118097
7  1.586699  1.879355

Original Mean:
 Age          33.125
Salary    50625.000
dtype: float64
Original Std:
 Age          11.369602
Salary    28086.282875
dtype: float64

Standardized Mean (approx 0):
 Age      -5.551115e-17
Salary    5.551115e-17
dtype: float64
Standardized Std (approx 1):
 Age       1.069045
Salary    1.069045
dtype: float64

Standardized New Data (Age=28, Salary=45000): [[-0.48188629 -0.21410373]]


