In [None]:
import pandas as pd
import numpy as np

# Load the sample CSV and keep only its numeric columns in a single chained
# expression; the result is bound to `df`.
df = pd.read_csv("./data/SampleFile.csv").select_dtypes(include=np.number)

# Preview the first rows of the numeric-only frame.
df.head()

In [None]:
# ========================= 1. Absolute Maximum Scaling =========================

# Computes the maximum absolute value per column with df.abs().max(axis=0).
# Divides each value by that maximum so every feature lies between -1 and 1.
# Columns whose maximum absolute value is 0 (all zeros) are divided by 1
# instead, so they stay 0 rather than becoming NaN from 0 / 0.
# Displays the first few rows of scaled data with scaled_df.head().

# Take abs() first, then reduce per column. DataFrame.max() accepts `axis` as
# its first positional parameter, so df.max(np.abs(df), axis=0) raises
# "TypeError: got multiple values for argument 'axis'".
max_abs = df.abs().max(axis=0)
max_ads = max_abs  # alias: keeps the original (misspelled) name available

scaled_df = df / max_abs.replace(0, 1)
scaled_df.head()

In [None]:
# ========================= 2. Min-Max Scaling =========================

# Creates a MinMaxScaler, which with default settings rescales each feature
# (column) to the [0, 1] range.
# Fits the scaler to the data and transforms it with scaler.fit_transform(df).
# Converts the result back to a DataFrame, keeping both the column names and
# the row index of df so the scaled rows stay aligned with the original rows.
# Shows the first few scaled rows with scaled_df.head().

from sklearn.preprocessing import MinMaxScaler


scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)
# index=df.index: without it the new frame gets a fresh 0..n-1 index, which
# silently misaligns rows whenever df does not use the default index.
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)

scaled_df.head()

In [None]:
# ========================= 3. Normalization (Vector Normalization) =========================

# Scales each row (sample) to unit norm (length = 1); Normalizer uses the
# Euclidean (L2) norm by default.
# Focuses on the direction rather than the magnitude of data points.
# Useful for algorithms relying on similarity or angles (e.g., cosine similarity).
# The result is converted back to a DataFrame that keeps the column names and
# the row index of df, so each normalized row lines up with its source row.
# scaled_df.head() shows normalized data where each row is scaled individually.

from sklearn.preprocessing import Normalizer

scaler = Normalizer()
scaled_data = scaler.fit_transform(df)
# index=df.index: without it the new frame gets a fresh 0..n-1 index, which
# silently misaligns rows whenever df does not use the default index.
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)

scaled_df.head()

In [None]:
# ========================= 4. Standardization =========================

# Centers each feature by subtracting its mean and scales it to unit variance.
# The transformed features have zero mean and a standard deviation of 1.
# Works best on roughly normally distributed features; improves the
# performance of many ML algorithms.
# The result is converted back to a DataFrame that keeps the column names and
# the row index of df, so the standardized rows stay aligned with df.
# scaled_df.head() shows standardized features.

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_data = scaler.fit_transform(df)
# index=df.index: without it the new frame gets a fresh 0..n-1 index, which
# silently misaligns rows whenever df does not use the default index.
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)

scaled_df.head()

In [None]:
# ========================= 5. Robust Scaling =========================

# Uses the median and interquartile range (IQR) for scaling instead of mean/std.
# Robust to outliers and skewed data distributions.
# Centers data around the median and scales by the spread of the central 50% of values.
# The result is converted back to a DataFrame that keeps the column names and
# the row index of df, so the scaled rows stay aligned with df.
# scaled_df.head() shows robustly scaled data minimizing outlier effects.

from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
scaled_data = scaler.fit_transform(df)
# index=df.index: without it the new frame gets a fresh 0..n-1 index, which
# silently misaligns rows whenever df does not use the default index.
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)

scaled_df.head()