# Deep Learning Basics with PyTorch
# Part I — Foundations of Machine Learning
## Exercises

---
## *Appendix A — NumPy Warm-Up*
---

### Array Construction

In [None]:
import numpy as np

# Print floats with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Column vector [0, 1, 2]^T tiled 4 times along the column axis -> shape (3, 4);
# every entry of row i equals i.
A = np.tile(np.arange(3).reshape(3,1), (1,4))
A

### Sampling and Statistics

In [None]:
import pandas as pd

# Load the price/volume table; the "Date" column is parsed to datetime on read.
df = pd.read_csv("adr_prices_and_vol.csv", parse_dates=["Date"])
ticker = "CIB"
# Period-over-period percentage change of the price column; the first row has
# no predecessor, so its NaN is dropped.
returns = df[f"{ticker}_Price"].pct_change().dropna()

# Draw 10,000 observations with replacement (fixed seed for reproducibility)
# and report the mean and standard deviation of the resample.
sample = returns.sample(10000, replace=True, random_state=42)
print(f"mean = {sample.mean():.6f}, std = {sample.std():.6f}")

### Broadcasting

In [None]:
window = 20
# Each row of X is one length-`window` slice of the return series, advanced by
# one observation per row.
X = np.lib.stride_tricks.sliding_window_view(returns, window_shape=window)
# keepdims=True keeps the per-row mean as a column, so the subtraction
# broadcasts it across every element of its row.
X_centered = X - X.mean(axis=1, keepdims=True)

print("Original window shape:", X.shape)
print("Centered matrix shape:", X_centered.shape)
print(X_centered[:3])

### Z-Score Standardization Function

In [None]:
def zscore(X):
    """Standardize each column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : array-like with ``mean``/``std`` methods (e.g. a NumPy array)
        Observations in rows, features in columns; statistics are taken
        along axis 0.

    Returns
    -------
    Same shape as X
        ``(X - mean) / std`` per column. A column with zero standard deviation
        is only centered (it becomes all zeros) instead of producing NaN/inf
        from a division by zero.
    """
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    # A constant column has sigma == 0; dividing by 1 leaves the centered zeros.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    return (X - mu) / safe_sigma

# Percentage changes of price and volume as a two-column array; the first row
# (no predecessor) is dropped before standardizing.
features = df[[f"{ticker}_Price", f"{ticker}_Volume"]].pct_change().dropna().values
features_z = zscore(features)
features_z[:3]

### Vectorized Squared Distance

In [None]:
# Squared Euclidean distance between the first two standardized observations,
# computed two equivalent ways; only the dot-product form is printed.
a = features_z[0]
b = features_z[1]
sq_dist = np.sum((a - b)**2)            # element-wise square, then sum
sq_dist_vec = np.dot((a - b), (a - b))  # inner product of the difference with itself
print(f"Squared distance (loop-free): {sq_dist_vec:.4f}")

---
## *Chapter 1 – Introduction to ML*
---

## Implement MAE & MSE in NumPy

In [None]:
def mae(y_true, y_pred):
    """Return the mean of the absolute differences between targets and predictions."""
    errors = y_true - y_pred
    abs_errors = np.abs(errors)
    return np.mean(abs_errors)

def mse(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions."""
    errors = y_true - y_pred
    squared_errors = np.square(errors)
    return np.mean(squared_errors)

# Persistence baseline: the prediction for the next return is the current one.
# shift(-1) pairs each row with the following return (the last row has none and
# is dropped), so y_true and y_pred have the same length.
y_true = returns.shift(-1).dropna().values
y_pred = returns.iloc[:-1].values  

print(f"MAE: {mae(y_true, y_pred):.6f}")
print(f"MSE: {mse(y_true, y_pred):.6f}")

### Fit Linear Regression on ADR Returns

In [None]:
from sklearn.linear_model import LinearRegression

# Autoregressive setup: the feature is the previous return, the target is the
# current one. shift(1) leaves a NaN in the first row; dropping it and starting
# y at the second observation keeps X and y aligned.
X = returns.shift(1).dropna().values.reshape(-1, 1)
y = returns.iloc[1:].values

# Chronological 80/20 split (no shuffling): the test set is the final 20%.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

linreg = LinearRegression().fit(X_train, y_train)

y_pred_train = linreg.predict(X_train)
y_pred_test  = linreg.predict(X_test)

print(f"Coefficient: {linreg.coef_[0]:.6f}, Intercept: {linreg.intercept_:.6f}")
print(f"Train R²: {linreg.score(X_train, y_train):.4f}")
print(f"Test R² : {linreg.score(X_test, y_test):.4f}")

## Chronological Train/Validation/Test Split

In [None]:
def time_split(X, y, train_size=0.6, val_size=0.2):
    """Cut X and y into consecutive train / validation / test segments.

    The first ``train_size`` fraction of rows forms the training set, the next
    ``val_size`` fraction the validation set, and whatever remains the test
    set. Row order is never changed.
    """
    total = len(X)
    train_end = int(total * train_size)
    val_end = train_end + int(total * val_size)

    train_part = (X[:train_end], y[:train_end])
    val_part = (X[train_end:val_end], y[train_end:val_end])
    test_part = (X[val_end:], y[val_end:])
    return train_part, val_part, test_part

# Apply the default 60/20/20 chronological split and report the partition sizes.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = time_split(X, y)
print(len(X_train), len(X_val), len(X_test))

### Residual Analysis

In [None]:
import matplotlib.pyplot as plt

plt.style.use("seaborn-v0_8")

# X_test / y_test are whichever split is currently in scope; the preceding cell
# rebinds them through time_split.
y_pred_test = linreg.predict(X_test)
residuals = y_test - y_pred_test

# Left panel: residuals against predictions with a zero reference line.
# Right panel: histogram of the residuals.
fig, ax = plt.subplots(1, 2, figsize=(10, 3.5))
ax[0].scatter(y_pred_test, residuals, s=12, alpha=0.6)
ax[0].axhline(0, color="k", lw=1)
ax[0].set_xlabel("Predicted Return")
ax[0].set_ylabel("Residual (y_true - y_pred)")
ax[0].set_title("Residuals vs Predictions")

ax[1].hist(residuals, bins=40, alpha=0.7)
ax[1].set_title("Residual Distribution")
ax[1].set_xlabel("Residual")
ax[1].set_ylabel("Frequency")

plt.tight_layout()
plt.show()

### Challenge: Manual Gradient Descent for Linear Regression

In [None]:
def linear_gd(X, y, lr=0.01, epochs=500):
    """Fit a linear regression by batch gradient descent on the mean squared error.

    Parameters
    ----------
    X : array-like
        Feature values, one row per observation (1-D or 2-D).
    y : array-like
        Targets, one per observation.
    lr : float, default=0.01
        Learning rate (step size).
    epochs : int, default=500
        Number of full-batch updates.

    Returns
    -------
    numpy.ndarray
        Weight vector whose first entry is the intercept, followed by one
        slope per feature column. All zeros if ``epochs`` is 0.
    """
    # Prepend a column of ones so the intercept is learned as an ordinary weight.
    design = np.c_[np.ones(len(X)), X]
    weights = np.zeros(design.shape[1])
    # The MSE gradient is -(2/n) * X^T (y - Xw); fold the constant and the
    # learning rate into one factor computed once instead of every epoch.
    step = 2.0 * lr / len(design)
    for _ in range(epochs):
        residual = y - design @ weights
        weights += step * (design.T @ residual)
    return weights

# Fit with the default learning rate and epoch count on the training split in scope.
w = linear_gd(X_train, y_train)
print(f"Intercept = {w[0]:.6f}, Slope = {w[1]:.6f}")

### Challenge: logistic regression + decision boundary

In [None]:
# --- Challenge: Logistic Regression + Decision Boundary (Compact Version) ---
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# --- Feature Engineering ---
# Both features use only information available before the return being
# predicted: the return is lagged by one step, and the 5-period volatility is
# computed on that lagged series so its window never contains the target.
lagged_returns = returns.shift(1)
X = pd.DataFrame({
    "lag1": lagged_returns,
    "vol5": lagged_returns.rolling(5).std()
}).dropna()
# Align the target with the surviving feature rows by index label rather than
# by a positional tail slice.
y = (returns.loc[X.index] > 0).astype(int).values

# --- Chronological Split (60/20/20) ---
n = len(X)
n_train, n_val = int(0.6 * n), int(0.2 * n)
X_train = X.iloc[:n_train]
X_val = X.iloc[n_train:n_train + n_val]
X_test = X.iloc[n_train + n_val:]
y_train, y_val, y_test = y[:n_train], y[n_train:n_train+n_val], y[n_train+n_val:]

# --- Standardize & Fit ---
# The scaler is fitted on the training rows only, so no test statistics leak in.
scaler = StandardScaler().fit(X_train)
clf = LogisticRegression().fit(scaler.transform(X_train), y_train)

# --- Evaluate ---
y_pred = clf.predict(scaler.transform(X_test))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
print(confusion_matrix(y_test, y_pred))

# --- Decision Boundary ---
# The grid spans the test data in the original (unscaled) feature units.
xx, yy = np.meshgrid(
    np.linspace(X_test["lag1"].min(), X_test["lag1"].max(), 200),
    np.linspace(X_test["vol5"].min(), X_test["vol5"].max(), 200)
)

# Build the grid as a DataFrame with the training column names so the scaler
# receives the same feature names it was fitted with.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X_train.columns)

# Scale the grid exactly like the training data before predicting.
Z = clf.predict(scaler.transform(grid)).reshape(xx.shape)

# --- Plot ---
plt.figure(figsize=(6, 4))
plt.contourf(xx, yy, Z, alpha=0.3, cmap="coolwarm")
plt.scatter(
    X_test["lag1"], X_test["vol5"],
    c=y_test, cmap="coolwarm", s=20, edgecolor="k"
)
plt.title(f"Decision Boundary — Logistic Regression ({ticker} ADR)")
# Both axes show raw feature values; only the model input is standardized.
plt.xlabel("Lagged Return")
plt.ylabel("5-Day Volatility")
plt.tight_layout()
plt.show()


### Challenge: logistic regression + unified evaluation function
This helper evaluates regression or classification models using MAE/MSE or Accuracy/F1, depending on the `task` argument.

```python
# Utility: Unified Model Evaluation Function
# Helper function to evaluate regression or classification models quickly.

from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, f1_score

def evaluate_model(y_true, y_pred, task="regression"):
    """Print evaluation metrics for a regression or classification task.

    Parameters
    ----------
    y_true : array-like
        True target values.
    y_pred : array-like
        Predicted target values.
    task : str, default='regression'
        'regression' prints MAE and MSE; 'classification' prints accuracy
        and F1-score.

    Raises
    ------
    ValueError
        If ``task`` is neither 'regression' nor 'classification'.
    """
    if task == "regression":
        mae_value = mean_absolute_error(y_true, y_pred)
        mse_value = mean_squared_error(y_true, y_pred)
        print(f"MAE: {mae_value:.4f} | MSE: {mse_value:.4f}")
    elif task == "classification":
        acc = accuracy_score(y_true, y_pred)
        f1  = f1_score(y_true, y_pred)
        print(f"Accuracy: {acc:.3f} | F1-score: {f1:.3f}")
    else:
        # An unrecognized task used to print nothing at all; fail loudly instead.
        raise ValueError("Task must be either 'regression' or 'classification'.")
```        

#### Utility — Unified Evaluation Function (Return-Based)


```python

from sklearn.metrics import (
    mean_absolute_error, mean_squared_error,
    accuracy_score, f1_score, precision_score, recall_score
)

def evaluate_model(y_true, y_pred, task="regression"):
    """
    Score predictions against the ground truth and return the metrics.

    The result is a plain dictionary so it can be logged directly or collected
    into a DataFrame.

    Parameters
    ----------
    y_true : array-like
        True target values.
    y_pred : array-like
        Predicted target values.
    task : str, default='regression'
        Which metric set to compute: 'regression' or 'classification'.

    Returns
    -------
    dict
        'MAE' and 'MSE' for regression; 'Accuracy', 'Precision', 'Recall'
        and 'F1' for classification.

    Raises
    ------
    ValueError
        If ``task`` is not one of the two supported values.
    """
    if task == "regression":
        scores = {}
        scores["MAE"] = mean_absolute_error(y_true, y_pred)
        scores["MSE"] = mean_squared_error(y_true, y_pred)
        return scores

    if task == "classification":
        scores = {"Accuracy": accuracy_score(y_true, y_pred)}
        # zero_division=0 reports 0 instead of warning when a class is never predicted.
        for label, scorer in (
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1", f1_score),
        ):
            scores[label] = scorer(y_true, y_pred, zero_division=0)
        return scores

    raise ValueError("Task must be either 'regression' or 'classification'.")
```       

---
## Chapter 2 – Features and Representations (Standardization & Pipelines)
---

### 1. Setup and Feature Construction

import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

# Load the price/volume table and build a minimal feature set for one ticker.
df = pd.read_csv("adr_prices_and_vol.csv", parse_dates=["Date"])
ticker = "NU"

df["Return_1d"] = df[f"{ticker}_Price"].pct_change()
df["Vol_5d"] = df["Return_1d"].rolling(5).std()
df["MA_10"] = df[f"{ticker}_Price"].rolling(10).mean()
df["MA_50"] = df[f"{ticker}_Price"].rolling(50).mean()
df["MA_ratio"] = df["MA_10"] / df["MA_50"] - 1

# Target: 1 if the NEXT return is positive. The last row has no next return;
# keep its target as NaN so dropna() removes it instead of labelling it 0.
next_return = df["Return_1d"].shift(-1)
df["Target"] = (next_return > 0).astype(int).where(next_return.notna())

df = df.dropna().copy()
df["Target"] = df["Target"].astype(int)

In [None]:
# ==============================================================
# Feature Engineering for Predicting 1-Day Ahead Returns (NU ADR)
# ==============================================================

import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

# --- Load Data ---
df = pd.read_csv("adr_prices_and_vol.csv", parse_dates=["Date"])
ticker = "NU"

# --- Basic Features ---
df["Return_1d"] = df[f"{ticker}_Price"].pct_change()
df["Vol_5d"] = df["Return_1d"].rolling(5).std()
df["MA_10"] = df[f"{ticker}_Price"].rolling(10).mean()
df["MA_20"] = df[f"{ticker}_Price"].rolling(20).mean()
df["MA_50"] = df[f"{ticker}_Price"].rolling(50).mean()
df["MA_ratio"] = df["MA_10"] / df["MA_50"] - 1

# --- Momentum Features ---
df["Mom_5d"] = df["Return_1d"].rolling(5).sum()
df["Mom_20d"] = df["Return_1d"].rolling(20).sum()

# --- Higher Moments ---
# Bias-corrected sample skewness and excess (Fisher) kurtosis over a rolling window.
window = 20
df["Skew_20d"] = df["Return_1d"].rolling(window).apply(lambda x: skew(x, bias=False), raw=False)
df["Kurt_20d"] = df["Return_1d"].rolling(window).apply(lambda x: kurtosis(x, fisher=True, bias=False), raw=False)

# --- Volatility Ratios ---
# A zero denominator is mapped to NaN so the ratio never becomes +/-inf.
df["Vol_20d"] = df["Return_1d"].rolling(20).std()
df["Vol_ratio"] = (df["Vol_5d"] / df["Vol_20d"].replace(0, np.nan)) - 1

# --- Trend / Mean-Reversion ---
# Price z-score: the deviation of price from its 20-day mean is divided by the
# 20-day standard deviation of PRICE, so numerator and denominator share units
# (the return volatility Vol_20d is on a different scale).
price_std_20 = df[f"{ticker}_Price"].rolling(20).std()
df["Zscore_20d"] = (df[f"{ticker}_Price"] - df["MA_20"]) / price_std_20.replace(0, np.nan)
df["Trend_10_50"] = (df["MA_10"] > df["MA_50"]).astype(int)

# --- Liquidity (if available) ---
if f"{ticker}_Volume" in df.columns:
    df["Vol_Change_5d"] = df[f"{ticker}_Volume"].pct_change(5)
    df["Vol_SMA_ratio"] = (
        df[f"{ticker}_Volume"].rolling(5).mean() / df[f"{ticker}_Volume"].rolling(20).mean() - 1
    )

# --- RSI ---
# Average gain over average loss on 14-period simple rolling means.
delta = df[f"{ticker}_Price"].diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
rs = gain.rolling(14).mean() / loss.rolling(14).mean().replace(0, np.nan)
df["RSI_14"] = 100 - (100 / (1 + rs.replace([np.inf, -np.inf, 0], np.nan)))

# --- Target ---
# 1 if the NEXT return is positive. The last row has no next return; keep its
# target as NaN so the cleanup below removes it instead of labelling it 0.
next_return = df["Return_1d"].shift(-1)
df["Target"] = (next_return > 0).astype(int).where(next_return.notna())

# --- Cleanup ---
df = df.replace([np.inf, -np.inf], np.nan).dropna().copy()
df["Target"] = df["Target"].astype(int)

### 2. Feature Standardization (Z-Score)

In [None]:
# ==============================================================
# Feature Scaling (Standardization)
# ==============================================================

from sklearn.preprocessing import StandardScaler

# --- Select Features ---
feature_cols = [
    "Return_1d", "Vol_5d", "MA_ratio", "Mom_5d", "Mom_20d",
    "Skew_20d", "Kurt_20d", "Vol_ratio", "RSI_14", "Trend_10_50"
]

X = df[feature_cols]

# --- Standardize Features ---
# NOTE: the scaler is fitted on every row here, which suits the descriptive
# summary below; the modelling cells fit their scaler on training rows only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Convert Back to DataFrame ---
# fit_transform returns a bare array; restore column names and the row index.
X_scaled = pd.DataFrame(X_scaled, columns=feature_cols, index=df.index)

# --- Quick Sanity Check ---
# After standardization each column should show mean ~0 and std ~1.
X_scaled.describe().round(3)

### 3. Pairwise Feature Relationships

In [None]:
# ==============================================================
# Pairplot — Full Feature Relationships (NU ADR)
# ==============================================================

import seaborn as sns

# --- Select all engineered features ---
all_features = [
    "Return_1d", "Vol_5d", "MA_ratio", "Mom_5d", "Mom_20d",
    "Skew_20d", "Kurt_20d", "Vol_ratio", "RSI_14", "Trend_10_50"
]

# --- Subsample if needed (to make plot manageable) ---
# Optional: if you have thousands of rows, uncomment the line below
# df_sample = df.sample(300, random_state=42)
# else just use df directly
df_sample = df.copy()

# --- Create pairplot ---
# Points are colored by the next-day direction label; the diagonal shows a
# kernel density estimate per class.
sns.pairplot(
    df_sample,
    vars=all_features,
    hue="Target",
    diag_kind="kde",
    corner=True,  # show only lower triangle to avoid clutter
    plot_kws={'alpha': 0.5, 's': 15, 'edgecolor': 'none'}
)

# y slightly above 1 places the title above the grid of panels.
plt.suptitle("NU ADR — Full Feature Relationships (10 Variables)", y=1.02, fontsize=13)
plt.show()

In [None]:
# Correlation matrix of the engineered features, annotated with the
# coefficients; center=0 puts the neutral color at zero correlation.
plt.figure(figsize=(10, 8))
corr = df[all_features].corr()
sns.heatmap(corr, cmap="coolwarm", annot=True, fmt=".2f", center=0)
plt.title("NU ADR — Feature Correlation Matrix", fontsize=13)
plt.show()

### 4. Logistic Regression Pipeline

In [None]:
# ==============================================================
# 4. Logistic Regression Pipeline — Predicting Next-Day Direction
# ==============================================================

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# --- Features & Target ---
features = [
    "Return_1d", "Vol_5d", "MA_ratio", "Mom_5d", "Mom_20d",
    "Skew_20d", "Kurt_20d", "Vol_ratio", "RSI_14", "Trend_10_50"
]
X = df[features]
y = df["Target"].values

# --- Chronological Split (80 / 20) ---
# No shuffling: the model is trained on the earliest 80% of rows and tested on
# the most recent 20%.
split = int(0.8 * len(X))
X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y[:split], y[split:]

# --- Pipeline: Standardization + Logistic Regression ---
# Inside a pipeline the scaler is fitted on the training rows only, and the same
# transformation is reused at prediction time.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000, class_weight="balanced")
)
pipe.fit(X_train, y_train)

# --- Evaluate ---
train_acc = pipe.score(X_train, y_train)
test_acc  = pipe.score(X_test, y_test)

print(f"Train Accuracy: {train_acc:.3f}")
print(f"Test  Accuracy: {test_acc:.3f}")

# --- Confusion Matrix ---
ConfusionMatrixDisplay(confusion_matrix(y_test, pipe.predict(X_test))).plot(cmap="Blues")
plt.title("NU ADR — Logistic Regression Confusion Matrix")
plt.show()


from sklearn.metrics import classification_report

# Per-class precision, recall and F1 on the test rows.
y_pred = pipe.predict(X_test)
print(classification_report(y_test, y_pred, digits=3))


### 5. Logistic Decision Boundary (Optional Visualization)

In [None]:
# ==============================================================
# 5. Logistic Regression Decision Boundary (NU ADR)
# ==============================================================

from matplotlib.colors import ListedColormap
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# --- Select two features for visualization ---
feat1, feat2 = "Return_1d", "MA_ratio"
X2 = df[[feat1, feat2]].values
y2 = df["Target"].values

# --- Train pipeline (scaled logistic regression) ---
# Fitted on every row: this cell only visualizes the decision regions and does
# not report an out-of-sample score.
pipe2 = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, class_weight="balanced"))
pipe2.fit(X2, y2)

# --- Mesh grid over feature space ---
# The grid extends 0.02 beyond the observed range of each feature.
x_min, x_max = X2[:, 0].min() - 0.02, X2[:, 0].max() + 0.02
y_min, y_max = X2[:, 1].min() - 0.02, X2[:, 1].max() + 0.02
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                     np.linspace(y_min, y_max, 200))

# Flatten the grid into (n_points, 2), predict, and restore the grid shape.
Z = pipe2.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# --- Plot decision boundary and observations ---
plt.figure(figsize=(6, 5))
plt.contourf(xx, yy, Z, alpha=0.25, cmap=ListedColormap(["#FF9999", "#99CC99"]))
plt.scatter(X2[:, 0], X2[:, 1], c=y2, edgecolors='k', cmap="bwr", alpha=0.7)
plt.xlabel(feat1)
plt.ylabel(feat2)
plt.title("NU ADR — Logistic Regression Decision Regions")
plt.tight_layout()
plt.show()

#### Challenge 1: k-Fold Cross-Validation

In [None]:
# ==============================================================
# Chapter 2 – Challenge 1: k-Fold Cross-Validation (NU ADR)
# ==============================================================

from sklearn.model_selection import KFold, cross_val_score

# --- Feature Matrix & Target ---
X = df[[
    "Return_1d", "Vol_5d", "MA_ratio", "Mom_5d", "Mom_20d",
    "Skew_20d", "Kurt_20d", "Vol_ratio", "RSI_14", "Trend_10_50"
]].values
y = df["Target"].values

# --- Model Pipeline ---
# cross_val_score refits the whole pipeline per fold, so the scaler only ever
# sees that fold's training rows.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=2000, class_weight="balanced")
)

# --- 5-Fold Cross Validation ---
# NOTE(review): shuffle=True mixes earlier and later rows within each fold.
# The features are rolling-window statistics of a time series, so neighbouring
# rows share data and the fold scores may be optimistic; an ordered splitter
# such as TimeSeriesSplit avoids this — confirm whether plain k-fold is what
# the exercise intends.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(pipe, X, y, cv=kf, scoring="accuracy")

print("=== Logistic Regression 5-Fold CV Results ===")
print(f"Fold Accuracies : {cv_scores.round(3)}")
print(f"Mean Accuracy   : {cv_scores.mean():.3f}")
print(f"Std Deviation   : {cv_scores.std():.3f}")

#### Challenge 2: 2D Decision Boundary Visualization

In [None]:
# ==============================================================
# Chapter 2 – Challenge 2: 2D Decision Boundary Visualization (NU ADR)
# ==============================================================

# --- Choose two features for visualization ---
feat1, feat2 = "Return_1d", "MA_ratio"
X2 = df[[feat1, feat2]].values
y2 = df["Target"].values

# --- Standardization + Logistic Regression pipeline ---
# Fitted on every row: the cell draws decision regions only and reports no
# out-of-sample score.
pipe2 = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=2000, class_weight="balanced")
)
pipe2.fit(X2, y2)

# --- Create a meshgrid for decision boundary ---
# The grid extends 0.02 beyond the observed range of each feature.
x_min, x_max = X2[:, 0].min() - 0.02, X2[:, 0].max() + 0.02
y_min, y_max = X2[:, 1].min() - 0.02, X2[:, 1].max() + 0.02
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, 200),
    np.linspace(y_min, y_max, 200)
)

# --- Predict over the meshgrid ---
# Flatten to (n_points, 2), predict, then restore the grid shape for contourf.
Z = pipe2.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# --- Plot Decision Regions ---
plt.figure(figsize=(7, 5))
plt.contourf(xx, yy, Z, alpha=0.25, cmap=ListedColormap(["#FF9999", "#99CC99"]))
plt.scatter(X2[:, 0], X2[:, 1], c=y2, edgecolors="k", cmap="bwr", s=30, alpha=0.6)
plt.xlabel(feat1)
plt.ylabel(feat2)
plt.title(f"NU ADR – Logistic Regression Decision Boundary ({feat1} vs {feat2})")
plt.tight_layout()
plt.show()

#### Challenge 3: Minimal Data Pipeline API

In [None]:
# ==============================================================
# Chapter 2 – Challenge 3: Minimal Data Pipeline API (NU ADR)
# ==============================================================

# --- Custom StandardScaler Class ---
class MyScaler:
    """Minimal reimplementation of scikit-learn's StandardScaler.

    ``fit`` learns the per-column mean and (population) standard deviation;
    ``transform`` applies ``(X - mean_) / std_``. Columns with zero standard
    deviation are given a scale of 1, as StandardScaler does, so they are
    centered without dividing by zero and non-constant columns are scaled
    exactly (no epsilon added to the denominator).

    Attributes set by ``fit``: ``mean_`` and ``std_`` (the scale actually used).
    """
    def fit(self, X):
        """Learn column means and scales from X and return ``self``."""
        self.mean_ = np.mean(X, axis=0)
        std = np.std(X, axis=0)
        # Constant features: use scale 1 so transform() only centers them.
        self.std_ = np.where(std == 0, 1.0, std)
        return self

    def transform(self, X):
        """Standardize X with the statistics learned by ``fit``."""
        return (X - self.mean_) / self.std_

    def fit_transform(self, X):
        """Fit on X, then return the standardized X."""
        return self.fit(X).transform(X)

# --- Load & Prepare NU ADR Features ---
df = pd.read_csv("adr_prices_and_vol.csv", parse_dates=["Date"])
ticker = "NU"

df["Return_1d"] = df[f"{ticker}_Price"].pct_change()
df["Vol_5d"] = df["Return_1d"].rolling(5).std()
df["MA_10"] = df[f"{ticker}_Price"].rolling(10).mean()
df["MA_50"] = df[f"{ticker}_Price"].rolling(50).mean()
df["MA_ratio"] = df["MA_10"] / df["MA_50"] - 1
# Target: 1 if the NEXT return is positive. The last row has no next return;
# keep its target as NaN so dropna() removes it instead of labelling it 0.
next_return = df["Return_1d"].shift(-1)
df["Target"] = (next_return > 0).astype(int).where(next_return.notna())
df = df.dropna().copy()
df["Target"] = df["Target"].astype(int)

# --- Select Features & Split ---
X = df[["Return_1d", "Vol_5d", "MA_ratio"]].values
y = df["Target"].values

# Chronological 80/20 split: train on the earliest rows, test on the latest.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# --- Apply Custom Scaler ---
# Statistics come from the training rows only and are reused for the test rows.
scaler = MyScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Train Logistic Regression ---
clf = LogisticRegression(max_iter=1000, class_weight="balanced")
clf.fit(X_train_scaled, y_train)

# --- Evaluate ---
y_pred = clf.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)

print(f"Test Accuracy: {acc:.3f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=3))

---
## Chapter 3 – Model Evaluation and Cross-Validation
---

Ex 1: Logistic Regression vs. Linear SVC

In [None]:
from sklearn.svm import SVC

# --- Feature Engineering ---
# Both features use only information available before the return being
# predicted: the 5-period volatility is computed on the lagged series so its
# window never contains the target return.
lagged_returns = returns.shift(1)
X = pd.DataFrame({
    "lag1": lagged_returns,
    "vol5": lagged_returns.rolling(5).std()
}).dropna()

# Binary target, aligned with the surviving feature rows by index label.
y = (returns.loc[X.index] > 0).astype(int).values

# --- Chronological Split (60/20/20) ---
n = len(X)
n_train, n_val = int(0.6 * n), int(0.2 * n)
X_train = X.iloc[:n_train]
X_val = X.iloc[n_train:n_train + n_val]
X_test = X.iloc[n_train + n_val:]
y_train, y_val, y_test = y[:n_train], y[n_train:n_train+n_val], y[n_train+n_val:]

# --- Pipelines ---
# The scaler is fitted on the training rows only and reused for the other splits.
scaler = StandardScaler().fit(X_train)
X_train_s, X_val_s, X_test_s = scaler.transform(X_train), scaler.transform(X_val), scaler.transform(X_test)

clf_lr  = LogisticRegression().fit(X_train_s, y_train)
clf_svc = SVC(kernel="linear").fit(X_train_s, y_train)

# --- Evaluate ---
y_pred_lr  = clf_lr.predict(X_test_s)
y_pred_svc = clf_svc.predict(X_test_s)

acc_lr  = accuracy_score(y_test, y_pred_lr)
acc_svc = accuracy_score(y_test, y_pred_svc)

print(f"Test Accuracy → LogisticRegression: {acc_lr:.3f},  Linear SVC: {acc_svc:.3f}")
print("Confusion Matrix (LR):\n", confusion_matrix(y_test, y_pred_lr))
print("Confusion Matrix (SVC):\n", confusion_matrix(y_test, y_pred_svc))

In [None]:
def plot_decision_boundary(model, Xs, y, title,
                           xlabel="lag1 (standardized)",
                           ylabel="vol5 (standardized)",
                           pad=1, n_points=300):
    """Draw a classifier's decision regions over two features and overlay the data.

    Parameters
    ----------
    model : object with ``predict``
        Fitted classifier that accepts an (n, 2) array.
    Xs : numpy.ndarray
        Two-column feature array in the units the model was trained on.
    y : array-like
        Class labels used to color the points.
    title : str
        Plot title.
    xlabel, ylabel : str
        Axis labels; the defaults match the lag1/vol5 features of this exercise.
    pad : float, default=1
        Margin added on every side of the observed feature range.
    n_points : int, default=300
        Number of grid points per axis.
    """
    x_min, x_max = Xs[:, 0].min() - pad, Xs[:, 0].max() + pad
    y_min, y_max = Xs[:, 1].min() - pad, Xs[:, 1].max() + pad
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, n_points),
                         np.linspace(y_min, y_max, n_points))
    # Flatten the grid to (n_points**2, 2), predict, then restore the grid shape.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=ListedColormap(["#FFBBBB", "#BBFFBB"]), alpha=0.25)
    plt.scatter(Xs[:, 0], Xs[:, 1], c=y, cmap=ListedColormap(["#FF0000", "#00AA00"]),
                edgecolor="k", s=30, alpha=0.8)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()

# Both classifiers were fitted on standardized inputs, so the standardized test
# features are passed for the grid and the scatter.
plot_decision_boundary(clf_lr,  X_test_s, y_test, "Logistic Regression Boundary")
plot_decision_boundary(clf_svc, X_test_s, y_test, "Linear SVC Boundary")

Ex 2: Linear vs. Polynomial Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

# --- Feature setup ---
# Both features use only information available before the return being
# predicted: the 5-period volatility is computed on the lagged series so its
# window never contains the target return.
lagged_returns = returns.shift(1)
X = pd.DataFrame({
    "lag1": lagged_returns,
    "vol5": lagged_returns.rolling(5).std()
}).dropna()
# Raw returns (not signs) as the target, aligned to the feature rows by index label.
y = returns.loc[X.index].values

# --- Chronological split (60/20/20) ---
n = len(X)
n_train, n_val = int(0.6*n), int(0.2*n)
X_train = X.iloc[:n_train]
X_val = X.iloc[n_train:n_train + n_val]
X_test = X.iloc[n_train + n_val:]
y_train, y_val, y_test = y[:n_train], y[n_train:n_train+n_val], y[n_train+n_val:]

# --- Standardize ---
# Scaling statistics come from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s  = scaler.transform(X_test)

# --- Linear Regression baseline ---
lin_reg = LinearRegression().fit(X_train_s, y_train)
y_pred_lin = lin_reg.predict(X_test_s)
r2_lin = r2_score(y_test, y_pred_lin)
print(f"Linear Regression R²: {r2_lin:.4f}")

# --- Polynomial Features (degree=2) ---
# Two inputs expand to five columns: both originals, both squares and their
# product; the bias column is left to LinearRegression's own intercept.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_s)
X_test_poly  = poly.transform(X_test_s)

poly_reg = LinearRegression().fit(X_train_poly, y_train)
y_pred_poly = poly_reg.predict(X_test_poly)
r2_poly = r2_score(y_test, y_pred_poly)
print(f"Polynomial (deg=2) Regression R²: {r2_poly:.4f}")

#### Residual Diagnostics

In [None]:
# --- Residuals ---
# Test-set residuals (observed minus predicted) for both regressions.
res_lin  = y_test - y_pred_lin
res_poly = y_test - y_pred_poly

# Side-by-side residual-vs-prediction plots, each with a zero reference line.
fig, ax = plt.subplots(1, 2, figsize=(10,4))

ax[0].scatter(y_pred_lin, res_lin, color="gray", alpha=0.7)
ax[0].axhline(0, color="red", linestyle="--")
ax[0].set_title("Residuals: Linear Regression")
ax[0].set_xlabel("Predicted returns")
ax[0].set_ylabel("Residuals")

ax[1].scatter(y_pred_poly, res_poly, color="navy", alpha=0.7)
ax[1].axhline(0, color="red", linestyle="--")
ax[1].set_title("Residuals: Polynomial Regression (deg=2)")
ax[1].set_xlabel("Predicted returns")

plt.tight_layout()
plt.show()

In [None]:
# Coefficients are on the standardized feature scale. With two inputs and
# degree 2 (no bias column) the polynomial model has exactly five of them.
print("Linear Coefficients:", lin_reg.coef_)
print("Polynomial Coefficients (first 5):", poly_reg.coef_[:5])

| Finding          | Implication                                  |
| ---------------- | -------------------------------------------- |
| $R^2 < 0$        | No linear or quadratic predictive structure. |
| Coefficients ≈ 0 | Predictors have no meaningful slope.         |
| Residuals random | The model is unbiased but uninformative.     |


Ex 3: Shallow vs. Deep Decision Trees

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Both features use only information available before the return being
# predicted: the 5-period volatility is computed on the lagged series so its
# window never contains the target return.
lagged_returns = returns.shift(1)
X = pd.DataFrame({
    "lag1": lagged_returns,
    "vol5": lagged_returns.rolling(5).std()
}).dropna()
# Binary target, aligned with the surviving feature rows by index label.
y = (returns.loc[X.index] > 0).astype(int).values

# --- Chronological split (60/20/20) ---
n = len(X)
n_train, n_val = int(0.6*n), int(0.2*n)
X_train = X.iloc[:n_train]
X_val = X.iloc[n_train:n_train + n_val]
X_test = X.iloc[n_train + n_val:]
y_train, y_val, y_test = y[:n_train], y[n_train:n_train+n_val], y[n_train+n_val:]

# --- Fit shallow and deep trees ---
# The trees are trained on the features in their original (unscaled) units.
tree_shallow = DecisionTreeClassifier(max_depth=3, random_state=42)
tree_deep    = DecisionTreeClassifier(max_depth=10, random_state=42)

tree_shallow.fit(X_train, y_train)
tree_deep.fit(X_train, y_train)

# --- Evaluate ---
# Report the realized depth and leaf count next to the test accuracy so model
# complexity can be compared with out-of-sample performance.
for name, model in [("Shallow", tree_shallow), ("Deep", tree_deep)]:
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    depth = model.get_depth()
    leaves = model.get_n_leaves()
    print(f"{name} Tree → depth={depth}, leaves={leaves}, test acc={acc:.3f}")
    print("Confusion:\n", confusion_matrix(y_test, y_pred), "\n")

In [None]:
# --- Decision Tree Boundary Plotter (final clean version) ---
def plot_tree_boundary(model, X, y, title, h=0.02, pad=0.5):
    """Draw a fitted model's decision regions over a two-column DataFrame.

    The plot is drawn on the current axes; the caller creates the figure and
    calls ``plt.show()``.

    Parameters
    ----------
    model : object with ``predict``
        Fitted classifier that accepts a DataFrame with X's column names.
    X : pandas.DataFrame
        Exactly two feature columns, in the units the model expects.
    y : array-like
        Class labels used to color the points.
    title : str
        Plot title.
    h : float, default=0.02
        Grid step along both axes, in feature units.
    pad : float, default=0.5
        Margin added on every side of the observed feature range.
    """
    x_min, x_max = X.iloc[:,0].min() - pad, X.iloc[:,0].max() + pad
    y_min, y_max = X.iloc[:,1].min() - pad, X.iloc[:,1].max() + pad
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Wrap the grid as a DataFrame with the same feature names as X so the
    # model is queried with the column names it was fitted on.
    grid_df = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()],
                           columns=X.columns)

    Z = model.predict(grid_df).reshape(xx.shape)

    # --- Plot ---
    plt.contourf(xx, yy, Z, cmap=ListedColormap(["#FFBBBB", "#BBFFBB"]), alpha=0.25)
    plt.scatter(X.iloc[:,0], X.iloc[:,1], c=y,
                cmap=ListedColormap(["#FF0000", "#00AA00"]),
                edgecolor="k", s=30)
    # Label the axes with the actual column names (lag1 / vol5 in this exercise).
    plt.xlabel(X.columns[0]); plt.ylabel(X.columns[1])
    plt.title(title)

# --- 🧩 CALL THE FUNCTION FOR BOTH TREES ---
# One figure with two panels; plot_tree_boundary draws into the active subplot.
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plot_tree_boundary(tree_shallow, X_test, y_test, "Shallow Tree (max_depth=3)")

plt.subplot(1, 2, 2)
plot_tree_boundary(tree_deep, X_test, y_test, "Deep Tree (max_depth=10)")

plt.tight_layout()
plt.show()

| Finding                                     | Implication                          |
| ------------------------------------------- | ------------------------------------ |
| Depth ↑ → complexity ↑ but no accuracy gain | Evidence of overfitting noise        |
| Both accuracies ≈ 0.5                       | No predictive structure              |
| Visual fragmentation in deep tree           | Classic overfit in low-signal domain |


In [None]:
# Summary statistics of the test features, to see their scale and spread.
print(X_test.describe())

- Financial reality – Lagged return and short-term volatility carry almost no predictive power for the next return sign.

- The model learns trivial partitions near zero and outputs ≈ 50 % accuracy.

In [None]:
# Plot in percent units for readability. The trees were fitted on the raw
# (fractional) feature values, so they must still be queried in raw units:
# the adapter below converts percent inputs back before predicting.
PCT = 100.0


class _PercentInputModel:
    """Adapter: accepts features expressed in percent, predicts in raw units."""

    def __init__(self, model, scale):
        self._model = model
        self._scale = scale

    def predict(self, X):
        # Dividing a DataFrame keeps its column names, so the wrapped model
        # still sees the feature names it was fitted with.
        return self._model.predict(X / self._scale)


X_plot = X_test.copy()
X_plot['lag1'] *= PCT      # convert to %
X_plot['vol5'] *= PCT

plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
plot_tree_boundary(_PercentInputModel(tree_shallow, PCT), X_plot, y_test,
                   "Shallow Tree (max_depth=3, % scale)")

plt.subplot(1,2,2)
plot_tree_boundary(_PercentInputModel(tree_deep, PCT), X_plot, y_test,
                   "Deep Tree (max_depth=10, % scale)")

plt.tight_layout()
plt.show()

**Takeaways**

- Linear methods (Logistic, SVC) and polynomial expansions show **no predictive structure** in single-asset daily returns — consistent with market efficiency.

- Decision trees illustrate variance explosion with depth on noisy data.

- Residuals and decision boundaries confirm the same principle: randomness dominates.

Ex 1, Chapter 3 (PDF slides): Regression Toy Dataset

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# --- Fixed random seed for reproducibility ---
np.random.seed(42)

# --- Generate noisy linear data ---
n = 200
X = np.linspace(-3, 3, n).reshape(-1, 1)
y_true = 0.8 * X.squeeze() + 0.5          # underlying signal
noise = np.random.normal(0, 0.4, size=n)  # Gaussian noise
y = y_true + noise                        # observed target

# --- Train/Validation/Test split (60/20/20) ---
# X is sorted (np.linspace) and shuffle=False keeps that order, so the training
# set holds the lowest 60% of x values and the test set the highest 20%: the
# model is evaluated on x values outside its training range.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, shuffle=False)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)

# --- Fit Linear Regression ---
model = LinearRegression().fit(X_train, y_train)

# --- Predictions ---
y_pred_train = model.predict(X_train)
y_pred_val   = model.predict(X_val)
y_pred_test  = model.predict(X_test)

# --- Metrics ---
def report_metrics(y_true, y_pred, label):
    """Print R², MSE and MAE for one split on a single line, prefixed by `label`."""
    print(f"{label:>5} | R²={r2_score(y_true, y_pred):.3f}  "
          f"MSE={mean_squared_error(y_true, y_pred):.3f}  "
          f"MAE={mean_absolute_error(y_true, y_pred):.3f}")

report_metrics(y_train, y_pred_train, "Train")
report_metrics(y_val,   y_pred_val,   "Val")
report_metrics(y_test,  y_pred_test,  "Test")

# --- Visualization ---
# Data, the noise-free generating line, and the fitted line over the full x range.
plt.figure(figsize=(8,4))
plt.scatter(X_train, y_train, color="gray", alpha=0.5, label="Train data")
plt.scatter(X_test,  y_test,  color="blue", alpha=0.5, label="Test data")
plt.plot(X, y_true, "k--", label="True relationship")
plt.plot(X, model.predict(X), "r", label="Fitted line")
plt.legend(); plt.title("Noisy Linear Regression Toy"); plt.show()

# --- Residual plot ---
residuals = y_test - y_pred_test
plt.figure(figsize=(6,3))
plt.scatter(y_pred_test, residuals, color="purple", alpha=0.7)
plt.axhline(0, color="red", linestyle="--")
plt.xlabel("Predicted y"); plt.ylabel("Residuals")
plt.title("Residuals vs Predicted (Test set)")
plt.show()

| Concept                     | Insight                                                                                                              |
| --------------------------- | -------------------------------------------------------------------------------------------------------------------- |
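| **Bias–Variance Trade-off** | Train fit is strong; test degradation comes from noise, not model bias. Because the split is unshuffled, the test set contains only the largest 20 % of $x$ values, so little signal variance remains there relative to the noise. |
| **Residual Diagnostics**    | Random scatter → residuals ≈ i.i.d. $N(0, \sigma^2)$. Model specification is correct.                                |
| **Predictive Power**        | $R^2_{\text{test}} \approx 0.18$ means the linear model explains 18 % of the total variance in the test set — reasonable given the synthetic noise level and the narrow $x$ range of the test split. |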


#### Challenge 1: Hyperparameter Tuning

#### Step 1 — Setup & Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

#### Step 2 — Prepare the Data

In [None]:
# --- Feature engineering (same as before) ---
# lag1: return shifted by one row; vol5: 5-row rolling standard deviation.
# NOTE(review): rolling(5) includes the current row, i.e. the return whose
# sign is the label below — confirm this is intended.
X = pd.DataFrame({
    "lag1": returns.shift(1),
    "vol5": returns.rolling(5).std()
}).dropna()

# Label = 1 when the return on the same row is positive.
# Align by index label rather than by a positional tail slice, so labels stay
# matched to feature rows even if dropna() removes rows that are not at the start.
y = (returns.loc[X.index] > 0).astype(int).values

# --- Chronological split (60/20/20) ---
n = len(X)
n_train, n_val = int(0.6*n), int(0.2*n)
val_end = n_train + n_val
# .iloc makes the positional (time-ordered) row slicing explicit.
X_train, X_val, X_test = X.iloc[:n_train], X.iloc[n_train:val_end], X.iloc[val_end:]
y_train, y_val, y_test = y[:n_train], y[n_train:val_end], y[val_end:]

# --- Define CV strategy ---
# NOTE(review): shuffle=True mixes the time order inside the training set;
# for time-series data a forward-chaining splitter may be preferable — confirm.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

#### Step 3 — Tune RBF SVM

In [None]:
# --- Pipeline: standardize + SVM ---
# Step names ('scaler', 'svm') are the prefixes used by the grid keys below.
svm_steps = [
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf')),
]
pipe_svm = Pipeline(svm_steps)

# --- Parameter grid ---
param_grid_svm = {
    'svm__C': [0.1, 1, 10, 100],
    'svm__gamma': [0.01, 0.1, 1, 10],
}

# --- Grid Search with Stratified CV ---
grid_svm = GridSearchCV(
    estimator=pipe_svm,
    param_grid=param_grid_svm,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)
grid_svm.fit(X_train, y_train)

# --- Results ---
print("Best RBF SVM Parameters:", grid_svm.best_params_)
print(f"Best CV Accuracy: {grid_svm.best_score_:.3f}")

# --- Evaluate on test set ---
# predict() on the fitted search object is used for the held-out test rows.
y_pred_svm = grid_svm.predict(X_test)
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_svm):.3f}")

#### Step 4 — Tune Decision Tree

In [None]:
# --- Tree model ---
# Only the tree depth is searched; None is one of the candidate values.
param_grid_tree = {'max_depth': [1, 2, 3, 5, 8, 12, 15, None]}
tree_estimator = DecisionTreeClassifier(random_state=42)

grid_tree = GridSearchCV(
    estimator=tree_estimator,
    param_grid=param_grid_tree,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)
grid_tree.fit(X_train, y_train)

# --- Results ---
print(
    f"Best Tree Depth: {grid_tree.best_params_['max_depth']}",
    f"Best CV Accuracy: {grid_tree.best_score_:.3f}",
    sep="\n",
)

# --- Evaluate on test set ---
y_pred_tree = grid_tree.predict(X_test)
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_tree):.3f}")

#### Step 5 — Plot Validation Curves

In [None]:
# --- Plot heatmap for SVM grid search results ---
# Reshape the flat CV results into a gamma (rows) x C (columns) grid of mean CV accuracy.
svm_results = pd.DataFrame(grid_svm.cv_results_)
scores = svm_results.pivot(index='param_svm__gamma', columns='param_svm__C', values='mean_test_score')

plt.figure(figsize=(6,5))
plt.imshow(scores, interpolation='nearest', cmap='viridis')
plt.title("RBF SVM Grid Search (CV Accuracy)")
plt.xlabel("C"); plt.ylabel("gamma")
plt.colorbar(label="CV Accuracy")
# Take tick labels from the pivoted axes, not from the raw parameter lists,
# so each label always matches the row/column actually drawn.
plt.xticks(range(len(scores.columns)), [f"{c:g}" for c in scores.columns])
plt.yticks(range(len(scores.index)), [f"{g:g}" for g in scores.index])
plt.show()

# --- Plot Tree Depth vs Accuracy ---
tree_results = pd.DataFrame(grid_tree.cv_results_)
# max_depth contains None, which has no position on a numeric axis. Plot
# against categorical positions so the unlimited-depth score is shown too.
# Assumes cv_results_ rows follow the order of the single-parameter grid.
depth_values = param_grid_tree['max_depth']
depth_positions = list(range(len(depth_values)))
depth_labels = ["None" if d is None else str(d) for d in depth_values]

plt.figure(figsize=(6,4))
plt.plot(depth_positions, tree_results['mean_test_score'], marker='o')
plt.xticks(depth_positions, depth_labels)
plt.title("Decision Tree: Validation Accuracy vs max_depth")
plt.xlabel("Max Depth"); plt.ylabel("CV Accuracy")
plt.grid(True)
plt.show()

#### Challenge 2: Class Imbalance

#### Step 1 — Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, precision_recall_curve
)

#### Step 2 — Generate Imbalanced Data

In [None]:
# Strongly imbalanced binary dataset (90% of class 0)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.9, 0.1],
    flip_y=0.02,
    random_state=42,
)

# Split train/test (stratified on y, 30% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Scale features: statistics are learned from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### Step 3 — Fit a Baseline Classifier

In [None]:
# Baseline: logistic regression with default settings
clf = LogisticRegression().fit(X_train, y_train)

# Hard labels, plus the predicted probability of class 1 (second column)
y_pred = clf.predict(X_test)
test_proba = clf.predict_proba(X_test)
y_prob = test_proba[:, 1]

#### Step 4 — Evaluate Metrics

In [None]:
# Label-based metrics all compare y_test with the hard predictions
acc, prec, rec, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
# ROC AUC is computed from the class-1 probabilities instead
roc_auc = roc_auc_score(y_test, y_prob)

print(
    f"Accuracy : {acc:.3f}",
    f"Precision: {prec:.3f}",
    f"Recall   : {rec:.3f}",
    f"F1-score : {f1:.3f}",
    f"ROC AUC  : {roc_auc:.3f}",
    sep="\n",
)

#### Step 5 — Plot ROC and PR Curves

In [None]:
# Curve coordinates for both panels
fpr, tpr, _ = roc_curve(y_test, y_prob)
prec_curve, rec_curve, _ = precision_recall_curve(y_test, y_prob)

fig_curves, (ax_roc, ax_pr) = plt.subplots(1, 2, figsize=(10, 4))

# --- ROC Curve (left panel) ---
ax_roc.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
ax_roc.plot([0, 1], [0, 1], 'k--', alpha=0.6)  # diagonal reference line
ax_roc.set_xlabel("False Positive Rate")
ax_roc.set_ylabel("True Positive Rate")
ax_roc.set_title("ROC Curve")
ax_roc.legend()

# --- Precision-Recall Curve (right panel) ---
ax_pr.plot(rec_curve, prec_curve)
ax_pr.set_xlabel("Recall")
ax_pr.set_ylabel("Precision")
ax_pr.set_title("Precision–Recall Curve")

plt.tight_layout()
plt.show()

In [None]:
# Same model as the baseline, but with class_weight='balanced'
clf_bal = LogisticRegression(class_weight='balanced').fit(X_train, y_train)
y_pred_bal = clf_bal.predict(X_test)
print("Balanced F1:", f1_score(y_test, y_pred_bal))

## Results Recap

| Metric        |  Baseline | With `class_weight='balanced'` | Interpretation                                                                     |
| :------------ | :-------: | :----------------------------: | :--------------------------------------------------------------------------------- |
| **Accuracy**  |   0.973   | 0.956 (≈ expected slight drop) | Accuracy dominated by majority class — not a reliable metric here.                 |
| **Precision** | **1.000** |           ↓ slightly           | Model almost never produces false positives, but may miss true ones.               |
| **Recall**    | **0.758** |   ↑ slightly after balancing   | Model captures more minority positives with weighting.                             |
| **F1-score**  |   0.862   |              0.853             | Balanced model trades a bit of precision for recall — overall robustness improves. |
| **ROC AUC**   | **0.995** |             ≈ same             | Strong separation between classes — high rank consistency.                         |

---

## Interpretation

### ROC Curve

* **AUC = 0.995** shows the classifier ranks positive cases almost perfectly.
* Near-square shape (upper-left corner) means the model discriminates cleanly.

### Precision–Recall Curve

* Extremely high precision at moderate recall indicates **rare-event dominance** — the model confidently identifies most positives with almost no false alarms.
* The small dip near recall ≈ 1 reflects the final few borderline cases.

### Insight

In financial ML, this scenario parallels *rare profitable trades* or *default prediction*:

> It’s better to **capture fewer true positives with high precision** than to flood the book with false signals.

However, recall becomes crucial if missing positives has a high opportunity cost — hence why **balanced weighting** is introduced.

#### Challenge 3 – Probability Calibration

#### Step 1 — Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.metrics import (
    brier_score_loss, roc_auc_score, precision_score, recall_score, f1_score
)

#### Step 2 — Generate a Balanced Dataset

In [None]:
# --- Synthetic balanced binary classification ---
# No `weights` argument is passed, unlike the imbalanced dataset of Challenge 2.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

# Train-test split (stratified, 30% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=42
)

# Standardize features using statistics of the training rows only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#### Step 3 — Fit Uncalibrated SVM (Baseline)

In [None]:
# --- SVM with RBF kernel (uncalibrated probabilities) ---
# probability=False: only raw decision scores are taken from this model.
svm_uncal = SVC(
    kernel='rbf', C=1.0, gamma=0.5, probability=False, random_state=42
).fit(X_train, y_train)

# --- Get decision function scores ---
scores_uncal = svm_uncal.decision_function(X_test)

#### Step 4 — Fit Calibrated SVM

In [None]:
# --- Calibrate using sigmoid (Platt scaling) ---
svm_cal = CalibratedClassifierCV(svm_uncal, cv=5, method='sigmoid').fit(X_train, y_train)

# --- Predicted probabilities ---
# Keep only the class-1 column of the (n_samples, 2) probability matrix.
cal_proba = svm_cal.predict_proba(X_test)
probs_cal = cal_proba[:, 1]

#### Step 5 — Compute Calibration Metrics

In [None]:
# Squash the raw decision_function scores into [0,1] with a logistic sigmoid
# so they can be scored alongside the calibrated probabilities
from scipy.special import expit
probs_uncal = expit(scores_uncal)

# --- Brier scores (lower = better) ---
brier_uncal, brier_cal = (
    brier_score_loss(y_test, p) for p in (probs_uncal, probs_cal)
)

print(
    f"Brier (uncalibrated): {brier_uncal:.4f}",
    f"Brier (calibrated):   {brier_cal:.4f}",
    sep="\n",
)

# --- Other performance metrics ---
print(
    f"ROC AUC (uncal): {roc_auc_score(y_test, probs_uncal):.3f}",
    f"ROC AUC (cal):   {roc_auc_score(y_test, probs_cal):.3f}",
    sep="\n",
)

#### Step 6 — Plot Reliability Curves

In [None]:
# Binned observed frequency vs. mean predicted probability (10 bins each)
prob_true_uncal, prob_pred_uncal = calibration_curve(y_test, probs_uncal, n_bins=10)
prob_true_cal, prob_pred_cal = calibration_curve(y_test, probs_cal, n_bins=10)

fig_rel, ax_rel = plt.subplots(figsize=(8, 5))
ax_rel.plot(prob_pred_uncal, prob_true_uncal, "o-", label=f"Uncalibrated (Brier={brier_uncal:.3f})")
ax_rel.plot(prob_pred_cal, prob_true_cal, "o-", label=f"Calibrated (Brier={brier_cal:.3f})")
ax_rel.plot([0, 1], [0, 1], "k--", label="Perfect calibration")  # y = x reference
ax_rel.set_xlabel("Predicted probability")
ax_rel.set_ylabel("Observed frequency")
ax_rel.set_title("Reliability Diagram (SVM)")
ax_rel.legend()
ax_rel.grid(True)
plt.show()

#### Extended Challenge 3 – Platt vs Isotonic Calibration

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.metrics import brier_score_loss, roc_auc_score
from scipy.special import expit

# --- Data generation ---
# Same synthetic two-feature problem and split settings as Challenge 3.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=42
)

# Fit the scaler on the training rows, then apply it to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = (scaler.transform(part) for part in (X_train, X_test))

# --- Uncalibrated SVM ---
svm_uncal = SVC(
    kernel='rbf', C=1.0, gamma=0.5, probability=False, random_state=42
).fit(X_train, y_train)
raw_scores = svm_uncal.decision_function(X_test)
probs_uncal = expit(raw_scores)  # convert scores to [0,1]

# --- Calibrated SVM (sigmoid / Platt) ---
svm_sigmoid = CalibratedClassifierCV(svm_uncal, cv=5, method='sigmoid').fit(X_train, y_train)
probs_sigmoid = svm_sigmoid.predict_proba(X_test)[:, 1]

# --- Calibrated SVM (isotonic regression) ---
svm_isotonic = CalibratedClassifierCV(svm_uncal, cv=5, method='isotonic').fit(X_train, y_train)
probs_isotonic = svm_isotonic.predict_proba(X_test)[:, 1]

# --- Compute calibration metrics ---
# This cell's import list omits pandas although the summary table needs it;
# import it here so the cell also runs in a fresh kernel.
import pandas as pd


def summarize_model(name, probs):
    """Score one set of class-1 probabilities against the global `y_test`.

    Parameters
    ----------
    name : str
        Label stored under the "Model" key.
    probs : array-like
        Predicted probabilities for the positive class, one per test row.

    Returns
    -------
    dict
        Keys "Model", "Brier" (Brier score loss) and "ROC AUC".
    """
    return {
        "Model": name,
        "Brier": brier_score_loss(y_test, probs),
        "ROC AUC": roc_auc_score(y_test, probs),
    }


# One row per model, in the order the reliability plot indexes them (0, 1, 2).
summary = pd.DataFrame([
    summarize_model("Uncalibrated", probs_uncal),
    summarize_model("Sigmoid", probs_sigmoid),
    summarize_model("Isotonic", probs_isotonic),
])
print(summary)

# --- Reliability curves ---
# Each call returns (observed frequency, mean predicted probability) per bin.
prob_true_uncal, prob_pred_uncal = calibration_curve(y_test, probs_uncal, n_bins=10)
prob_true_sigmoid, prob_pred_sigmoid = calibration_curve(y_test, probs_sigmoid, n_bins=10)
prob_true_isotonic, prob_pred_isotonic = calibration_curve(y_test, probs_isotonic, n_bins=10)

fig_cmp, ax_cmp = plt.subplots(figsize=(7, 6))
# Legend entries read the Brier score from the matching row of `summary`.
ax_cmp.plot(prob_pred_uncal, prob_true_uncal, "o-", label=f"Uncalibrated (Brier={summary.iloc[0]['Brier']:.3f})")
ax_cmp.plot(prob_pred_sigmoid, prob_true_sigmoid, "o-", label=f"Sigmoid (Brier={summary.iloc[1]['Brier']:.3f})")
ax_cmp.plot(prob_pred_isotonic, prob_true_isotonic, "o-", label=f"Isotonic (Brier={summary.iloc[2]['Brier']:.3f})")
ax_cmp.plot([0, 1], [0, 1], "k--", label="Perfect calibration")
ax_cmp.set_xlabel("Predicted probability")
ax_cmp.set_ylabel("Observed frequency")
ax_cmp.set_title("Reliability Diagram – SVM Calibration Comparison")
ax_cmp.legend()
ax_cmp.grid(True)
plt.show()

## Results Summary

| Model               |   Brier   | ROC AUC | Interpretation                                                                                   |
| :------------------ | :-------: | :-----: | :----------------------------------------------------------------------------------------------- |
| **Uncalibrated**    |   0.072   |  0.984  | Excellent ranking power (AUC ≈ 0.98) but *poor calibration* — probabilities overconfident.       |
| **Sigmoid (Platt)** | **0.048** |  0.984  | Substantial improvement — smoother mapping, reduced overconfidence.                              |
| **Isotonic**        | **0.047** |  0.979  | Slightly lower AUC (tiny drop due to local overfitting) but *best calibration accuracy* overall. |

---

## Interpretation

### **ROC AUC (Ranking Performance)**

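* Essentially unchanged across models → calibration **barely alters ranking**. A single monotonic calibration map would preserve the ranking exactly; the small differences (e.g., 0.984 → 0.979 for isotonic) arise because `CalibratedClassifierCV(cv=5)` averages several refitted models and isotonic regression can introduce ties.
* Confirms that both Platt and isotonic calibration largely preserve discrimination capacity (the model's ordering of events).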

### **Brier Score (Calibration Accuracy)**

* Sharp reduction from 0.072 → 0.048 → 0.047 confirms **probability quality improved ~35%**.
* This corresponds to a better match between predicted probabilities and true event frequencies — critical when using model outputs for position sizing, risk capital allocation, or expected return modeling.

### **Reliability Curves**

* **Uncalibrated (blue):** classic SVM pattern — steep jump near 0.4–0.6, showing overconfident predictions.
* **Sigmoid (orange):** near-diagonal, nicely smoothed — good default choice for small/medium datasets.
* **Isotonic (green):** piecewise and tightly hugging the diagonal — slightly more variance (zigzagging), reflecting nonparametric flexibility.

---

## Interpretation for Quantitative Finance Context

| Concept                                       | Relevance                                                                                                                                                                       |
| :-------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Calibrated Probability = Trade Confidence** | In a trading signal model, a 0.8 probability after calibration means historically ≈ 80 % of such signals were correct — critical for position sizing or portfolio risk scaling. |
| **Overconfident Models = Mispriced Risk**     | Uncalibrated outputs tend to overweight high-confidence signals (e.g., misestimating Sharpe or VaR).                                                                            |
| **Platt vs Isotonic**                         | Use *Platt* for limited or noisy samples (parametric, smoother, less prone to overfitting). Use *Isotonic* for large samples or when the score-to-probability relationship is clearly not sigmoid-shaped; both mappings are monotonic. |