# Agency MBS pricing exploration

This notebook shows how I inspect the data, train simple termination models, and run a Monte Carlo pricing on an example pool.

I keep the code modular and call into the package in `src/mbs_lab`. Plots are made with matplotlib and randomness is seeded for reproducibility.

Run date 2025-09-28


In [None]:
import os, json, yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import importlib

from src.mbs_lab import utils, models, rates, cashflow, engine

# Fix the seed through the package helper before anything stochastic runs
# (what exactly gets seeded is decided inside utils.set_seed).
utils.set_seed(42)

# Read the run configuration. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the kernel's lifetime.
cfg_path = 'configs/example_config.yaml'
with open(cfg_path, 'r') as cfg_file:
    cfg = yaml.safe_load(cfg_file)

# The csv location comes from the config so the notebook and the package
# read the same file.
csv_path = cfg['data']['csv_path']
df = pd.read_csv(csv_path)
df.head()

## Quick data checks

In [None]:
# Summary statistics for the numeric columns, transposed so each column of df
# becomes one row. DataFrame.describe has no `numeric_only` keyword (passing it
# raises TypeError); numeric columns are selected with `include` instead.
df.describe(include=[np.number]).T

In [None]:
# Histogram of the coupon_diff column, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(6, 4))
df['coupon_diff'].hist(bins=40, ax=ax)
ax.set_title('Distribution of coupon_diff')
ax.set_xlabel('coupon_diff')
ax.set_ylabel('count')
plt.show()

## Train simple prepay and default models on the current csv

In [None]:
# Directory for model artifacts, taken from the config. parents=True lets a
# nested path (e.g. "out/artifacts") be created on a fresh checkout instead of
# raising FileNotFoundError when the parent directory is missing.
art = Path(cfg['artifacts_dir'])
art.mkdir(parents=True, exist_ok=True)

# Fit one binary model per label column. Each call receives its own copy of df,
# the label column name from the config, and the path for that model's artifact
# (the same paths are later passed to models.load_model).
res_pre = models.fit_binary_model(df.copy(), cfg['labels']['prepay'], str(art / 'prepay_model.joblib'))
res_def = models.fit_binary_model(df.copy(), cfg['labels']['default'], str(art / 'default_model.joblib'))
res_pre, res_def

## ROC style sanity plot
I score the in-sample data for a quick curve, so the AUCs will look optimistic. For a real evaluation I would normally hold out a time slice, but this keeps the notebook compact.

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split

# In-sample feature matrix and integer labels, taken straight from the csv frame.
X = df[models.CORE_FEATURES].to_numpy()
y_pre, y_def = (
    df[cfg['labels'][kind]].astype(int).to_numpy()
    for kind in ('prepay', 'default')
)

# Load the models from the artifact paths handed to fit_binary_model earlier.
pipe_pre, pipe_def = (
    models.load_model(str(art / fname))
    for fname in ('prepay_model.joblib', 'default_model.joblib')
)

# Column 1 of predict_proba is used as the score for each model.
p_pre = pipe_pre.predict_proba(X)[:, 1]
p_def = pipe_def.predict_proba(X)[:, 1]

# ROC points and area under the curve per model; thresholds are discarded.
fpr_pre, tpr_pre, _ = roc_curve(y_pre, p_pre)
auc_pre = auc(fpr_pre, tpr_pre)
fpr_def, tpr_def, _ = roc_curve(y_def, p_def)
auc_def = auc(fpr_def, tpr_def)

# Both ROC curves on one Axes, with the AUC values in the legend.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr_pre, tpr_pre, label=f'prepay AUC {auc_pre:.3f}')
ax.plot(fpr_def, tpr_def, label=f'default AUC {auc_def:.3f}')
ax.plot([0, 1], [0, 1], '--')  # diagonal reference line
ax.set_xlabel('false positive rate')
ax.set_ylabel('true positive rate')
ax.set_title('ROC curves')
ax.legend()
plt.show()

## Pricing demo on one pool

In [None]:
# Hand the config path to engine.price_one; the returned object is kept in
# `out` and shown as the cell result.
# NOTE(review): presumably this is the Monte Carlo pricing run mentioned in the
# intro, reading the same yaml as `cfg` — confirm against engine.price_one.
out = engine.price_one(cfg_path)
out

## Rate path snapshot

In [None]:
# Pull the simulation settings out of the 'rates' section of the config once.
rate_cfg = cfg['rates']
n_paths, n_steps, dt = (rate_cfg[key] for key in ('n_paths', 'n_steps', 'dt'))
r0, a, sigma, theta = (rate_cfg[key] for key in ('r0', 'a', 'sigma', 'theta'))

# Note the argument order expected by the simulator: a, sigma come before r0.
r = rates.simulate_hull_white_paths(n_paths, n_steps, dt, a, sigma, r0, theta)
# Draw at most the first five entries of r, one line each.
# NOTE(review): assumes the first axis of r indexes paths — confirm against
# rates.simulate_hull_white_paths.
fig, ax = plt.subplots(figsize=(6, 4))
n_show = min(5, r.shape[0])
for idx in range(n_show):
    ax.plot(r[idx])
ax.set_title('sample short rate paths')
ax.set_xlabel('step')
ax.set_ylabel('r')
plt.show()

## Cashflow profile for the synthetic pool

In [None]:
# Feature matrix with gaps filled from the previous row, then from the next row
# for anything still missing at the top. DataFrame.ffill()/bfill() replace
# fillna(method=...), which pandas has deprecated.
X = df[models.CORE_FEATURES].ffill().bfill().values

# Load both models from the artifacts directory.
pre = models.load_model(str(art / 'prepay_model.joblib'))
de = models.load_model(str(art / 'default_model.joblib'))

# Turn the model outputs into the two vectors the cashflow projection takes.
# NOTE(review): presumably smm/dmm are treated as per-month rates, one per row
# of X — confirm how project_pool_cashflows aligns them with maturity_months.
smm = models.smm_from_features(pre, X)
dmm = models.default_prob_from_features(de, X)

# Pool terms come from the 'pool' section of the config.
pool = cfg['pool']
cf = cashflow.project_pool_cashflows(pool['upb'], pool['wac'], pool['maturity_months'], smm, dmm, pool['loss_severity'])
# Plot the cash_to_investor column against month, then show the first rows of cf.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(cf['month'], cf['cash_to_investor'])
ax.set_title('monthly cash to investor')
ax.set_xlabel('month')
ax.set_ylabel('cash')
plt.show()
cf.head()