# 02_train_cql.ipynb
Enhanced training notebook for CQL:
- Loss & diagnostic plots
- Optional Weights & Biases logging
- Hyperparameter sweep template


In [6]:
!pip install pyyaml
!pip install matplotlib

Collecting matplotlib
  Using cached matplotlib-3.10.7-cp311-cp311-macosx_10_12_x86_64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.61.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (113 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl.metadata (8.8 kB)
Collecting pyparsing>=3 (from matplotlib)
  Using cached pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Using cached matplotlib-3.10.7-cp311-cp311-macosx_10_12_x86_64.whl (8.3 MB)
Using cached contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl (288 kB)
Using cached cy

In [8]:
import sys, os

# Go up one directory from notebooks/ → project root
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Add project root and src/ to Python path
sys.path.append(project_root)
sys.path.append(os.path.join(project_root, "src"))

print("PYTHONPATH updated:", sys.path[-2:])

import yaml
import matplotlib.pyplot as plt
from src.rl.cql import build_config_from_yamls, train_cql
print('Imports OK')

PYTHONPATH updated: ['/Users/matthewsaccone/Desktop/MLB-Bullpen-Strategy', '/Users/matthewsaccone/Desktop/MLB-Bullpen-Strategy/src']
Imports OK


In [12]:
model_cfg = yaml.safe_load(open('../configs/model.yaml'))
training_cfg = yaml.safe_load(open('../configs/training.yaml'))
data_cfg = yaml.safe_load(open('../configs/data.yaml'))
env_cfg = yaml.safe_load(open('../configs/env.yaml'))
print('Loaded configs: model_type=', model_cfg.get('model_type'))

Loaded configs: model_type= QNetwork


In [13]:
cfg = build_config_from_yamls()
cfg

FileNotFoundError: [Errno 2] No such file or directory: 'configs/model.yaml'

In [None]:
dataset_path = os.path.join(data_cfg['processed_data_dir'], data_cfg['dataset_file'])
dataset_path

In [None]:
use_wandb = cfg.use_wandb
if use_wandb:
    import wandb
    wandb.init(project='mlb-bullpen-cql', config=cfg.__dict__)
    print('WandB enabled')
else:
    print('WandB disabled')

In [None]:
q_net, target_q_net, losses, diagnostics = train_cql(cfg, dataset_npz=dataset_path)
print('Training finished')

## Training Loss Plot

In [None]:
plt.figure(figsize=(10,4))
plt.plot(losses)
plt.title('CQL Training Loss')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.grid(True)
plt.show()

## TD Error & Conservative Regularizer

In [None]:
td_vals = [d['td_loss'] for d in diagnostics]
cql_vals = [d['cql_reg'] for d in diagnostics]
plt.figure(figsize=(10,4))
plt.plot(td_vals, label='TD Loss')
plt.plot(cql_vals, label='CQL Regularizer')
plt.legend(); plt.grid(True); plt.title('Diagnostics'); plt.show()

## Hyperparameter Sweep Template

In [None]:
sweep_results = {}
cql_alphas = [0.1, 0.5, 1.0, 5.0]
for alpha in cql_alphas:
    print(f'Running sweep alpha={alpha}')
    cfg_sweep = build_config_from_yamls()
    cfg_sweep.cql_alpha = alpha
    q_net_s, t_s, loss_s, diag_s = train_cql(cfg_sweep, dataset_npz=dataset_path, max_steps=5000)
    sweep_results[alpha] = sum([d['td_loss'] for d in diag_s]) / len(diag_s)

sweep_results