# In [None]:
# Re-do plumbing charts using the LHM theme with robust data loading and bullet summaries in a Word doc
# Steps:
# 1) Load FRED data with multiple fallbacks; if all fail, create synthetic data
# 2) Build charts with LHM style
# 3) Generate 5 bullets per chart
# 4) Save PNGs and compile a Word document with charts and bullets

import os, io, sys, time, warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from tqdm import tqdm

# -------------------- Data Loading (Robust) --------------------
# Friendly name (used as the DataFrame column) -> FRED series ID passed to the loaders.
series_map = dict(
    TGA='WTREGEN',
    ONRRP_VOL='RRPONTSYD',
    RESERVES='WRESBAL',
    FED_ASSETS='WALCL',
    SOFR='SOFR',
    EFFR='EFFR',
    IORB='IORB',
    ONRRP_RATE='RRPONTSYAWARD',
    DGS1MO='DGS1MO',
    DGS3MO='DGS3MO',
    DGS2='DGS2',
    SP500='SP500',
    VIX='VIXCLS',
)

# Observations dated before this timestamp are discarded by the loaders.
start_date = pd.Timestamp('2018-01-01')

# Optional provider: a fredapi client. Any exception raised while importing the
# package or constructing the client leaves `fred` as None, which get_series
# treats as "provider unavailable".
try:
    from fredapi import Fred
    fred_key = os.environ.get('FRED_API_KEY')
    # Only pass api_key when the environment variable holds a non-empty value.
    fred = Fred() if not fred_key else Fred(api_key=fred_key)
except Exception:
    fred = None

import requests

def fred_csv(series_id):
    """Fetch one series as CSV from the FRED website.

    Returns a single-column DataFrame (column named ``series_id``, indexed by
    the parsed DATE column) restricted to rows on or after ``start_date``.
    Returns None when the HTTP status is not 200 or when the payload lacks
    the DATE / VALUE columns. Network and parsing exceptions are not caught
    here; they propagate to the caller.
    """
    url = ''.join(['https://fred.stlouisfed.org/series/', series_id, '/downloaddata/', series_id, '.csv'])
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    raw = pd.read_csv(io.StringIO(response.text))
    if not {'DATE', 'VALUE'}.issubset(raw.columns):
        return None

    dates = pd.to_datetime(raw['DATE'])
    # Non-numeric cells become NaN instead of raising.
    values = pd.to_numeric(raw['VALUE'], errors='coerce')
    frame = pd.DataFrame({series_id: values.values}, index=dates)
    return frame[frame.index >= start_date]

# Optional provider: pandas_datareader. `pdr_ok` records whether the import
# succeeded so get_series knows whether `web` is usable.
try:
    import pandas_datareader.data as web
except Exception:
    pdr_ok = False
else:
    pdr_ok = True

def get_series(series_id):
    """Load one FRED series, trying each available provider in turn.

    Provider order: the fredapi client (when ``fred`` is not None),
    pandas_datareader (when ``pdr_ok`` is true), then the plain CSV download
    in ``fred_csv``. A provider that raises or returns no rows is skipped so
    the next one gets a chance.

    Returns a single-column DataFrame named ``series_id`` limited to rows on
    or after ``start_date``, or whatever the CSV fallback produced (possibly
    None or an empty frame) when every provider failed.
    """
    if fred is not None:
        try:
            s = fred.get_series(series_id)
            s = s.to_frame(name=series_id)
            s.index = pd.to_datetime(s.index)
            s = s.loc[s.index >= start_date]
            if not s.empty:
                return s
        except Exception:
            # Deliberate best-effort: fall through to the next provider.
            pass
    if pdr_ok:
        try:
            # Pass start explicitly; without it DataReader applies its own
            # default window, which need not reach back to start_date.
            s = web.DataReader(series_id, 'fred', start=start_date)
            s.index = pd.to_datetime(s.index)
            s.columns = [series_id]
            s = s.loc[s.index >= start_date]
            if not s.empty:
                return s
        except Exception:
            # Deliberate best-effort: fall through to the CSV download.
            pass
    try:
        return fred_csv(series_id)
    except Exception:
        return None

# Download every series in series_map, retrying each one a few times.
# series_data: friendly name -> single-column DataFrame (column renamed to the friendly name)
# failed:      friendly name -> FRED series ID for series that never loaded
series_data = {}
failed = {}
max_attempts = 3
for k, sid in tqdm(series_map.items(), desc='Downloading series'):
    df = None
    for attempt in range(max_attempts):
        df = get_series(sid)
        if df is not None and not df.empty:
            break
        # Pause only between attempts; sleeping after the last one just delays the run.
        if attempt < max_attempts - 1:
            time.sleep(1.0)
    if df is None or df.empty:
        failed[k] = sid
    else:
        df.columns = [k]
        series_data[k] = df

print('Loaded series successfully: ' + str(len(series_data)) + ' | Failed: ' + str(len(failed)))
if failed:
    print('Failed series: ' + str(failed))

# Synthetic fallback only if everything failed
if not series_data:
    # Nothing could be downloaded: fabricate a deterministic (seeded) business-day
    # dataset so the charts and bullets below can still be produced for illustration.
    idx = pd.date_range(start=start_date, end=datetime.today(), freq='B')
    n = len(idx)
    rng = np.random.default_rng(7)
    fed_assets = 7000 + np.cumsum(rng.normal(0, 3, n))
    reserves = 3200 + np.cumsum(rng.normal(0, 2, n))
    tga = 500 + 120*np.sin(np.linspace(0, 20, n)) + rng.normal(0, 10, n)
    onrrp_rate = 4.3 + 0.2*np.sin(np.linspace(0, 8, n))
    # The other short rates are fixed offsets around the ON RRP rate.
    iorb = onrrp_rate + 0.1
    effr = onrrp_rate + 0.05
    sofr = onrrp_rate - 0.03
    dgs1m = onrrp_rate + 0.1
    dgs3m = onrrp_rate + 0.15
    dgs2 = 4.1 + 0.5*np.sin(np.linspace(0, 6, n))
    # ON RRP usage falls as the (non-negative) bill-minus-RRP spread widens.
    spread = np.maximum(0.0, dgs1m - onrrp_rate)
    onrrp_vol = 2000 - 900*spread + rng.normal(0, 20, n)
    spx = 3000 + np.cumsum(rng.normal(0.6, 8, n))
    vix = 18 + 4*np.sin(np.linspace(0, 8, n)) + rng.normal(0, 1.5, n)

    # NOTE(review): the *1000 on balance-sheet series presumably mimics a
    # millions-of-dollars convention in the downloaded data -- confirm.
    series_data = {
        'FED_ASSETS': pd.DataFrame({'FED_ASSETS': fed_assets*1000}, index=idx),
        'RESERVES': pd.DataFrame({'RESERVES': reserves*1000}, index=idx),
        'TGA': pd.DataFrame({'TGA': tga*1000}, index=idx),
        'ONRRP_VOL': pd.DataFrame({'ONRRP_VOL': onrrp_vol*1000}, index=idx),
        # Use the ON RRP rate itself; filling this column with the 1M bill
        # series made the bill-minus-RRP spread identically zero.
        'ONRRP_RATE': pd.DataFrame({'ONRRP_RATE': onrrp_rate}, index=idx),
        'IORB': pd.DataFrame({'IORB': iorb}, index=idx),
        'EFFR': pd.DataFrame({'EFFR': effr}, index=idx),
        'SOFR': pd.DataFrame({'SOFR': sofr}, index=idx),
        'DGS1MO': pd.DataFrame({'DGS1MO': dgs1m}, index=idx),
        'DGS3MO': pd.DataFrame({'DGS3MO': dgs3m}, index=idx),
        'DGS2': pd.DataFrame({'DGS2': dgs2}, index=idx),
        'SP500': pd.DataFrame({'SP500': spx}, index=idx),
        'VIX': pd.DataFrame({'VIX': vix}, index=idx)
    }
    print('All downloads failed; generated synthetic dataset for illustration.')

# Build master safely
# Outer-join every loaded frame on its date index into one wide table.
master = None
for k in series_data:
    if master is None:
        master = series_data[k]
    else:
        master = master.join(series_data[k], how='outer')

if master is not None and not master.empty:
    master = master.sort_index()
    print('Master head:')
    print(master.head())
else:
    # Only a message is printed here; execution continues with the code below.
    print('Master dataframe is empty. Aborting.')

# -------------------- LHM Theme --------------------
# Series colours: one primary plus a list of secondary colours indexed by the charts.
PRIMARY = '#048DD2'
SECONDARY = ['#ff6a00', '#5e30eb', '#999999', '#00e645']

# Canvas and chrome colours.
BG = '#FFFFFF'
AX_COLOR = '#374151'
BORDER_COLOR = '#E5E5E0'
TICK_COLOR = '#6B7280'

# Text colours for the legend, the credit line and annotations.
LEGEND_COLOR = '#374151'
CREDIT_COLOR = '#9CA3AF'
ANNOT_COLOR = '#4B5563'

# Global matplotlib defaults shared by every figure created below.
plt.rcParams.update({
    'font.family': 'Inter',
    'figure.figsize': (12,7),
    'figure.facecolor': BG,
    'axes.facecolor': BG,
    'axes.edgecolor': AX_COLOR,
    'axes.linewidth': 1.0,
    'axes.labelcolor': TICK_COLOR,
    'xtick.color': TICK_COLOR,
    'ytick.color': TICK_COLOR
})

import matplotlib.dates as mdates

def apply_lhm(ax, title, ylabel=None, xlabel='Date', credit='Source: Lighthouse Macro'):
    """Apply the LHM chart styling to ``ax`` and stamp the credit on its figure.

    Parameters:
        ax: matplotlib Axes to style.
        title: chart title.
        ylabel: y-axis label; left untouched when None.
        xlabel: x-axis label.
        credit: text written in the bottom-right corner of the figure.

    The legend is placed below the plot and lists the labelled artists of
    ``ax`` and of every axes in the same figure that shares its x-axis
    (for example a ``twinx`` companion). Rebuilding it from ``ax`` alone would
    drop the right-hand series of a dual-axis chart.
    """
    ax.grid(False)
    ax.set_title(title, fontsize=18, color=AX_COLOR)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)

    # Collect legend entries in axes-creation order so a twin axis follows ax.
    siblings = ax.get_shared_x_axes().get_siblings(ax)
    handles, labels = [], []
    for other in ax.figure.axes:
        if other is ax or any(other is sib for sib in siblings):
            other_handles, other_labels = other.get_legend_handles_labels()
            handles.extend(other_handles)
            labels.extend(other_labels)
    if handles:
        leg = ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=2, frameon=False)
        for text in leg.get_texts():
            text.set_color(LEGEND_COLOR)

    # Final border look; an earlier AX_COLOR/1.0 pass was always overwritten by this one.
    for spine in ax.spines.values():
        spine.set_linewidth(1.5)
        spine.set_color(BORDER_COLOR)
    ax.figure.text(0.99, 0.01, credit, ha='right', va='bottom', fontsize=9, color=CREDIT_COLOR)

# Helpers

def to_billions(s, threshold=10000):
    """Heuristically rescale a dollar series to billions.

    When the median of the non-missing numeric values exceeds ``threshold``
    the series is assumed to be quoted in millions and is divided by 1000;
    otherwise it is returned unchanged. An empty or all-missing series is
    returned unchanged.

    The default threshold sits between the largest levels this script passes
    when already in billions (a few thousand) and the same quantities in
    millions (hundreds of thousands and up). A 1,000,000 cut-off missed
    millions-denominated series below one trillion dollars, such as a TGA
    balance around 500,000.
    """
    med = pd.to_numeric(s.dropna(), errors='coerce').median()
    if pd.notna(med) and med > threshold:
        return s / 1000.0
    return s

# Paths of the PNG files written so far, in creation order (appended to by save_show).
saved = []

# Directory that receives the chart images; created on first run.
outdir = 'charts_lhm'
os.makedirs(outdir, exist_ok=True)
print('Output directory: ' + outdir)

def save_show(name):
    """Write the current matplotlib figure to ``outdir/name`` and display it.

    The figure is laid out with ``tight_layout``, saved at 150 dpi, shown,
    and its path is appended to the module-level ``saved`` list.
    """
    target = os.path.join(outdir, name)
    plt.tight_layout()
    plt.savefig(target, dpi=150)
    plt.show()
    saved.append(target)
    print('Saved ' + target)

# -------------------- Charts --------------------
# 1 Liquidity plumbing
if {'TGA', 'ONRRP_VOL', 'RESERVES'}.issubset(series_data):
    # (legend label, series key) in plotting order; each series goes through to_billions.
    level_specs = [
        ('TGA ($bn)', 'TGA'),
        ('ON RRP usage ($bn)', 'ONRRP_VOL'),
        ('Reserve balances ($bn)', 'RESERVES'),
    ]
    temp = pd.DataFrame({label: to_billions(series_data[key][key]) for label, key in level_specs})
    ax = temp.plot(color=[PRIMARY, SECONDARY[2], SECONDARY[0]], linewidth=4)
    apply_lhm(ax, 'US Dollar Liquidity Plumbing: TGA, ON RRP, Reserve Balances', 'USD Billions')
    save_show('01_liquidity_plumbing_levels_LHM.png')

# 2 Fed assets vs reserves
if {'FED_ASSETS', 'RESERVES'}.issubset(series_data):
    # Both series pass through to_billions and share one axis.
    fed_assets_bn = to_billions(series_data['FED_ASSETS']['FED_ASSETS'])
    reserves_bn = to_billions(series_data['RESERVES']['RESERVES'])
    tmp = pd.DataFrame({
        'Fed total assets ($bn)': fed_assets_bn,
        'Reserve balances ($bn)': reserves_bn,
    })
    ax = tmp.plot(color=[PRIMARY, SECONDARY[0]], linewidth=4)
    apply_lhm(ax, 'Fed Balance Sheet vs Reserve Balances', 'USD Billions')
    save_show('02_fed_assets_vs_reserves_LHM.png')

# 3 Policy and money market rates
# Overnight / policy rate series that actually loaded, in display order.
rate_cols = [name for name in ('SOFR', 'EFFR', 'IORB', 'ONRRP_RATE') if name in series_data]
if len(rate_cols) >= 2:
    # Start from the first available frame and outer-join the rest onto it.
    tmp = series_data[rate_cols[0]]
    for k in rate_cols[1:]:
        tmp = tmp.join(series_data[k], how='outer')
    tmp = tmp.rename(columns={'ONRRP_RATE':'ON RRP award rate'})
    colors = [PRIMARY] + SECONDARY[:3]
    ax = tmp.plot(color=colors[:tmp.shape[1]], linewidth=4)
    apply_lhm(ax, 'Policy and Money Market Rates', 'Percent')
    save_show('03_policy_money_market_rates_LHM.png')

# 4 Bills vs IORB and ON RRP
if {'DGS1MO', 'DGS3MO', 'IORB', 'ONRRP_RATE'}.issubset(series_data):
    # Default (left) joins: the 1M bill index defines the rows.
    tmp = series_data['DGS1MO']
    for extra in ('DGS3MO', 'IORB', 'ONRRP_RATE'):
        tmp = tmp.join(series_data[extra])
    tmp = tmp.rename(columns={'DGS1MO':'1M UST','DGS3MO':'3M UST','ONRRP_RATE':'ON RRP rate'})
    ax = tmp.plot(color=[PRIMARY] + SECONDARY[:3], linewidth=4)
    apply_lhm(ax, 'Front-End Rates: Bills vs IORB and ON RRP', 'Percent')
    save_show('04_bills_vs_iorb_onrrp_LHM.png')

# 5 ON RRP usage vs spread
if all(k in series_data for k in ['ONRRP_VOL','DGS1MO','ONRRP_RATE']):
    # Left axis: 1M bill yield minus ON RRP rate. Right axis: ON RRP usage.
    on = to_billions(series_data['ONRRP_VOL']['ONRRP_VOL'])
    spread = series_data['DGS1MO']['DGS1MO'] - series_data['ONRRP_RATE']['ONRRP_RATE']
    fig, ax1 = plt.subplots()
    ax1.plot(spread.index, spread.values, color=PRIMARY, linewidth=4, label='1M Bill - ON RRP (pp)')
    ax1.set_ylabel('1M Bill - ON RRP (pp)')
    ax2 = ax1.twinx()
    ax2.plot(on.index, on.values, color=SECONDARY[2], linewidth=3.5, alpha=0.7, label='ON RRP usage ($bn)')
    ax2.set_ylabel('ON RRP usage ($bn)')
    apply_lhm(ax1, 'ON RRP Usage vs Carry Incentive')
    # Build the combined legend after styling so it cannot be replaced by a
    # legend that lists only the left-axis series.
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    leg = ax1.legend(lines + lines2, labels + labels2, loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=2, frameon=False)
    for text in leg.get_texts():
        text.set_color(LEGEND_COLOR)
    save_show('05_onrrp_vs_bill_spread_LHM.png')

# 6 Reserves vs SP500
if all(k in series_data for k in ['RESERVES','SP500']):
    # Normalise to billions with the shared helper, then to trillions, so the
    # "$tn" label holds whether the source series is in millions or billions.
    reserves = to_billions(series_data['RESERVES']['RESERVES']) / 1000.0
    spx = series_data['SP500']['SP500']
    fig, ax1 = plt.subplots()
    ax1.plot(reserves.index, reserves.values, color=PRIMARY, linewidth=4, label='Reserves ($tn)')
    ax1.set_ylabel('Reserves ($tn)')
    ax2 = ax1.twinx()
    ax2.plot(spx.index, spx.values, color=SECONDARY[0], linewidth=3.5, alpha=0.8, label='S&P 500')
    ax2.set_ylabel('S&P 500')
    apply_lhm(ax1, 'Reserve Balances vs S&P 500')
    # Build the combined legend after styling so it cannot be replaced by a
    # legend that lists only the left-axis series.
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    leg = ax1.legend(lines + lines2, labels + labels2, loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=2, frameon=False)
    for text in leg.get_texts():
        text.set_color(LEGEND_COLOR)
    save_show('06_reserves_vs_sp500_LHM.png')

# 7 VIX vs Reserves YoY
if all(k in series_data for k in ['RESERVES','VIX']):
    # Calendar-based year-over-year change. A fixed 252-row lag is one year
    # only for business-daily data and spans several years on a weekly series.
    res = series_data['RESERVES']['RESERVES'].dropna().sort_index()
    res = res[~res.index.duplicated(keep='last')]
    # Latest observation on or before the same date one year earlier
    # (NaN when nothing exists within 10 days before that date).
    year_ago = res.reindex(res.index - pd.DateOffset(years=1), method='ffill', tolerance=pd.Timedelta(days=10))
    res_yoy = pd.DataFrame({'Reserves YoY (%)': (res.values / year_ago.values - 1.0) * 100.0}, index=res.index)
    fig, ax1 = plt.subplots()
    ax1.plot(res_yoy.index, res_yoy['Reserves YoY (%)'], color=PRIMARY, linewidth=4, label='Reserves YoY (%)')
    ax1.set_ylabel('Reserves YoY (%)')
    ax2 = ax1.twinx()
    ax2.plot(series_data['VIX'].index, series_data['VIX']['VIX'], color=SECONDARY[0], linewidth=3.5, alpha=0.8, label='VIX')
    ax2.set_ylabel('VIX')
    apply_lhm(ax1, 'Reserves Momentum and Equity Volatility')
    # Build the combined legend after styling so it cannot be replaced by a
    # legend that lists only the left-axis series.
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    leg = ax1.legend(lines + lines2, labels + labels2, loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=2, frameon=False)
    for text in leg.get_texts():
        text.set_color(LEGEND_COLOR)
    save_show('07_reserves_yoy_vs_vix_LHM.png')

# 8 Fed assets YoY vs 2Y
if all(k in series_data for k in ['FED_ASSETS','DGS2']):
    # Calendar-based year-over-year change. A fixed 252-row lag is one year
    # only for business-daily data and spans several years on a weekly series.
    fa = series_data['FED_ASSETS']['FED_ASSETS'].dropna().sort_index()
    fa = fa[~fa.index.duplicated(keep='last')]
    # Latest observation on or before the same date one year earlier
    # (NaN when nothing exists within 10 days before that date).
    fa_year_ago = fa.reindex(fa.index - pd.DateOffset(years=1), method='ffill', tolerance=pd.Timedelta(days=10))
    fa_yoy = pd.DataFrame({'Fed assets YoY (%)': (fa.values / fa_year_ago.values - 1.0) * 100.0}, index=fa.index)
    fig, ax1 = plt.subplots()
    ax1.plot(fa_yoy.index, fa_yoy['Fed assets YoY (%)'], color=PRIMARY, linewidth=4, label='Fed assets YoY (%)')
    ax1.set_ylabel('Fed assets YoY (%)')
    ax2 = ax1.twinx()
    ax2.plot(series_data['DGS2'].index, series_data['DGS2']['DGS2'], color=SECONDARY[0], linewidth=3.5, alpha=0.8, label='UST 2Y (%)')
    ax2.set_ylabel('UST 2Y (%)')
    apply_lhm(ax1, 'Balance Sheet Momentum and Front-End Rates')
    # Build the combined legend after styling so it cannot be replaced by a
    # legend that lists only the left-axis series.
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    leg = ax1.legend(lines + lines2, labels + labels2, loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=2, frameon=False)
    for text in leg.get_texts():
        text.set_color(LEGEND_COLOR)
    save_show('08_fed_assets_yoy_vs_2y_LHM.png')

# 9 Weekly changes: TGA vs Reserves
if all(k in series_data for k in ['TGA','RESERVES']):
    # Convert the LEVELS to billions before differencing. to_billions decides
    # from the median, and the median of weekly changes sits near zero, so
    # converting after .diff() would never rescale anything.
    tga_w = to_billions(series_data['TGA']['TGA']).resample('W-WED').last().diff()
    res_w = to_billions(series_data['RESERVES']['RESERVES']).resample('W-WED').last().diff()
    tga_w = tga_w.to_frame('TGA weekly change ($bn)')
    res_w = res_w.to_frame('Reserves weekly change ($bn)')
    tmp = tga_w.join(res_w)
    ax = tmp.plot(color=[PRIMARY, SECONDARY[0]], linewidth=4)
    apply_lhm(ax, 'Weekly Changes: TGA vs Reserve Balances', 'USD Billions')
    save_show('09_weekly_tga_vs_reserves_changes_LHM.png')

# 10 ON RRP vs SPX (indexed)
if {'ONRRP_VOL', 'SP500'}.issubset(series_data):
    on = to_billions(series_data['ONRRP_VOL']['ONRRP_VOL']).to_frame('ON RRP ($bn)')
    spx = series_data['SP500'].copy()
    # Rebase each frame so that its first non-missing row equals 100.
    on_n = on.mul(100.0).div(on.dropna().iloc[0])
    spx_n = spx.mul(100.0).div(spx.dropna().iloc[0])
    tmp = on_n.join(spx_n)
    tmp.columns = ['ON RRP (norm=100)','S&P 500 (norm=100)']
    ax = tmp.plot(color=[PRIMARY, SECONDARY[0]], linewidth=4)
    apply_lhm(ax, 'ON RRP Usage vs S&P 500 (Indexed)', 'Index, 100 = first value')
    save_show('10_onrrp_vs_spx_indexed_LHM.png')

# -------------------- Bullets --------------------

def latest_val(s):
    """Return the most recent valid numeric observation in ``s``.

    Values are coerced to numbers first and missing values dropped
    afterwards, so a trailing non-numeric entry is skipped instead of being
    returned as NaN. Returns ``np.nan`` when there is no valid observation or
    when ``s`` cannot be processed at all; this function never raises.
    """
    try:
        numeric = pd.to_numeric(s, errors='coerce').dropna()
        return numeric.iloc[-1]
    except Exception:
        # Callers format the result straight into text, so degrade to NaN.
        return np.nan

# List of (chart filename, {section heading: text}) pairs, one per chart.
bullets = []
if {'TGA', 'ONRRP_VOL', 'RESERVES'}.issubset(series_data):
    # Latest readings after to_billions, rounded to one decimal.
    tga_txt = str(round(latest_val(to_billions(series_data['TGA']['TGA'])),1))
    rrp_txt = str(round(latest_val(to_billions(series_data['ONRRP_VOL']['ONRRP_VOL'])),1))
    res_txt = str(round(latest_val(to_billions(series_data['RESERVES']['RESERVES'])),1))
    chart_notes = {
        'WHAT IT IS': 'Levels of the Treasury General Account, Fed ON RRP usage, and reserve balances in the banking system.',
        'WHY IT MATTERS': 'Core components of USD liquidity; movements alter bank reserves and funding tone.',
        'HOW TO INTERPRET THE CHART': 'Higher reserves often coincide with easier liquidity; TGA draws funded from the system tend to reduce reserves; ON RRP usage reflects excess cash parked at the Fed.',
        'THE CURRENT READING': f'TGA ~ {tga_txt} bn; ON RRP ~ {rrp_txt} bn; reserves ~ {res_txt} bn.',
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'The mix may be signaling the near-term supply of bank reserves and the pull on front-end markets.'
    }
    bullets.append(('01_liquidity_plumbing_levels_LHM.png', chart_notes))
if {'FED_ASSETS', 'RESERVES'}.issubset(series_data):
    # Latest readings after to_billions, rounded to zero decimals.
    fa_txt = str(round(latest_val(to_billions(series_data['FED_ASSETS']['FED_ASSETS'])),0))
    res_txt = str(round(latest_val(to_billions(series_data['RESERVES']['RESERVES'])),0))
    chart_notes = {
        'WHAT IT IS': 'Fed total assets overlaid with banking system reserve balances.',
        'WHY IT MATTERS': 'QE/QT dynamics influence reserves, shaping repo and bill markets.',
        'HOW TO INTERPRET THE CHART': 'Divergences can indicate offsets from TGA and ON RRP; parallel moves suggest direct balance sheet transmission.',
        'THE CURRENT READING': f'Fed assets ~ {fa_txt} bn; reserves ~ {res_txt} bn.',
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'Momentum may be signaling background liquidity that could influence risk appetite and term premia.'
    }
    bullets.append(('02_fed_assets_vs_reserves_LHM.png', chart_notes))
if len(rate_cols) >= 2:
    # The guard only guarantees that two of the four rate series loaded, so
    # the reading is built from whichever ones are present; indexing all
    # four unconditionally raised KeyError when one was missing.
    rate_labels = {'SOFR': 'SOFR', 'EFFR': 'EFFR', 'IORB': 'IORB', 'ONRRP_RATE': 'ON RRP'}
    rate_reading = '; '.join(
        rate_labels[name] + ' ' + str(round(latest_val(series_data[name][name]),2)) + '%'
        for name in rate_cols
    ) + '.'
    bullets.append(('03_policy_money_market_rates_LHM.png', {
        'WHAT IT IS': 'Policy corridor (IORB and ON RRP) versus traded overnight rates (EFFR, SOFR).',
        'WHY IT MATTERS': 'Indicates policy transmission and floor effectiveness.',
        'HOW TO INTERPRET THE CHART': 'Rates within the corridor suggest orderly conditions; persistent gaps may flag technical pressure.',
        'THE CURRENT READING': rate_reading,
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'Placement versus the floor could be signaling cash conditions and bill-repo relative value.'
    }))
if {'DGS1MO', 'DGS3MO', 'IORB', 'ONRRP_RATE'}.issubset(series_data):
    # Latest value of each rate, rounded to two decimals.
    bill_1m_txt = str(round(latest_val(series_data['DGS1MO']['DGS1MO']),2))
    bill_3m_txt = str(round(latest_val(series_data['DGS3MO']['DGS3MO']),2))
    iorb_txt = str(round(latest_val(series_data['IORB']['IORB']),2))
    rrp_rate_txt = str(round(latest_val(series_data['ONRRP_RATE']['ONRRP_RATE']),2))
    chart_notes = {
        'WHAT IT IS': 'Bill yields alongside IORB and the ON RRP floor.',
        'WHY IT MATTERS': 'Determines cash allocation between bills, repo, and the RRP facility.',
        'HOW TO INTERPRET THE CHART': 'Bills above the floor encourage migration out of the RRP; compressions can steer cash back to the facility.',
        'THE CURRENT READING': f'1M {bill_1m_txt}%, 3M {bill_3m_txt}%, IORB {iorb_txt}%, ON RRP {rrp_rate_txt}%.',
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'Relative value could be signaling the direction of RRP balances and bill demand.'
    }
    bullets.append(('04_bills_vs_iorb_onrrp_LHM.png', chart_notes))
if {'ONRRP_VOL', 'DGS1MO', 'ONRRP_RATE'}.issubset(series_data):
    # Latest 1M bill minus ON RRP rate (two decimals) and latest usage after to_billions (one decimal).
    spread_txt = str(round(latest_val(series_data['DGS1MO']['DGS1MO'] - series_data['ONRRP_RATE']['ONRRP_RATE']),2))
    rrp_txt = str(round(latest_val(to_billions(series_data['ONRRP_VOL']['ONRRP_VOL'])),1))
    chart_notes = {
        'WHAT IT IS': 'ON RRP usage versus 1M bill minus ON RRP rate spread.',
        'WHY IT MATTERS': 'Tracks the incentive to move cash between bills and the facility.',
        'HOW TO INTERPRET THE CHART': 'Wider positive spread often aligns with lower facility usage; narrower spread with higher usage.',
        'THE CURRENT READING': f'Spread ~ {spread_txt} pp; RRP ~ {rrp_txt} bn.',
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'May be signaling cash migration that impacts bill financing and repo levels.'
    }
    bullets.append(('05_onrrp_vs_bill_spread_LHM.png', chart_notes))
if {'RESERVES', 'SP500'}.issubset(series_data):
    # Latest reserves after to_billions and latest index level, both rounded to zero decimals.
    res_txt = str(round(latest_val(to_billions(series_data['RESERVES']['RESERVES'])),0))
    spx_txt = str(round(latest_val(series_data['SP500']['SP500']),0))
    chart_notes = {
        'WHAT IT IS': 'Reserve balances compared with the S&P 500.',
        'WHY IT MATTERS': 'Reserves are a liquidity indicator that can correlate with risk-taking capacity.',
        'HOW TO INTERPRET THE CHART': 'Look for multi-month co-moves rather than day-to-day alignment; use as context rather than signal.',
        'THE CURRENT READING': f'Reserves ~ {res_txt} bn; S&P 500 ~ {spx_txt}.',
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'Backdrop could be signaling the breadth of balance sheet capacity supporting risk markets.'
    }
    bullets.append(('06_reserves_vs_sp500_LHM.png', chart_notes))
if all(k in series_data for k in ['RESERVES','VIX']):
    bullets.append(('07_reserves_yoy_vs_vix_LHM.png', {
        'WHAT IT IS': 'Reserves momentum (YoY) versus equity volatility (VIX).',
        'WHY IT MATTERS': 'Liquidity swings can be associated with changing volatility regimes.',
        'HOW TO INTERPRET THE CHART': 'Falling momentum has at times coincided with higher volatility; use directionally and with caution.',
        'THE CURRENT READING': 'Reserves YoY ~ ' + str(round(latest_val(series_data['RESERVES']['RESERVES'].pct_change(252)*100.0),1)) + '%; VIX ~ ' + str(round(latest_val(series_data['VIX']['VIX']),1)) + '.',
        'THE MACRO & MARKET/TRADING IMPLICATIONS': 'May be signaling the volatility backdrop relevant for risk management and hedging.'