# HW6 FX Carry Strategy (GBP Funding)

This notebook implements the weekly FX carry strategy with GBP funding and outputs performance artifacts to `outputs/`.

In [None]:

import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Widen pandas' console rendering so wide panels print without truncation.
for _display_opt in ('display.max_columns', 'display.width'):
    pd.set_option(_display_opt, 200)

# All artifacts are written below <working dir>/outputs (figures in a subfolder).
BASE = Path('.').resolve()
OUT = BASE.joinpath('outputs')
FIG = OUT.joinpath('figures')
OUT.mkdir(exist_ok=True)
FIG.mkdir(parents=True, exist_ok=True)

print('Working directory:', BASE)
print('Data inventory:')
# List every entry of the raw and cleaned data folders for a quick visual check.
for folder, pattern in (('data', '*'), ('data_clean', '*')):
    files = sorted(BASE.joinpath(folder).glob(pattern))
    print(f'  {folder}/ ({len(files)} files)')
    for f in files:
        print('   -', f.name)


In [None]:

# --- Load and standardize data ---

def load_ois(data_clean_dir: Path) -> pd.Series:
    """Load the overnight rate series from the first ``*IUDSOIA*`` file found.

    Parquet files are preferred over CSV files. If the loaded frame already has
    a ``DatetimeIndex`` the first column is used as the rate; otherwise a date
    column (DATE/DT/TIME/DATETIME, else the first column) and a value column
    (name containing IUDSOIA/SONIA/VALUE, else the first non-date column) are
    picked by name.

    Args:
        data_clean_dir: Directory searched for the OIS file.

    Returns:
        A date-sorted Series named ``'ois'`` with a tz-naive ``DatetimeIndex``,
        free of missing values and unparseable dates. If the median value
        exceeds 1 the series is treated as percent and divided by 100.

    Raises:
        FileNotFoundError: If no matching parquet or CSV file exists.
    """
    cands = sorted(data_clean_dir.glob('*IUDSOIA*.parquet')) + sorted(data_clean_dir.glob('*IUDSOIA*.csv'))
    if not cands:
        raise FileNotFoundError('No OIS file matching *IUDSOIA* found in data_clean/')
    fp = cands[0]
    df = pd.read_parquet(fp) if fp.suffix == '.parquet' else pd.read_csv(fp)

    if isinstance(df.index, pd.DatetimeIndex):
        idx = df.index
        val_col = df.columns[0]
    else:
        date_col = next(
            (c for c in df.columns if str(c).upper() in {'DATE', 'DT', 'TIME', 'DATETIME'}),
            df.columns[0],
        )
        val_candidates = [c for c in df.columns if c != date_col]
        val_col = next(
            (
                c for c in val_candidates
                if 'IUDSOIA' in str(c).upper() or 'SONIA' in str(c).upper() or 'VALUE' in str(c).upper()
            ),
            val_candidates[0],
        )
        idx = pd.DatetimeIndex(pd.to_datetime(df[date_col], errors='coerce'))

    # Strip any timezone in both branches so the index compares cleanly with
    # the other (tz-naive) panels.
    if idx.tz is not None:
        idx = idx.tz_localize(None)

    # Coerce in both branches: a parquet column may still hold strings.
    values = pd.to_numeric(df[val_col], errors='coerce').to_numpy()
    s = pd.Series(values, index=idx, name='ois')

    # dropna() only looks at values, so rows whose date failed to parse (NaT)
    # must be removed explicitly before sorting.
    s = s[s.index.notna()].dropna().sort_index()

    # Convert percent-like values to decimal if needed
    if s.median() > 1:
        s = s / 100.0
    return s


def load_fx_usd_per_ccy(data_clean_dir: Path, currencies) -> pd.DataFrame:
    """Load a daily FX panel restricted to ``currencies`` plus GBP, in USD per CCY.

    A pre-standardized wide file (``*edi*cur*usd_per_ccy*wide*.parquet``) is
    preferred. Otherwise the first ``*EDI*CUR*`` parquet file is used, either
    in long form (``date``/``ccy``/``value`` columns, pivoted to wide) or
    already wide (optionally with a ``date`` column).

    Args:
        data_clean_dir: Directory searched for the FX parquet file.
        currencies: Iterable of currency codes to keep (any iterable, not only
            a list). Codes missing from the file become all-NaN columns.

    Returns:
        A date-sorted numeric DataFrame with a tz-naive ``DatetimeIndex`` and
        alphabetically ordered upper-case currency columns. If the GBP column
        looks like GBP per USD (median below parity) the whole panel is
        inverted; a message describing the decision is printed.

    Raises:
        FileNotFoundError: If no matching parquet file exists.
    """
    # prefer already standardized USD/CCY panel
    cands = sorted(data_clean_dir.glob('*edi*cur*usd_per_ccy*wide*.parquet'))
    if cands:
        fx = pd.read_parquet(cands[0])
    else:
        cands = sorted(data_clean_dir.glob('*EDI*CUR*.parquet')) + sorted(data_clean_dir.glob('*edi*cur*.parquet'))
        if not cands:
            raise FileNotFoundError('No FX file matching *EDI*CUR* found in data_clean/')
        df = pd.read_parquet(cands[0])
        if {'date', 'ccy', 'value'}.issubset(df.columns):
            fx = df.pivot(index='date', columns='ccy', values='value')
        elif 'date' in df.columns:
            fx = df.set_index('date')
        else:
            fx = df.copy()

    # Normalize the index once for every input shape: parse, strip timezone,
    # and drop rows whose date could not be parsed.
    idx = pd.DatetimeIndex(pd.to_datetime(fx.index, errors='coerce'))
    if idx.tz is not None:
        idx = idx.tz_localize(None)
    fx.index = idx
    fx = fx[fx.index.notna()].sort_index()

    fx.columns = [str(c).upper().strip() for c in fx.columns]
    fx = fx.apply(pd.to_numeric, errors='coerce')

    # Keep only required currencies plus GBP; absent codes become NaN columns.
    keep = sorted({*currencies, 'GBP'})
    fx = fx.reindex(columns=keep)

    # Sanity-based orientation check on GBP. Parity is the cutoff: a GBP-per-USD
    # quote sits below 1 while USD-per-GBP sits above it (sterling has
    # historically traded above parity against the dollar). A lower cutoff such
    # as 0.7 would leave GBP-per-USD panels quoted in the 0.7-1.0 range
    # un-inverted.
    gbp = fx['GBP'].dropna()
    if not gbp.empty:
        med = gbp.median()
        if med < 1.0:  # likely GBP per USD; invert to USD per GBP
            fx = 1.0 / fx
            print('FX panel appears quoted as CCY per USD; inverted to USD per CCY.')
        else:
            print('FX panel appears quoted as USD per CCY; no inversion applied.')
    else:
        print('Warning: GBP series unavailable for FX orientation check.')
    return fx


def parse_curve_file(fp: Path) -> pd.DataFrame:
    """Parse one yield-curve CSV laid out as side-by-side (date, value) column pairs.

    The header of each date column is expected to carry a ticker such as
    ``GTTRY5YR Govt``; the three letters after ``GT`` are taken as the currency
    and the following digits as the tenor. Pairs whose header does not match
    are skipped, as is a trailing unpaired column.

    Returns:
        Long-format frame with columns ``date``, ``rate``, ``ccy``, ``tenor``;
        rows with an unparseable date or rate are dropped.

    Raises:
        ValueError: If no column pair in the file could be parsed.
    """
    import re

    sheet = pd.read_csv(fp)
    headers = list(sheet.columns)
    frames = []
    # Even positions hold dates, odd positions the matching values; zip() stops
    # at the shorter slice, which discards a dangling last column.
    for date_hdr, value_hdr in zip(headers[0::2], headers[1::2]):
        ticker = str(date_hdr).upper().replace(' GOVT', '').strip()
        match = re.search(r'GT([A-Z]{3})(\d+)(Y|YR)', ticker)
        if match is None:
            continue
        block = pd.DataFrame({
            'date': pd.to_datetime(sheet[date_hdr], errors='coerce'),
            'rate': pd.to_numeric(sheet[value_hdr], errors='coerce'),
            'ccy': match.group(1),
            'tenor': float(match.group(2)),
        })
        frames.append(block.dropna(subset=['date', 'rate']))
    if not frames:
        raise ValueError(f'No parsable curve columns in {fp.name}')
    return pd.concat(frames, ignore_index=True)


def load_em_curves(data_dir: Path) -> pd.DataFrame:
    """Load and stack every ``*Emerging*Mkt*YC*.csv`` curve file in ``data_dir``.

    Each file is parsed with ``parse_curve_file``. Dates are made tz-naive,
    rates are divided by 100 when the overall median exceeds 1 (percent
    quoting), and repeated (date, ccy, tenor) observations are averaged.

    Returns:
        Long-format frame with columns ``date``, ``ccy``, ``tenor``, ``rate``.

    Raises:
        FileNotFoundError: If no matching curve file exists.
    """
    paths = sorted(data_dir.glob('*Emerging*Mkt*YC*.csv'))
    if not paths:
        raise FileNotFoundError('No curve files matching *Emerging*Mkt*YC* in data/')

    panel = pd.concat([parse_curve_file(p) for p in paths], ignore_index=True)
    panel['date'] = pd.to_datetime(panel['date']).dt.tz_localize(None)

    # Percent vs decimal is decided once for the pooled panel, not per currency.
    quoted_in_percent = panel['rate'].median() > 1
    if quoted_in_percent:
        panel['rate'] = panel['rate'] / 100.0

    # The same point can appear in more than one file; keep the mean.
    return panel.groupby(['date', 'ccy', 'tenor'], as_index=False)['rate'].mean()

# Lending currencies of the strategy.
EM_CCY = ['BRL', 'NGN', 'PKR', 'TRY', 'ZAR']

# Cleaned inputs (OIS, FX) live in data_clean/; raw curve files live in data/.
_clean_dir = BASE / 'data_clean'
ois = load_ois(_clean_dir)
fx = load_fx_usd_per_ccy(_clean_dir, EM_CCY)
curves = load_em_curves(BASE / 'data')

# Coverage summary for each input.
print('OIS:', ois.index.min().date(), 'to', ois.index.max().date(), 'obs=', len(ois))
print(
    'FX:', fx.index.min().date(), 'to', fx.index.max().date(),
    'obs=', len(fx), 'cols=', list(fx.columns),
)
print('Curves sample:')
print(curves.head())
print('Curve currencies:', sorted(curves['ccy'].unique()))


In [None]:

# --- Weekly alignment (Wednesdays with +/- 2 day tolerance, prefer delaying) ---

def align_to_weekly_wed(series_or_df, start, end):
    """Sample ``series_or_df`` on a Wednesday grid between ``start`` and ``end``.

    For every Wednesday the observation is taken from the first date present
    in the input's index among: the Wednesday itself, +1 day, +2 days, -1 day,
    -2 days (later dates are tried before earlier ones). Wednesdays with no
    observation inside that window are omitted. The result is labelled with
    the Wednesday, not with the date the observation came from.

    Returns:
        A Series (keeping the input name) or DataFrame (keeping the input
        columns), matching the type of ``series_or_df``.
    """
    day_offsets = (0, 1, 2, -1, -2)
    available = series_or_df.index
    kept_dates = []
    kept_rows = []
    for wednesday in pd.date_range(start=start, end=end, freq='W-WED'):
        for shift in day_offsets:
            probe = wednesday + pd.Timedelta(days=shift)
            if probe in available:
                kept_dates.append(wednesday)
                kept_rows.append(series_or_df.loc[probe])
                break  # first hit wins; remaining offsets are lower priority

    weekly_index = pd.DatetimeIndex(kept_dates)
    if isinstance(series_or_df, pd.Series):
        return pd.Series(kept_rows, index=weekly_index, name=series_or_df.name)
    return pd.DataFrame(kept_rows, index=weekly_index, columns=series_or_df.columns)

# One row per (date, ccy), one column per tenor.
curve_wide = (
    curves
    .pivot_table(index=['date', 'ccy'], columns='tenor', values='rate', aggfunc='mean')
    .sort_index()
)

# Window covered by all three inputs.
common_start = max(ois.index.min(), fx.index.min(), curves['date'].min())
common_end = min(ois.index.max(), fx.index.max(), curves['date'].max())

ois_w = align_to_weekly_wed(ois, common_start, common_end)
fx_w = align_to_weekly_wed(fx, common_start, common_end)
# Trading dates need both a funding rate and FX; curves are handled per currency.
dates = ois_w.index.intersection(fx_w.index)

# Weekly curves per ccy, but do NOT force intersection across currencies
curve_weekly = {}
_curve_ccys = curve_wide.index.get_level_values('ccy')
for c in EM_CCY:
    if c in _curve_ccys:
        cdf = curve_wide.xs(c, level='ccy', drop_level=True).sort_index()
        cw = align_to_weekly_wed(cdf, common_start, common_end)
        # Put the curve on the trading calendar, carrying the last curve forward.
        curve_weekly[c] = cw.reindex(dates).ffill()

ois_w = ois_w.reindex(dates).ffill()
fx_w = fx_w.reindex(dates).ffill()

print('Weekly aligned dates:', len(dates), dates.min().date(), 'to', dates.max().date())
for c in EM_CCY:
    if c not in curve_weekly:
        continue
    _has_5y = 5.0 in curve_weekly[c].columns
    _n_5y = int(curve_weekly[c][5.0].notna().sum()) if _has_5y else 0
    print(f'{c} curve non-missing rows:', _n_5y)
print('Missing FX counts:')
print(fx_w[EM_CCY + ['GBP']].isna().sum())


In [None]:

# --- Curve utilities (bootstrap-ish discounting + bond pricing) ---

def interp_rate(tenors, rates, t):
    """Linearly interpolate a rate at tenor ``t`` from (tenor, rate) points.

    Points with a non-finite tenor or rate are ignored and the remainder is
    sorted by tenor. Outside the quoted range the nearest end rate is used
    (flat extrapolation). Returns NaN when no usable point remains.
    """
    x = np.asarray(tenors, dtype=float)
    y = np.asarray(rates, dtype=float)
    usable = np.isfinite(x) & np.isfinite(y)
    x = x[usable]
    y = y[usable]
    if x.size == 0:
        return np.nan
    by_tenor = np.argsort(x)
    x = x[by_tenor]
    y = y[by_tenor]
    # np.interp already clamps to the first/last value outside the x range.
    return float(np.interp(t, x, y))


def bootstrap_discount_from_par(par_curve: pd.Series, freq=4, max_t=5.0):
    """Bootstrap discount factors on a regular grid from a par-rate curve.

    The grid runs from ``1/freq`` to ``max_t`` in steps of ``1/freq``. At each
    grid time the par rate is read from ``par_curve`` with ``interp_rate`` and
    the discount factor is solved from the par condition

        1 = c * sum_{i<n} DF_i + (1 + c) * DF_n,   with c = par_rate / freq.

    Args:
        par_curve: Series of par rates indexed by tenor.
        freq: Coupon payments per year (grid density).
        max_t: Last grid time.

    Returns:
        Series of discount factors indexed by grid time. Each factor is
        floored at 1e-10, and the floored value feeds the later steps.
    """
    step = 1 / freq
    times = np.arange(step, max_t + 1e-12, step)
    tenors = par_curve.index.values
    quotes = par_curve.values

    dfs = np.empty(len(times), dtype=float)
    # Running sum of the discount factors already solved. Carrying it forward
    # avoids re-summing every earlier factor at each grid point.
    annuity = 0.0
    for k, t in enumerate(times):
        c = interp_rate(tenors, quotes, t) / freq
        df_t = max((1.0 - c * annuity) / (1.0 + c), 1e-10)
        dfs[k] = df_t
        annuity += df_t
    return pd.Series(dfs, index=times)


def price_fixed_bond(coupon_rate, rem_t, par_curve, freq=4):
    """Price a fixed-coupon bullet bond (unit notional) off a par curve.

    Cash flows are ``coupon_rate / freq`` per period plus 1 at maturity, and
    are discounted with factors from ``bootstrap_discount_from_par``. The
    result includes any accrued coupon (no accrued interest is subtracted).

    Payment dates are laid out backwards from maturity (``rem_t``,
    ``rem_t - 1/freq``, ...). A bond that has aged off the coupon cycle, e.g.
    ``rem_t = 5 - 1/52``, therefore keeps all remaining coupons and redeems at
    ``rem_t``. Building the schedule forward from today would instead drop the
    final coupon and pay principal on the last whole grid date before
    maturity. When ``rem_t`` is a whole number of periods both constructions
    give the same dates.

    Args:
        coupon_rate: Annual coupon rate as a decimal.
        rem_t: Remaining time to maturity in years.
        par_curve: Series of par rates indexed by tenor.
        freq: Coupon payments per year.

    Returns:
        Present value per unit notional; ``1.0`` if ``rem_t`` is not positive.
    """
    if rem_t <= 0:
        return 1.0

    step = 1 / freq
    # Number of payment dates still ahead; the small epsilon keeps an exact
    # multiple of the period from being rounded up by floating-point noise.
    n_pay = max(1, int(np.ceil(rem_t * freq - 1e-9)))
    pay_times = rem_t - step * np.arange(n_pay - 1, -1, -1)
    pay_times = pay_times[pay_times > 1e-12]
    if len(pay_times) == 0:
        return 1.0

    # Extend the grid one period past maturity so the last payment is
    # interpolated rather than clamped (equals rem_t + 0.25 for freq=4).
    zcb = bootstrap_discount_from_par(par_curve, freq=freq, max_t=max(5.0, rem_t + step))

    # Anchor DF(0) = 1 so a first stub period shorter than 1/freq is
    # interpolated instead of being clamped to the first grid factor.
    z_times = np.concatenate(([0.0], np.asarray(zcb.index.values, dtype=float)))
    z_vals = np.concatenate(([1.0], np.asarray(zcb.values, dtype=float)))
    dfs = np.interp(pay_times, z_times, z_vals)

    c = coupon_rate / freq
    cash = np.full(len(pay_times), c)
    cash[-1] += 1.0  # principal repaid with the final coupon
    return float(np.sum(cash * dfs))

# Sanity check: a 5Y bond paying the curve's own 5Y par rate should price
# close to 1 on that same curve. Uses the first weekly curve of each currency.
sanity = []
for c in EM_CCY:
    _weekly = curve_weekly.get(c)
    if _weekly is None:
        continue
    row = _weekly.iloc[0].dropna()
    if not row.empty:
        s5 = interp_rate(row.index.values, row.values, 5.0)
        p = price_fixed_bond(s5, 5.0, row, freq=4)
        sanity.append((c, s5, p))
sanity_df = pd.DataFrame(sanity, columns=['ccy', 's5', 'par_price'])
print(sanity_df)


In [None]:

# --- Strategy backtest ---

# Weekly backtest. For each pair of consecutive trading dates (t0, t1) and each
# lending currency, a position is opened at t0 when the entry rule passes and
# is marked at t1. Per-(week, currency) detail goes to `records`; the
# equal-weighted portfolio return per week goes to `weekly_port_ret`.
records = []
weekly_port_ret = []
used_funding_proxy = True  # GBP swap curve not available in data set

for i in range(len(dates)-1):
    t0, t1 = dates[i], dates[i+1]
    ois_rate = float(ois_w.loc[t0])
    # NOTE(review): despite the name this is the full funding RATE (OIS + 50bp),
    # not just the spread; it is what accrues on the borrowing leg below.
    fund_spread = ois_rate + 0.005  # OIS + 50bp

    # Return per currency for this week; NaN marks an inactive currency.
    ccy_rets = {}
    for c in EM_CCY:
        if c not in curve_weekly:
            continue
        # Curves at entry (t0) and at mark date (t1); skipping here means no
        # row at all is recorded for this currency-week.
        crv0 = curve_weekly[c].loc[t0].dropna()
        crv1 = curve_weekly[c].loc[t1].dropna()
        if crv0.empty or crv1.empty:
            continue
        s5_lend = interp_rate(crv0.index.values, crv0.values, 5.0)
        s5_fund = ois_rate  # fallback threshold proxy since GBP swap curve unavailable
        # Entry rule: the 5Y lending rate must exceed the funding proxy by at least 50bp.
        active = bool(np.isfinite(s5_lend) and np.isfinite(s5_fund) and (s5_lend >= s5_fund + 0.005))

        # FX levels read from fx_w at entry and exit for the lending currency and for GBP.
        usd0 = fx_w.at[t0, c]
        usd1 = fx_w.at[t1, c]
        gbp0 = fx_w.at[t0, 'GBP']
        gbp1 = fx_w.at[t1, 'GBP']
        # Any missing FX quote makes the position untradeable this week.
        if not np.isfinite(usd0) or not np.isfinite(usd1) or not np.isfinite(gbp0) or not np.isfinite(gbp1):
            active = False

        pnl_usd = 0.0
        ret = 0.0
        if active:
            # lending leg: +10MM USD notionally invested in CCY par bond
            lend_usd = 10_000_000.0
            units_ccy = lend_usd / usd0
            # Entry price is par by construction (coupon = s5_lend); p0 is not used further.
            p0 = 1.0
            # Reprice on the t1 curve with one week (1/52 year) less to maturity.
            p1 = price_fixed_bond(coupon_rate=s5_lend, rem_t=5.0 - 1/52, par_curve=crv1, freq=4)
            lend_end_usd = units_ccy * p1 * usd1

            # borrowing leg: borrow 8MM USD equivalent in GBP; the GBP principal
            # is unchanged, plus one week (1/52) of interest at the funding rate
            borrow_usd0 = 8_000_000.0
            borrow_gbp_units = borrow_usd0 / gbp0
            borrow_end_usd = borrow_gbp_units * (1 + fund_spread/52.0) * gbp1

            # equity PnL relative to 2MM equity (10MM asset - 8MM debt)
            equity0 = 2_000_000.0
            equity1 = lend_end_usd - borrow_end_usd
            pnl_usd = equity1 - equity0
            ret = pnl_usd / equity0

        # NOTE: an inactive currency-week is recorded with ret = 0.0 and
        # active = 0, whereas ccy_rets below stores NaN for the same case.
        records.append({
            'date': t0,
            'next_date': t1,
            'ccy': c,
            'active': int(active),
            's5_lend': s5_lend,
            's5_fund_threshold': s5_fund + 0.005,
            'ois': ois_rate,
            'fx_usd_per_ccy_t0': usd0,
            'fx_usd_per_ccy_t1': usd1,
            'fx_usd_per_gbp_t0': gbp0,
            'fx_usd_per_gbp_t1': gbp1,
            'pnl_usd': pnl_usd,
            'ret': ret
        })
        ccy_rets[c] = ret if active else np.nan

    # Equal weight across the currencies active this week; 0 when none is active.
    active_rets = [v for v in ccy_rets.values() if np.isfinite(v)]
    port_ret = float(np.mean(active_rets)) if active_rets else 0.0
    weekly_port_ret.append({'date': t0, 'portfolio_ret': port_ret, 'active_positions': len(active_rets)})

res = pd.DataFrame(records)
port = pd.DataFrame(weekly_port_ret).set_index('date').sort_index()

print('Total currency-week rows:', len(res))
print('Active ratio:', res['active'].mean())
print('Funding threshold used OIS proxy:', used_funding_proxy)


In [None]:

# --- Performance analytics + artifacts ---

def max_drawdown(r):
    """Return the worst peak-to-trough loss of the compounded return series ``r``.

    Missing returns count as 0. The result is a non-positive float
    (0.0 when wealth never falls below its running peak).
    """
    growth = r.fillna(0).add(1).cumprod()
    running_peak = growth.cummax()
    drawdown = growth.div(running_peak).sub(1)
    return float(drawdown.min())

# Per-currency statistics are computed over ACTIVE weeks only. `res['ret']`
# holds 0.0 (not NaN) for weeks in which a currency was not traded, so those
# weeks are masked here. Without the mask, `dropna()` and `active_weeks` would
# count every week, Sharpe ratios and correlations would be diluted by
# zero-return weeks, and the drop-one portfolios below (mean over available
# currencies) would not be comparable with `port['portfolio_ret']`, which
# averages active positions only.
stats = []
active_ret = res['ret'].where(res['active'].astype(bool))
pivot_ret = (
    res.assign(ret=active_ret)
    .pivot(index='date', columns='ccy', values='ret')
    .sort_index()
)
for c in pivot_ret.columns:
    r = pivot_ret[c].dropna()
    if len(r) == 0:
        continue
    vol = r.std(ddof=1)
    stats.append({
        'ccy': c,
        'mean_weekly': r.mean(),
        'vol_weekly': vol,
        'sharpe_weekly': (r.mean()/vol if vol > 0 else np.nan),
        'max_drawdown': max_drawdown(r),
        'active_weeks': int(len(r))
    })
# Explicit columns keep sort_values() working when no currency was ever active.
stats_cols = ['ccy', 'mean_weekly', 'vol_weekly', 'sharpe_weekly', 'max_drawdown', 'active_weeks']
stats_df = pd.DataFrame(stats, columns=stats_cols).sort_values('sharpe_weekly', ascending=False)
corr = pivot_ret.corr(min_periods=20)

port['wealth'] = (1+port['portfolio_ret']).cumprod()

# drop-one diagnostic: rebuild the equal-weighted portfolio without one
# currency and compare its weekly Sharpe with the full portfolio's.
diag = []
port_vol = port['portfolio_ret'].std(ddof=1)
base_sharpe = port['portfolio_ret'].mean()/port_vol if port_vol > 0 else np.nan
for c in pivot_ret.columns:
    sub = pivot_ret.drop(columns=[c])
    # Mean over currencies active that week; weeks with none active return 0.
    ew = sub.mean(axis=1, skipna=True).fillna(0)
    ew_vol = ew.std(ddof=1)
    sh = ew.mean()/ew_vol if ew_vol > 0 else np.nan
    diag.append({'ccy': c, 'drop_one_sharpe': sh, 'delta_vs_base': sh-base_sharpe})
diag_cols = ['ccy', 'drop_one_sharpe', 'delta_vs_base']
diag_df = pd.DataFrame(diag, columns=diag_cols).sort_values('delta_vs_base')

stats_df.to_csv(OUT/'currency_stats.csv', index=False)
corr.to_csv(OUT/'currency_corr.csv')
port.reset_index().to_csv(OUT/'portfolio_weekly_returns.csv', index=False)
diag_df.to_csv(OUT/'drop_one_diagnostic.csv', index=False)

# figures
plt.figure(figsize=(9,4))
plt.plot(port.index, port['wealth'])
plt.title('FX Carry Portfolio Wealth (weekly EW across active positions)')
plt.ylabel('Wealth Index')
plt.tight_layout()
plt.savefig(FIG/'portfolio_wealth.png', dpi=140)
plt.close()

# Correlation heatmap with the coefficient printed in each finite cell.
plt.figure(figsize=(6,5))
mat = corr.values
plt.imshow(mat, cmap='RdBu_r', vmin=-1, vmax=1)
plt.colorbar(label='Correlation')
plt.xticks(range(len(corr.columns)), corr.columns, rotation=45, ha='right')
plt.yticks(range(len(corr.index)), corr.index)
for i in range(mat.shape[0]):
    for j in range(mat.shape[1]):
        val = mat[i,j]
        if np.isfinite(val):
            plt.text(j, i, f'{val:.2f}', ha='center', va='center', fontsize=8)
plt.title('Currency Return Correlation')
plt.tight_layout()
plt.savefig(FIG/'corr_heatmap.png', dpi=140)
plt.close()

# Per-currency wealth curves; inactive weeks are flat (return treated as 0).
for c in pivot_ret.columns:
    w = (1+pivot_ret[c].fillna(0)).cumprod()
    plt.figure(figsize=(8,3.5))
    plt.plot(w.index, w.values)
    plt.title(f'{c} strategy wealth')
    plt.tight_layout()
    plt.savefig(FIG/f'currency_wealth_{c}.png', dpi=120)
    plt.close()

print('Saved outputs:')
for f in sorted(OUT.glob('*.csv')):
    print(' -', f)
for f in sorted(FIG.glob('*.png'))[:5]:
    print(' -', f)

print('\nPortfolio summary:')
print(port[['portfolio_ret','active_positions']].describe().T)
print('\nCurrency stats:')
print(stats_df)
