# HW6 — FX Carry Strategy (GBP funding)

In [None]:
from pathlib import Path
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Raise pandas display limits for the wide frames printed in this notebook.
for _opt, _val in (('display.max_columns', 200), ('display.width', 220)):
    pd.set_option(_opt, _val)

# Working directory plus the output folders every later cell writes into.
BASE = Path('.').resolve()
OUT = BASE / 'outputs'
FIG = OUT / 'figures'
for _folder in (OUT, FIG):
    _folder.mkdir(parents=True, exist_ok=True)

print('Working directory:', BASE)
print('Directory inventory:')
# List (at most ten names per folder) what is available in each candidate data
# folder; a folder that does not exist is reported with zero files.
for d in (BASE / 'data_clean', BASE / 'data', BASE / '../../Data'):
    if d.exists():
        files = sorted(d.glob('*'))
    else:
        files = []
    print(' -', d, 'files=', len(files))
    for f in files[:10]:
        print('    ', f.name)

In [None]:
def pick_file(paths, patterns):
    """Return the newest file matching any glob pattern in any existing folder.

    Parameters
    ----------
    paths : iterable of pathlib.Path
        Folders to search; folders that do not exist are skipped.
    patterns : iterable of str
        Glob patterns tried inside every folder.

    Returns
    -------
    pathlib.Path or None
        The match with the largest ``(st_mtime, name)`` key, or ``None`` when
        nothing matches. The chosen path is also printed.
    """
    # A set removes files that were matched by more than one pattern.
    candidates = {
        match
        for folder in paths
        if folder.exists()
        for pattern in patterns
        for match in folder.glob(pattern)
    }
    if not candidates:
        return None
    ranked = sorted(
        candidates,
        key=lambda p: (p.stat().st_mtime, p.name),
        reverse=True,
    )
    newest = ranked[0]
    print('Chosen file:', newest)
    return newest

def load_sonia():
    """Load the SONIA overnight rate as a clean, date-sorted decimal series.

    The newest ``*IUDSOIA*`` parquet/csv file in ``BASE/'data_clean'`` is read.
    If the frame already has a ``DatetimeIndex`` the first column is taken as
    the rate; otherwise the date column is the one named ``DATE`` (any case,
    falling back to the first column) and the value column is the first whose
    name contains ``IUDSOIA`` (falling back to the first non-date column).

    Returns
    -------
    pandas.Series
        Series named ``'sonia'`` with no NaN values, no NaT index labels and
        no duplicate dates, sorted by date. If the median exceeds 1 the data
        is treated as percent quotes and divided by 100.

    Raises
    ------
    FileNotFoundError
        If no matching file exists in ``data_clean``.
    """
    fp = pick_file([BASE/'data_clean'], ['*IUDSOIA*.parquet', '*IUDSOIA*.csv'])
    if fp is None:
        raise FileNotFoundError('No *IUDSOIA* file found in data_clean')
    df = pd.read_parquet(fp) if fp.suffix == '.parquet' else pd.read_csv(fp)

    if isinstance(df.index, pd.DatetimeIndex):
        idx = pd.to_datetime(df.index, errors='coerce').tz_localize(None)
        raw_vals = df.iloc[:, 0]
    else:
        date_col = next((c for c in df.columns if str(c).upper() == 'DATE'), df.columns[0])
        val_col = next((c for c in df.columns if 'IUDSOIA' in str(c).upper()), [c for c in df.columns if c != date_col][0])
        idx = pd.to_datetime(df[date_col], errors='coerce')
        raw_vals = df[val_col]
    s = pd.Series(pd.to_numeric(raw_vals, errors='coerce').values, index=idx, name='sonia')

    s = s.dropna()
    # dropna() only removes NaN *values*; rows whose date failed to parse would
    # otherwise survive under a NaT label and end up at the tail of the index
    # that load_gbp5y slices dates from.
    s = s[s.index.notna()]
    # One observation per date (last one in file order wins), matching the
    # de-duplication load_gbp5y applies, so label lookups return scalars.
    s = s[~s.index.duplicated(keep='last')]
    s = s.sort_index()

    # Percent-vs-decimal heuristic: a median above 1 is read as percent quotes.
    if s.median() > 1:
        s = s / 100.0
    return s

def load_gbp5y(sonia_idx):
    """Build a daily series named 'gbp5y' from the raw BoE OIS parquet archive.

    The archive has no usable date column in this code path: values from the
    'UK OIS spot curve' column are assigned dates *positionally*, taken from
    ``sonia_idx`` (restricted to the years found in the source file name when
    possible).

    Parameters
    ----------
    sonia_idx : pandas.DatetimeIndex
        Calendar used as the pool of dates to attach to the values.

    Returns
    -------
    pandas.Series
        Rates indexed by date, duplicates resolved keeping the last entry,
        divided by 100 when the median exceeds 1.

    Raises
    ------
    RuntimeError
        If the archive is missing, lacks the required columns, or no source
        file yields at least 100 usable values.

    NOTE(review): nothing in this function selects a tenor explicitly;
    presumably the 'UK OIS spot curve' column already holds the 5Y point after
    upstream cleaning — confirm against the archive builder.
    """
    fp=pick_file([BASE/'data_clean'], ['*boe*ois*daily*raw*.parquet','*ois*daily*raw*.parquet'])
    if fp is None:
        # Include the folder listing in the error to make the missing file obvious.
        inv=sorted((BASE/'data_clean').glob('*'))
        raise RuntimeError('Missing boe raw OIS archive. inventory=' + ','.join(x.name for x in inv))
    df=pd.read_parquet(fp)
    if not {'__source_file','__sheet','UK OIS spot curve'}.issubset(df.columns):
        raise RuntimeError('boe raw missing required columns')
    # Keep only rows that came from a sheet whose name mentions "spot curve".
    sub=df[df['__sheet'].astype(str).str.contains('spot curve', case=False, na=False)].copy()
    pieces=[]
    for src,g in sub.groupby('__source_file'):
        # Numeric values only, in file order.
        vals=pd.to_numeric(g['UK OIS spot curve'], errors='coerce').dropna().reset_index(drop=True)
        # Discard anything outside (-5, 25); presumably non-rate cells — TODO confirm.
        vals=vals[(vals>-5)&(vals<25)]
        # Drop a leading (1.0, 1/12) pair; presumably header residue from the
        # sheet rather than rate observations — TODO confirm.
        if len(vals)>2 and abs(vals.iloc[0]-1.0)<1e-8 and abs(vals.iloc[1]-1/12)<1e-8:
            vals=vals.iloc[2:].reset_index(drop=True)
        # Too few observations to be a usable daily history for this file.
        if len(vals)<100:
            continue
        # Year range is read from 20xx tokens in the source file name.
        years=[int(x) for x in re.findall(r'(20\d{2})', str(src))]
        if years:
            y0,y1=min(years),max(years)
            idx_pool=pd.DatetimeIndex([d for d in sonia_idx if y0<=d.year<=y1])
        else:
            idx_pool=sonia_idx
        if len(idx_pool)<len(vals):
            # Not enough dates in the year window: fall back to the tail of the
            # full calendar.
            # NOTE(review): if sonia_idx itself has fewer entries than vals this
            # slice is still too short and the Series construction below will
            # fail on a length mismatch — confirm whether that can happen.
            idx_pool=sonia_idx[-len(vals):]
        else:
            # Values are matched to the first len(vals) dates of the window.
            idx_pool=idx_pool[:len(vals)]
        pieces.append(pd.Series(vals.values, index=idx_pool))
    if not pieces:
        inv=sorted((BASE/'data_clean').glob('*'))
        msg='Could not build GBP 5Y from boe raw. inventory=' + ','.join(x.name for x in inv)
        raise RuntimeError(msg)
    s=pd.concat(pieces).sort_index()
    # Overlapping source files: keep the last value seen for each date.
    s=s[~s.index.duplicated(keep='last')].dropna()
    # Percent-vs-decimal heuristic: a median above 1 is treated as percent.
    if s.median()>1:
        s=s/100.0
    s.name='gbp5y'
    return s

def load_fx(em):
    """Load daily FX rates for ``em`` plus GBP, oriented as USD per currency.

    The newest wide ``*usd_per_ccy*wide*`` parquet or long
    ``*edi*cur*fx_long*`` parquet in ``BASE/'data_clean'`` is read; a long file
    is pivoted on ``date`` / ``ccy`` / ``value``.

    Parameters
    ----------
    em : list of str
        Currency codes required in addition to ``'GBP'``. Codes missing from
        the file are returned as all-NaN columns.

    Returns
    -------
    pandas.DataFrame
        Date-indexed (timezone-naive) frame with one numeric column per
        requested code, columns sorted alphabetically.

    Raises
    ------
    FileNotFoundError
        If no FX file is found.
    AssertionError
        If fewer than 95% of GBP observations fall in [0.3, 5.0] (or there are
        no GBP observations at all).
    """
    fp = pick_file([BASE/'data_clean'], ['*usd_per_ccy*wide*.parquet', '*edi*cur*fx_long*.parquet'])
    if fp is None:
        raise FileNotFoundError('No FX file found in data_clean')
    if 'fx_long' in fp.name:
        d = pd.read_parquet(fp)
        fx = d.pivot(index='date', columns='ccy', values='value')
    else:
        fx = pd.read_parquet(fp)
    if not isinstance(fx.index, pd.DatetimeIndex):
        fx.index = pd.to_datetime(fx.index, errors='coerce')
    fx.index = fx.index.tz_localize(None)
    fx.columns = [str(c).upper().strip() for c in fx.columns]
    fx = fx.apply(pd.to_numeric, errors='coerce').sort_index()

    need = sorted(set(list(em) + ['GBP']))
    for c in need:
        if c not in fx.columns:
            fx[c] = np.nan
    fx = fx[need]

    # Orientation check. USD per GBP has traded above 1.0, so a GBP median
    # below 1.0 means the file is quoted as currency per USD and every column
    # must be flipped. (The previous 0.3 cut-off could not trigger for
    # realistic GBP-per-USD quotes, which sit roughly between 0.4 and 0.95.)
    med = fx['GBP'].dropna().median()
    if np.isfinite(med) and med < 1.0:
        fx = 1.0 / fx
        print('FX inverted to USD per CCY')

    # Gross scale check, raised explicitly so it is not stripped under -O.
    # An empty GBP column gives a NaN share and fails the check as well.
    share_in_range = fx['GBP'].dropna().between(0.3, 5.0).mean()
    if not share_in_range > 0.95:
        raise AssertionError('GBP quote sanity failed')

    # Diagnostic only: count one-day moves larger than 20% per currency.
    jumps = (fx.pct_change().abs() > 0.20).sum()
    print('Daily FX jumps >20% counts:')
    print(jumps)
    return fx

def parse_curve(fp):
    """Parse one yield-curve CSV laid out as side-by-side (date, rate) columns.

    Columns are consumed in consecutive pairs. A pair is kept only when the
    upper-cased name of its first column matches ``GT<CCY><n>Y`` /
    ``GT<CCY><n>YR``; the three letters become ``ccy`` and ``n`` the tenor.
    A trailing unpaired column is ignored.

    Parameters
    ----------
    fp : path-like
        CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Long frame with columns ``date``, ``ccy``, ``tenor``, ``rate``; rows
        with an unparsable date or rate are dropped.

    Raises
    ------
    RuntimeError
        If no column pair has a recognisable ticker.
    """
    raw = pd.read_csv(fp)
    header = list(raw.columns)
    ticker_re = re.compile(r'GT([A-Z]{3})(\d+)(Y|YR)')

    frames = []
    # zip stops at the shorter slice, which skips an odd trailing column.
    for date_col, rate_col in zip(header[0::2], header[1::2]):
        hit = ticker_re.search(str(date_col).upper())
        if hit is None:
            continue
        block = pd.DataFrame({
            'date': pd.to_datetime(raw[date_col], errors='coerce'),
            'ccy': hit.group(1),
            'tenor': float(hit.group(2)),
            'rate': pd.to_numeric(raw[rate_col], errors='coerce'),
        })
        frames.append(block.dropna(subset=['date', 'rate']))

    if not frames:
        raise RuntimeError('No parsable tenors in '+str(fp))
    return pd.concat(frames, ignore_index=True)

def load_curves():
    """Load and stack every EM yield-curve CSV into one long rate table.

    Files named ``*Emerging*Mkt*YC*.csv`` are collected from
    ``BASE/'../../Data'``, ``BASE/'data'`` and ``BASE.parent/'data'`` (folders
    that do not exist are skipped), parsed with ``parse_curve`` and
    concatenated, newest file first.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``ccy``, ``tenor``, ``rate`` with one row per
        (date, ccy, tenor); repeated quotes are averaged. Rates are divided by
        100 when their overall median exceeds 1.

    Raises
    ------
    FileNotFoundError
        If no curve file is found.
    """
    search_dirs = (BASE/'../../Data', BASE/'data', BASE.parent/'data')
    found = {
        path
        for folder in search_dirs
        if folder.exists()
        for path in folder.glob('*Emerging*Mkt*YC*.csv')
    }
    files = sorted(found, key=lambda p: (p.stat().st_mtime, p.name), reverse=True)
    if not files:
        raise FileNotFoundError('No EM YC csv files found in ../../Data or ./data')
    print('Curve files:', [f.name for f in files])

    parsed = [parse_curve(f) for f in files]
    df = pd.concat(parsed, ignore_index=True)
    # Percent-vs-decimal heuristic applied to the pooled rates.
    if df['rate'].median() > 1:
        df['rate'] = df['rate'] / 100.0
    df['date'] = pd.to_datetime(df['date']).dt.tz_localize(None)
    # Collapse repeated (date, ccy, tenor) quotes to their mean.
    grouped = df.groupby(['date', 'ccy', 'tenor'], as_index=False)
    return grouped['rate'].mean()

# Lending currencies of the strategy; GBP is the funding leg.
EM = ['BRL', 'NGN', 'PKR', 'TRY', 'ZAR']

# Load every input once. load_gbp5y needs the SONIA calendar, so SONIA is first.
sonia = load_sonia()
gbp5 = load_gbp5y(sonia.index)
fx = load_fx(EM)
curves = load_curves()

# Report first date, last date and row count for each input.
for _label, _first, _last, _count in (
    ('Coverage sonia', sonia.index.min(), sonia.index.max(), len(sonia)),
    ('Coverage gbp5 ', gbp5.index.min(), gbp5.index.max(), len(gbp5)),
    ('Coverage fx   ', fx.index.min(), fx.index.max(), len(fx)),
    ('Coverage curve', curves['date'].min(), curves['date'].max(), len(curves)),
):
    print(_label, _first.date(), _last.date(), _count)

In [None]:
def align_weekly(obj, start, end, freq='W-WED'):
    """Sample a date-indexed Series/DataFrame onto a weekly grid.

    For every grid date ``d`` between ``start`` and ``end`` the first date
    present in ``obj.index`` among ``d, d+1, d+2, d-1, d-2`` (calendar days,
    in that order) supplies the observation, which is labelled with ``d``.
    Grid dates with no observation in that window are left out.

    Parameters
    ----------
    obj : pandas.Series or pandas.DataFrame
        Data with a date index.
    start, end : datetime-like
        Bounds passed to ``pandas.date_range``.
    freq : str, default 'W-WED'
        Frequency of the weekly grid.

    Returns
    -------
    Same type as ``obj``, indexed by the grid dates that found a match.
    """
    day_offsets = (0, 1, 2, -1, -2)
    picked = []
    labels = []
    for anchor in pd.date_range(start, end, freq=freq):
        match = None
        for shift in day_offsets:
            probe = anchor + pd.Timedelta(days=shift)
            if probe in obj.index:
                match = probe
                break
        if match is None:
            continue
        picked.append(obj.loc[match])
        labels.append(anchor)

    weekly_index = pd.DatetimeIndex(labels)
    if isinstance(obj, pd.Series):
        return pd.Series(picked, index=weekly_index, name=obj.name)
    return pd.DataFrame(picked, index=weekly_index, columns=obj.columns)

# One row per (date, ccy), one column per tenor.
curve_wide = (
    curves
    .pivot_table(index=['date', 'ccy'], columns='tenor', values='rate', aggfunc='mean')
    .sort_index()
)

# Sample window common to all four inputs.
start = max([sonia.index.min(), gbp5.index.min(), fx.index.min(), curves['date'].min()])
end = min([sonia.index.max(), gbp5.index.max(), fx.index.max(), curves['date'].max()])

# Weekly snapshots; only weeks present in all three GBP/FX panels are kept.
sonia_w = align_weekly(sonia, start, end)
gbp5_w = align_weekly(gbp5, start, end)
fx_w = align_weekly(fx, start, end)
dates = sonia_w.index.intersection(gbp5_w.index).intersection(fx_w.index)

# Per-currency weekly curves on the common dates; gaps carry the last curve
# forward. Currencies with no curve data are simply absent from the dict.
_ccy_with_curves = curve_wide.index.get_level_values('ccy')
curve_ccy = {
    c: align_weekly(curve_wide.xs(c, level='ccy'), start, end).reindex(dates).ffill()
    for c in EM
    if c in _ccy_with_curves
}

sonia_w = sonia_w.reindex(dates).ffill()
gbp5_w = gbp5_w.reindex(dates).ffill()
fx_w = fx_w.reindex(dates).ffill()

print('Weekly dates', dates.min().date(), dates.max().date(), 'n=', len(dates))
print('Missing weekly sonia', int(sonia_w.isna().sum()), 'gbp5', int(gbp5_w.isna().sum()))
print('Missing weekly fx by ccy')
print(fx_w.isna().sum())

In [None]:
def interp_rate(tenors, rates, t):
    """Linearly interpolate a rate at tenor ``t`` with flat extrapolation.

    Points where either the tenor or the rate is not finite are ignored and
    the remaining points are sorted by tenor first. Outside the tenor range
    the nearest end rate is returned.

    Parameters
    ----------
    tenors, rates : array-like of float
        Curve nodes, same length.
    t : float
        Tenor to evaluate.

    Returns
    -------
    float
        Interpolated rate, or NaN when no finite node is available.
    """
    xs = np.asarray(tenors, dtype=float)
    ys = np.asarray(rates, dtype=float)
    usable = np.isfinite(xs) & np.isfinite(ys)
    if not usable.any():
        return np.nan
    xs, ys = xs[usable], ys[usable]
    order = np.argsort(xs)
    xs, ys = xs[order], ys[order]
    value = np.interp(t, xs, ys, left=ys[0], right=ys[-1])
    return float(value)

def bootstrap_df(par_curve, freq=4, max_t=5.0):
    """Bootstrap discount factors on a regular payment grid from a par curve.

    For each grid time ``t`` the rate ``s`` is read from ``par_curve`` with
    ``interp_rate``, the per-period coupon is ``c = s / freq`` and

        DF(t) = (1 - c * sum of DFs at earlier grid times) / (1 + c)

    floored at ``1e-10``.

    Parameters
    ----------
    par_curve : pandas.Series
        Rates indexed by tenor.
    freq : int, default 4
        Payments per year; the grid step is ``1 / freq``.
    max_t : float, default 5.0
        Last grid time (inclusive up to a 1e-12 tolerance).

    Returns
    -------
    pandas.Series
        Discount factors indexed by grid time.
    """
    step = 1 / freq
    pay_grid = np.arange(step, max_t + 1e-12, step)
    tenors = par_curve.index.values
    quotes = par_curve.values

    discount = {}
    for k, t in enumerate(pay_grid):
        coupon = interp_rate(tenors, quotes, t) / freq
        # The grid is strictly increasing, so the first k nodes are exactly
        # the payment times before t.
        coupons_pv = sum(coupon * discount[u] for u in pay_grid[:k])
        discount[t] = max((1 - coupons_pv) / (1 + coupon), 1e-10)
    return pd.Series(discount)

def price_bond(coupon_rate, par_curve, pay_times, freq=4):
    """Price a unit-notional fixed-coupon bond off a bootstrapped par curve.

    Discount factors come from ``bootstrap_df`` and are linearly interpolated
    to the payment times. The interpolation is anchored at ``DF(0) = 1`` so a
    cash flow that falls before the first grid node (for example the first
    coupon one week after entry, at ``0.25 - 1/52``) is discounted for its
    actual time to payment instead of being clamped to the first node's
    discount factor.

    Parameters
    ----------
    coupon_rate : float
        Annual coupon rate; each payment is ``coupon_rate / freq``.
    par_curve : pandas.Series
        Rates indexed by tenor.
    pay_times : array-like of float
        Payment times in ascending order; the last one also repays the
        principal of 1.
    freq : int, default 4
        Coupon payments per year.

    Returns
    -------
    float
        Present value per unit notional; ``1.0`` when there are no payments.
    """
    times = np.asarray(pay_times, dtype=float)
    if times.size == 0:
        return 1.0
    horizon = max(5.0, float(times.max()) + 0.25)
    z = bootstrap_df(par_curve, freq=freq, max_t=horizon)

    # Prepend the (t=0, DF=1) node so the first segment is interpolated too.
    node_t = np.concatenate(([0.0], np.asarray(z.index.values, dtype=float)))
    node_df = np.concatenate(([1.0], np.asarray(z.values, dtype=float)))
    dfs = np.interp(times, node_t, node_df, left=1.0, right=node_df[-1])

    cf = np.full(times.size, coupon_rate / freq)
    cf[-1] += 1.0  # principal repaid with the final coupon
    return float(np.sum(cf * dfs))

# Quarterly payment times of a 5Y bond at entry, and the same payments seen
# one week (1/52 year) later at exit.
entry_t = np.arange(0.25, 5.0 + 1e-12, 0.25)
_shifted = entry_t - 1 / 52
exit_t = _shifted[_shifted > 0]
print('entry head', entry_t[:5], 'tail', entry_t[-5:])
print('exit head', exit_t[:5], 'tail', exit_t[-5:])
print('shift check', bool(np.allclose(entry_t[:len(exit_t)]-exit_t, 1/52, atol=1e-10)))

# par checks
# A bond paying the interpolated 5Y rate, priced on the curve it was read
# from, should be worth 1; sample up to four dates per currency to verify.
checks = []
rng = np.random.default_rng(0)
for c in EM:
    if c not in curve_ccy:
        continue
    cdf = curve_ccy[c].dropna(how='all')
    if cdf.empty:
        continue
    n_draw = min(4, len(cdf))
    for j in rng.choice(len(cdf), size=n_draw, replace=False):
        pos = int(j)
        row = cdf.iloc[pos].dropna()
        if row.empty:
            continue
        s5 = interp_rate(row.index.values, row.values, 5.0)
        p = price_bond(s5, row, entry_t)
        checks.append((c, cdf.index[pos], s5, p))

chk = pd.DataFrame(checks, columns=['ccy', 'date', 's5', 'par_price'])
print(chk.head(10))
print('max abs par error', float((chk['par_price']-1).abs().max()) if len(chk) else np.nan)

In [None]:
# Weekly backtest: for each pair of consecutive weekly dates (t0, t1) and each
# EM currency, decide at t0 whether the carry trade is on, then mark it at t1.
# `rows` collects one record per (week, currency); `port` one per week.
rows=[]
port=[]
for i in range(len(dates)-1):
    t0,t1=dates[i],dates[i+1]
    # Funding-side inputs observed at t0.
    s5fund=float(gbp5_w.loc[t0])
    ois=float(sonia_w.loc[t0])
    # Borrowing cost is the overnight rate plus 50bp.
    borrow=ois+0.005
    ccy_rets=[]
    active_count=0
    for c in EM:
        if c not in curve_ccy:
            continue
        # Curve at entry (t0) and at exit (t1); both must have at least one tenor.
        c0=curve_ccy[c].loc[t0].dropna(); c1=curve_ccy[c].loc[t1].dropna()
        if c0.empty or c1.empty:
            continue
        s5lend=interp_rate(c0.index.values,c0.values,5.0)
        # Trade only when the 5Y lending rate beats the 5Y funding rate by >= 50bp.
        active=bool(np.isfinite(s5lend) and np.isfinite(s5fund) and (s5lend>=s5fund+0.005))

        fx0=fx_w.at[t0,c]; fx1=fx_w.at[t1,c]
        gbp0=fx_w.at[t0,'GBP']; gbp1=fx_w.at[t1,'GBP']
        # Any missing FX quote on either date switches the position off.
        if not np.isfinite([fx0,fx1,gbp0,gbp1]).all():
            active=False
        # Inactive rows are still recorded, with NaN return and zero PnL.
        ret=np.nan; pnl=0.0
        if active:
            # Asset leg: 10MM converted at fx0 into a bond with coupon s5lend,
            # repriced on the t1 curve with one week less to each payment and
            # converted back at fx1 (presumably fx_w is USD per unit of
            # currency, so amounts are USD — confirm against load_fx).
            units=10_000_000/fx0
            p1=price_bond(s5lend,c1,exit_t)
            lend_end=units*p1*fx1

            # Liability leg: 8MM borrowed in GBP at gbp0, repaid with one week
            # of simple interest (borrow/52) at gbp1.
            debt_units=8_000_000/gbp0
            debt_end=debt_units*(1+borrow/52)*gbp1

            # Equity is the 2MM difference between the two legs at entry;
            # the weekly return is PnL over that starting equity.
            eq0=2_000_000
            eq1=lend_end-debt_end
            pnl=eq1-eq0
            ret=pnl/eq0
            active_count+=1
            ccy_rets.append(ret)

        rows.append({'date':t0,'next_date':t1,'ccy':c,'active':int(active),'s5_lend':s5lend,'s5_fund':s5fund,'hurdle':s5fund+0.005,'ois':ois,'ret':ret,'pnl_usd':pnl})
    # Portfolio return: equal-weight mean over active currencies, 0 if none.
    pr=float(np.mean(ccy_rets)) if ccy_rets else 0.0
    port.append({'date':t0,'port_ret':pr,'active_positions':active_count})

res=pd.DataFrame(rows)
# From here on `port` is a DataFrame indexed by the period start date t0.
port=pd.DataFrame(port).set_index('date').sort_index()
port['wealth']=(1+port['port_ret']).cumprod()
port['dd']=port['wealth']/port['wealth'].cummax()-1
# Sanity check on the drawdown series (tolerance for float rounding).
assert (port['dd']>=-1-1e-10).all()

# Share of weeks each currency was active, plus one extra row holding the
# average number of active positions per week.
active_diag=res.groupby('ccy',as_index=False)['active'].mean().rename(columns={'active':'active_frac'})
active_diag.loc[len(active_diag)]={'ccy':'ALL_AVG_ACTIVE_POS','active_frac':port['active_positions'].mean()}
active_diag.to_csv(OUT/'active_diagnostics.csv', index=False)
print(active_diag)

In [None]:
def ann_sharpe(r):
    """Return the annualised Sharpe ratio of weekly returns ``r``.

    Computed as ``sqrt(52) * mean / sample std`` (``ddof=1``) with no
    risk-free adjustment. NaN is returned when the standard deviation is not
    strictly positive (including when it is undefined).
    """
    vol = r.std(ddof=1)
    if vol > 0:
        return np.sqrt(52) * r.mean() / vol
    return np.nan

def maxdd_from_ret(r):
    w=(1+r.fillna(0)).cumprod()
    return float((w/w.cummax()-1).min())

# Per-currency weekly returns, one column per currency (NaN when inactive).
pivot=res.pivot(index='date',columns='ccy',values='ret').sort_index()
stats=[]
for c in pivot.columns:
    # Only weeks in which the currency was actually traded.
    r=pivot[c].dropna()
    if len(r)==0:
        continue
    stats.append({'ccy':c,'mean_weekly':r.mean(),'vol_weekly':r.std(ddof=1),'ann_sharpe':ann_sharpe(r),'active_frac':float(res.loc[res.ccy==c,'active'].mean()),'pnl_sum_usd':float(res.loc[res.ccy==c,'pnl_usd'].sum()),'max_dd_wealth':maxdd_from_ret(r),'active_weeks':int(r.notna().sum())})
stats_df=pd.DataFrame(stats).sort_values('ann_sharpe',ascending=False)
# Pairwise correlations need at least 25 overlapping active weeks.
corr=pivot.corr(min_periods=25)

# Drop-one diagnostic: rebuild the equal-weight portfolio without each
# currency in turn and compare Sharpe / max drawdown with the full portfolio.
base=port['port_ret']
base_sh=ann_sharpe(base)
base_dd=float(port['dd'].min())
rows=[]
for c in pivot.columns:
    # Mean over the remaining active currencies; weeks with none count as 0.
    ew=pivot.drop(columns=[c]).mean(axis=1,skipna=True).fillna(0)
    w=(1+ew).cumprod(); dd=float((w/w.cummax()-1).min())
    rows.append({'ccy_removed':c,'ann_sharpe_drop_one':ann_sharpe(ew),'max_dd_drop_one':dd,'delta_sharpe':ann_sharpe(ew)-base_sh,'delta_dd':dd-base_dd})
drop_df=pd.DataFrame(rows).sort_values('delta_sharpe')

# Persist the tables.
stats_df.to_csv(OUT/'currency_stats.csv',index=False)
corr.to_csv(OUT/'currency_corr.csv')
drop_df.to_csv(OUT/'drop_one_diagnostic.csv',index=False)
port.reset_index().to_csv(OUT/'portfolio_weekly_returns.csv',index=False)

# Figures: wealth path, drawdown path, and an annotated correlation heatmap.
plt.figure(figsize=(9,4)); plt.plot(port.index,port['wealth']); plt.title('Portfolio wealth'); plt.tight_layout(); plt.savefig(FIG/'portfolio_wealth.png',dpi=130); plt.close()
plt.figure(figsize=(9,4)); plt.plot(port.index,port['dd']); plt.title('Portfolio drawdown'); plt.tight_layout(); plt.savefig(FIG/'portfolio_drawdown.png',dpi=130); plt.close()
plt.figure(figsize=(6,5)); mat=corr.values; plt.imshow(mat,cmap='RdBu_r',vmin=-1,vmax=1); plt.colorbar(); plt.xticks(range(len(corr.columns)),corr.columns,rotation=45,ha='right'); plt.yticks(range(len(corr.index)),corr.index)
# Write each finite correlation into its cell; NaN cells stay blank.
for i in range(mat.shape[0]):
    for j in range(mat.shape[1]):
        if np.isfinite(mat[i,j]):
            plt.text(j,i,f'{mat[i,j]:.2f}',ha='center',va='center',fontsize=8)
plt.tight_layout(); plt.savefig(FIG/'corr_heatmap.png',dpi=130); plt.close()
print('core outputs done')

In [None]:
# Market factor analysis (proxy factors if no external factors)
# Candidate external factor files are only listed here; the regressions below
# always use the proxies built from the weekly panels.
factor_hits=[]
for pat in ['*VIX*','*DXY*','*SPX*','*MSCI*','*UST*','*rates*','*factors*']:
    factor_hits += list((BASE/'data_clean').glob(pat))
print('factor files found', [f.name for f in sorted(set(factor_hits))])

# Weekly changes stamped at the END of the week they cover (diff at t uses t-1 and t).
emfx=np.log(fx_w[EM]).diff().mean(axis=1)
usd_strength=-np.log(fx_w['GBP']).diff()
# NOTE(review): usd_strength rises when USD gains against GBP, while emfx rises
# when the EM basket gains against USD (assuming fx_w is USD per currency), so
# this blend is not a pure USD-strength index — confirm the intended sign of
# the emfx term.
proxy_usd=0.5*usd_strength+0.5*emfx
proxy_d_gbp5=gbp5_w.diff()
proxy_d_sonia=sonia_w.diff()

factors=pd.DataFrame({'proxy_usd_broad':proxy_usd,'proxy_emfx_basket_ret':emfx,'proxy_rates_d_gbp5y':proxy_d_gbp5,'proxy_rates_d_sonia':proxy_d_sonia}).sort_index()
# port_ret stamped t0 is earned over [t0, t1], whereas the factor changes above
# are stamped with the period end. Shift them back one week so each return is
# paired with the factor move over the SAME week, then align to port.index.
factors=factors.shift(-1).reindex(port.index)
mf=pd.concat([port['port_ret'],factors],axis=1).dropna()

corr_f=mf.corr().loc[factors.columns,['port_ret']].rename(columns={'port_ret':'corr_with_port'})
corr_f.to_csv(OUT/'market_factor_corr.csv')

# Univariate OLS of the portfolio return on each factor (with intercept),
# using classical standard errors.
regs=[]
y=mf['port_ret'].values
for fac in factors.columns:
    x=mf[[fac]].values[:,0]
    X=np.column_stack([np.ones(len(x)),x])
    b=np.linalg.lstsq(X,y,rcond=None)[0]
    yh=X@b
    e=y-yh
    n=len(y); k=2
    s2=(e@e)/(n-k)
    cov=s2*np.linalg.inv(X.T@X)
    se=np.sqrt(np.diag(cov))
    t=b[1]/se[1] if se[1]>0 else np.nan
    r2=1-(e@e)/np.sum((y-y.mean())**2)
    regs.append({'model':'univariate','factor':fac,'alpha':b[0],'beta':b[1],'t_beta':t,'r2':r2,'n':n})

# Multivariate OLS on all factors at once; one output row per coefficient,
# including the constant (whose 'beta' equals 'alpha').
X=np.column_stack([np.ones(len(mf))]+[mf[c].values for c in factors.columns])
b=np.linalg.lstsq(X,y,rcond=None)[0]
yh=X@b
e=y-yh
n=len(y); k=X.shape[1]
s2=(e@e)/(n-k)
cov=s2*np.linalg.inv(X.T@X)
se=np.sqrt(np.diag(cov))
r2=1-(e@e)/np.sum((y-y.mean())**2)
labels=['const']+list(factors.columns)
for j,lab in enumerate(labels):
    regs.append({'model':'multivariate','factor':lab,'alpha':b[0],'beta':b[j],'t_beta':(b[j]/se[j] if se[j]>0 else np.nan),'r2':r2,'n':n})
reg_df=pd.DataFrame(regs)
reg_df.to_csv(OUT/'market_factor_regs.csv',index=False)

# Scatter of portfolio return against each factor with a fitted line.
for fac in factors.columns:
    tmp=mf[['port_ret',fac]].dropna()
    plt.figure(figsize=(5,4)); plt.scatter(tmp[fac],tmp['port_ret'],s=10,alpha=0.6)
    z=np.polyfit(tmp[fac],tmp['port_ret'],1)
    xs=np.linspace(tmp[fac].min(),tmp[fac].max(),100)
    plt.plot(xs,z[0]*xs+z[1],color='red'); plt.xlabel(fac); plt.ylabel('port_ret'); plt.tight_layout(); plt.savefig(FIG/f'factor_scatter_{fac}.png',dpi=120); plt.close()

print(corr_f)
print(reg_df.head(12))