In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Record the numpy and pandas versions this notebook was executed with.
print(np.__version__, pd.__version__)

2.4.1 2.3.3


In [1]:
from source.rw import load_usdzar_csv, rw_forecast_table

# Load the USD/ZAR history from the project's CSV. The path is relative to the
# notebook's working directory. The back-test cell further down reuses `s`.
s = load_usdzar_csv("data/usd_zar_daily.csv")

# Calendar dates to forecast, as ISO strings.
targets = ["2026-02-01", "2026-03-01", "2026-06-01"]

# One row per target date. The displayed table carries the quantiles q05..q95
# and the exceedance probability P(S>K) for K = prob_gt.
tbl = rw_forecast_table(
    s,
    targets,
    method="normal",
    drift=True,    # True allows for drift (a non-zero mean)
    window=252,    # we're assuming 252 business days in a year
    prob_gt=18.0,  # optional: also report the probability that USD/ZAR > 18
)

tbl

Unnamed: 0,current_date,target_date,h_steps,S0,window_mean,window_std,method,q05,q25,q50,q75,q95,P(S>K),K
0,2026-01-13,2026-02-01,13,16.454,-0.000547,0.006694,normal,15.70157,16.073663,16.337484,16.605635,16.999153,3e-05,18.0
1,2026-01-13,2026-03-01,33,16.454,-0.000547,0.006694,normal,15.169313,15.74607,16.159838,16.584478,17.215041,0.002521,18.0
2,2026-01-13,2026-06-01,99,16.454,-0.000547,0.006694,normal,13.969688,14.902419,15.587196,16.303438,17.39199,0.015358,18.0


In [2]:
from source.rw import load_usdzar_csv, rw_past_table

# Historical target dates; the displayed table puts the forecast quantiles
# next to an `actual` column for each of them.
past_targets = ["2022-02-01", "2023-03-01", "2024-06-01"]

# Reuses the series `s` loaded by the forecast cell.
# In the recorded output the requested start 2021-01-01 is reported as
# current_date 2020-12-31, and the target 2024-06-01 as 2024-05-31.
# NOTE(review): method="bootstrap" presumably resamples at random and no seed
# is passed here - confirm whether rw_past_table exposes one, otherwise the
# quantiles may change between runs.
tbl2 = rw_past_table(
    s,
    target_dates=past_targets,
    start_date="2021-01-01",
    method="bootstrap",
    drift=True,   # True allows for drift (a non-zero mean)
    window=252,   # we're assuming 252 business days in a year
)

tbl2

Unnamed: 0,current_date,target_date,h_steps,S0,window_mean,window_std,method,q05,q25,q50,q75,q95,actual
0,2020-12-31,2022-02-01,271,14.6866,0.00011,0.010526,bootstrap,11.402836,13.484725,15.147171,17.020942,20.152988,15.2427
1,2020-12-31,2023-03-01,543,14.6866,0.00011,0.010526,bootstrap,10.407753,13.209144,15.535425,18.368489,23.434542,18.1364
2,2020-12-31,2024-05-31,849,14.6866,0.00011,0.010526,bootstrap,9.691624,13.107734,16.072751,19.804121,26.826777,18.6998


In [2]:
# Sanity check of the kernel's working directory: the data file is opened via
# the relative path "data/usd_zar_daily.csv", so it must resolve from here.
import os 
os.getcwd()

'c:\\FX-RW'

In [3]:
# Working-directory check via pathlib: Path() is the current directory and
# resolve() turns it into an absolute path.
from pathlib import Path
Path().resolve()

WindowsPath('C:/FX-RW')

In [19]:
# Read the raw daily quotes. "date" is parsed to datetime on load because a
# later cell takes date differences through the .dt accessor.
df = pd.read_csv(
    "data/usd_zar_daily.csv",
    parse_dates=["date"],
)

In [8]:
# Peek at the first rows to confirm the expected columns were loaded.
df.head()

Unnamed: 0,date,usd_zar
0,2015-12-31,15.5718
1,2016-01-04,15.5586
2,2016-01-05,15.6005
3,2016-01-06,15.8287
4,2016-01-07,16.1011


In [9]:
# Check the row count, column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2567 entries, 0 to 2566
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   date     2567 non-null   datetime64[ns]
 1   usd_zar  2567 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 40.2 KB


In [25]:
# Derive per-observation returns and the calendar spacing between quotes.
# The first row has no predecessor, so all three new columns are NaN there.
quotes = df["usd_zar"]
quote_dates = df["date"]

df["delta_ret"] = quotes.pct_change()       # simple return vs. the previous quote
df["log_ret"] = np.log(quotes).diff()       # log return: ln(S_t) - ln(S_{t-1})
df["day_gap"] = quote_dates.diff().dt.days  # calendar days since the previous quote
df.head(10)

Unnamed: 0,date,usd_zar,delta_ret,log_ret,day_gap,gap_type
0,2015-12-31,15.5718,,,,multi-day
1,2016-01-04,15.5586,-0.000848,-0.000848,4.0,multi-day
2,2016-01-05,15.6005,0.002693,0.002689,1.0,1-day
3,2016-01-06,15.8287,0.014628,0.014522,1.0,1-day
4,2016-01-07,16.1011,0.017209,0.017063,1.0,1-day
5,2016-01-08,16.0087,-0.005739,-0.005755,1.0,1-day
6,2016-01-11,16.5755,0.035406,0.034793,3.0,multi-day
7,2016-01-12,16.587,0.000694,0.000694,1.0,1-day
8,2016-01-13,16.4897,-0.005866,-0.005883,1.0,1-day
9,2016-01-14,16.568,0.004748,0.004737,1.0,1-day


In [26]:
# Label each observation by the calendar spacing since the previous quote:
# "1-day" for consecutive calendar days, "multi-day" for any larger gap.
df["gap_type"] = np.where(df["day_gap"] == 1, "1-day", "multi-day")
# The first row has no previous quote (day_gap is NaN). NaN != 1, so np.where
# would file it under "multi-day"; leave it unlabelled instead.
df.loc[df["day_gap"].isna(), "gap_type"] = np.nan

# Compare distributions (magnitude is usually what you care about).
# Only rows missing the grouping key or the return itself are dropped, so NaNs
# in unrelated columns cannot silently shrink the sample.
summary = (
    df.dropna(subset=["gap_type", "delta_ret"])
    .groupby("gap_type")
    .agg(
        mean=("delta_ret", "mean"),
        std=("delta_ret", "std"),
        mean_abs=("delta_ret", lambda x: x.abs().mean()),
        median_abs=("delta_ret", lambda x: x.abs().median()),
        count=("delta_ret", "count"),
    )
)
summary

Unnamed: 0_level_0,mean,std,mean_abs,median_abs,count
gap_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-day,5.9e-05,0.009384,0.007174,0.005791,2032
multi-day,0.000106,0.009827,0.007351,0.005636,534
