In [1]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd

# The repository root is taken to be two directory levels above the current
# working directory, so this cell depends on where the kernel was started.
REPO_ROOT = Path.cwd().parent.parent
BACKEND_DIR = REPO_ROOT.joinpath("backend")
# Put the backend directory first on the import search path so the
# `data_engine` imports that follow can be resolved from it.
sys.path.insert(0, str(BACKEND_DIR))

from data_engine.yfinance_fetcher import YFinanceFetcher
from data_engine.forecasting.base_forecaster import SimpleForecaster

In [2]:
# Ticker, bar interval and history period passed to the fetcher.
SYMBOL = "BTC-USD"
INTERVAL = "1wk"
PERIOD = "5y"

fetcher = YFinanceFetcher()
df = fetcher.fetch_history(SYMBOL, interval=INTERVAL, period=PERIOD)

# Parse the timestamp column and put the rows in ascending time order.
df["timestamp"] = pd.to_datetime(df["timestamp"])
df = df.sort_values(by="timestamp")

# Close prices as a float Series indexed by timestamp, with missing values dropped.
prices = (
    df.set_index("timestamp")["close"]
    .astype(float)
    .dropna()
)

len(prices), prices.head()

(261,
 timestamp
 2021-02-22 00:00:00+00:00    45137.769531
 2021-03-01 00:00:00+00:00    51206.691406
 2021-03-08 00:00:00+00:00    59302.316406
 2021-03-15 00:00:00+00:00    57523.421875
 2021-03-22 00:00:00+00:00    55950.746094
 Name: close, dtype: float64)

In [3]:
from math import sqrt

# Number of trailing observations that are held out and forecast one at a time.
TEST_SIZE: int = 30
# Span given to SimpleForecaster; the backtest call also uses it as the
# minimum number of training observations.
SPAN: int = 20

def backtest_one_step(prices_full: pd.Series, test_size: int, model_factory, min_train: int) -> pd.DataFrame:
    """Run a walk-forward, one-step-ahead backtest over the tail of a series.

    For each of the last ``test_size`` observations, a fresh model is created
    with ``model_factory()``, fitted on all observations that precede it, and
    asked for a one-period forecast. The forecast is recorded next to the
    actual value.

    Parameters
    ----------
    prices_full : pd.Series
        Observations in evaluation order; its index values are copied into
        the ``timestamp`` column of the result.
    test_size : int
        Number of trailing observations to evaluate. Values larger than the
        series length are clamped to the full series; values ``<= 0`` yield
        an empty result.
    model_factory : callable
        Zero-argument callable returning an object with ``fit(train)`` and
        ``forecast(periods=1)``. The forecast result must support
        ``result["point_forecast"][0]`` and that element must be convertible
        to ``float``.
    min_train : int
        Steps whose training slice holds fewer than this many observations
        are skipped.

    Returns
    -------
    pd.DataFrame
        One row per evaluated step with columns ``timestamp``, ``y_true`` and
        ``y_pred``. The columns are present even when no step was evaluated.
    """
    columns = ["timestamp", "y_true", "y_pred"]
    n_obs = len(prices_full)

    # Clamp at 0: a negative start would make ``iloc`` wrap around to the end
    # of the series, so the trailing observations would be evaluated twice
    # and emitted out of order.
    first_eval = max(n_obs - test_size, 0)

    preds = []
    for i in range(first_eval, n_obs):
        # Only observations strictly before position i are visible to the model.
        train = prices_full.iloc[:i]
        if len(train) < min_train:
            continue

        # A new model per step keeps fitted state from leaking between steps.
        model = model_factory()
        model.fit(train)
        fc = model.forecast(periods=1)

        preds.append(
            {
                "timestamp": prices_full.index[i],
                "y_true": float(prices_full.iloc[i]),
                "y_pred": float(fc["point_forecast"][0]),
            }
        )

    # Passing ``columns`` keeps the schema stable for an empty result, so
    # downstream column access does not raise KeyError.
    return pd.DataFrame(preds, columns=columns)

# Baseline run: one-step-ahead backtest of SimpleForecaster over the last
# TEST_SIZE observations of `prices`. The factory is called by the backtest,
# so SPAN is read at call time.
pred_baseline = backtest_one_step(
    prices_full=prices,
    test_size=TEST_SIZE,
    model_factory=lambda: SimpleForecaster(span=SPAN, confidence_level=0.95),
    min_train=SPAN,
)

pred_baseline.head()

Unnamed: 0,timestamp,y_true,y_pred
0,2025-07-28 00:00:00+00:00,114217.671875,103987.6245
1,2025-08-04 00:00:00+00:00,119306.757812,104961.9147
2,2025-08-11 00:00:00+00:00,117453.0625,106328.0902
3,2025-08-18 00:00:00+00:00,113458.429688,107387.6114
4,2025-08-25 00:00:00+00:00,108236.710938,107965.7846


In [4]:
# Destination for the baseline predictions, relative to the repository root.
OUTDIR = REPO_ROOT.joinpath("model", "notebooks", "artifacts")
path = OUTDIR.joinpath("pred_baseline_weekly_price.parquet")

# Create the folder (and any missing parents) before writing; an existing
# folder is not an error.
OUTDIR.mkdir(parents=True, exist_ok=True)
pred_baseline.to_parquet(path, index=False)

print("Saved:", path)
print("Rows:", len(pred_baseline))

Saved: c:\Users\khanh\OneDrive - GUSCanada\Documents\capstone_project_unfc\model\notebooks\artifacts\pred_baseline_weekly_price.parquet
Rows: 30
