In [4]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import yfinance as yf


# Resolve the repository root relative to the kernel's working directory
# (assumes the notebook runs two directory levels below the repo root -- TODO confirm).
REPO_ROOT = Path.cwd().parent.parent
BACKEND_DIR = REPO_ROOT / "backend"

# Make the backend package importable. Guard the insert so that re-running this
# cell does not prepend a duplicate entry to sys.path on every execution.
_backend_path = str(BACKEND_DIR)
if _backend_path not in sys.path:
    sys.path.insert(0, _backend_path)


from analytics.forecasting.base import SimpleForecaster

In [12]:

# Download configuration: ticker, bar interval and look-back window handed to yfinance.
SYMBOL = "TSLA"
INTERVAL = "1wk"
PERIOD = "5y"

# Keep the untouched download under its own name so this cell is idempotent
# and the raw frame stays available for inspection.
raw_df = yf.download(tickers=SYMBOL, period=PERIOD, interval=INTERVAL, multi_level_index=False)

# Fail fast with an explicit message: an empty download would otherwise only
# surface later as a confusing KeyError or an empty backtest.
if raw_df is None or raw_df.empty:
    raise ValueError(
        f"No price data returned for {SYMBOL} (period={PERIOD}, interval={INTERVAL})"
    )

# Flatten the date index into a column, normalise column names, and order chronologically.
df = (
    raw_df.reset_index()
    .rename(columns={"Date": "timestamp", "Close": "close"})
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .sort_values("timestamp")
)

# Create clean series for forecasting: float closes indexed by timestamp, NaNs dropped.
prices = df.set_index("timestamp")["close"].astype(float).dropna()

print(f"Loaded {len(prices)} rows. Latest date: {prices.index.max()}")
print(f"Data type of prices: {type(prices)}")  # expected to be a pandas Series

len(prices), prices.head()





[*********************100%***********************]  1 of 1 completed

Loaded 262 rows. Latest date: 2026-02-23 00:00:00
Data type of prices: <class 'pandas.Series'>





(262,
 timestamp
 2021-02-22    225.166672
 2021-03-01    199.316666
 2021-03-08    231.243332
 2021-03-15    218.289993
 2021-03-22    206.236664
 Name: close, dtype: float64)

In [13]:
from math import sqrt

# Number of trailing observations held out for the one-step backtest (passed as test_size).
TEST_SIZE = 30
# Span given to SimpleForecaster; also reused as the minimum training length (min_train).
SPAN = 20

def backtest_one_step(prices_full: pd.Series, test_size: int, model_factory, min_train: int) -> pd.DataFrame:
    """Walk-forward, one-step-ahead backtest over the tail of a price series.

    For each of the last ``test_size`` observations, a new model is created with
    ``model_factory()``, fitted on every observation strictly before it, and asked
    for a one-period forecast. The forecast is recorded next to the realised value.

    Args:
        prices_full: Price series. Splitting is positional; the index values are
            reported in the ``timestamp`` column.
        test_size: Number of trailing observations to evaluate. A value larger
            than the series length is clamped to the whole series, and a value
            of zero or less evaluates nothing.
        model_factory: Zero-argument callable returning an object that exposes
            ``fit(train)`` and ``forecast(periods=1)``. The forecast result must
            support ``["point_forecast"]``, whose first element is the prediction.
        min_train: Minimum number of training observations; steps with fewer
            are skipped.

    Returns:
        DataFrame with columns ``timestamp``, ``y_true`` and ``y_pred``, one row
        per evaluated step. The columns are present even when no step was run.
    """
    columns = ["timestamp", "y_true", "y_pred"]
    n_obs = len(prices_full)

    # Clamp the start position: a negative start would make range() sweep the
    # negative positions first and then visit the same observations again,
    # producing duplicate rows.
    split_idx = max(n_obs - test_size, 0)

    preds = []
    for i in range(split_idx, n_obs):
        train = prices_full.iloc[:i]
        if len(train) < min_train:
            continue

        model = model_factory()
        model.fit(train)
        fc = model.forecast(periods=1)

        point = fc["point_forecast"]
        # A Series must be read positionally; label lookup with 0 breaks when
        # the forecast is indexed by something other than integers.
        first = point.iloc[0] if isinstance(point, pd.Series) else point[0]

        preds.append(
            {
                "timestamp": prices_full.index[i],
                "y_true": float(prices_full.iloc[i]),
                "y_pred": float(first),
            }
        )

    # Passing explicit columns keeps the schema stable for an empty result.
    return pd.DataFrame(preds, columns=columns)

def _baseline_factory():
    """Return a new SimpleForecaster configured with the notebook's SPAN."""
    return SimpleForecaster(span=SPAN, confidence_level=0.95)


# One-step backtest of the baseline over the last TEST_SIZE observations;
# steps with fewer than SPAN training points are skipped.
pred_baseline = backtest_one_step(
    prices_full=prices,
    test_size=TEST_SIZE,
    model_factory=_baseline_factory,
    min_train=SPAN,
)

pred_baseline.head()

Unnamed: 0,timestamp,y_true,y_pred
0,2025-08-04,329.649994,312.9055
1,2025-08-11,330.559998,314.5002
2,2025-08-18,340.01001,316.0297
3,2025-08-25,333.869995,318.3136
4,2025-09-01,350.839996,319.7951


In [None]:
# Write the baseline predictions to a parquet file in the repo's artifacts
# directory, creating the directory (and any missing parents) first.
OUTDIR = REPO_ROOT.joinpath("model", "notebooks", "artifacts")
OUTDIR.mkdir(parents=True, exist_ok=True)

path = OUTDIR.joinpath("pred_baseline_weekly_price.parquet")
pred_baseline.to_parquet(path, index=False)

# Report where the file went and how many prediction rows it holds.
print("Saved:", path)
print("Rows:", len(pred_baseline))

Saved: c:\Users\khanh\OneDrive - GUSCanada\Documents\capstone_project_unfc\model\notebooks\artifacts\pred_baseline_weekly_price.parquet
Rows: 30
