In [1]:
!pip install tensorflow
!pip install cupy-cuda12x



In [2]:
!pip install opencv-contrib-python



In [3]:
!pip install numba



In [4]:
!pip install numpy



In [7]:
!pip install arch
!pip install statsmodels




In [None]:
import warnings, sys, math, numpy as np, pandas as pd
from datetime import datetime
warnings.filterwarnings("ignore")


from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox, het_arch


from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima

# GARCH
from arch import arch_model

# Data
import yfinance as yf

pd.set_option("display.width", 120)
pd.set_option("display.max_columns", 20)
pd.set_printoptions(suppress=True, precision=6)

In [17]:

ASSETS = {
    "BTC": "BTC-USD",
    "EURUSD": "EURUSD=X"
}

START_DATE = "2018-01-01"
END_DATE   = None


TRAIN_WINDOW = 500
ROLLING_STEPS = 60
MAX_PQ = 2
LJUNG_LAGS = 10

print("Assets:", ASSETS)
print("Start:", START_DATE, "End:", END_DATE)

Assets: {'BTC': 'BTC-USD', 'EURUSD': 'EURUSD=X'}
Start: 2018-01-01 End: None


In [14]:
!pip install yfinance



In [15]:
import yfinance as yf

In [18]:
def fetch_prices_yf(ticker, start=START_DATE, end=END_DATE, tz_utc=True):
    df = yf.download(ticker, start=start, end=end, progress=False)
    if df.empty:
        raise ValueError(f"No data returned for {ticker}")
    df = df.rename(columns={"Close": "close"})[["close"]]
    if tz_utc and df.index.tz is not None:
        df = df.tz_convert(None)  # drop tz info
    return df


prices = pd.DataFrame()
for name, yf_ticker in ASSETS.items():
    tmp = fetch_prices_yf(yf_ticker)
    tmp = tmp.rename(columns={"close": name})
    prices = tmp if prices.empty else prices.join(tmp, how="outer")

prices = prices.dropna(how="all")
print("Downloaded prices head:")
print(prices.head(10))
print("\nData coverage per asset (rows):")
print(prices.notna().sum())


Downloaded prices head:
Price                BTC    EURUSD
Ticker           BTC-USD  EURUSD=X
Date                              
2018-01-01  13657.200195  1.200495
2018-01-02  14982.099609  1.201158
2018-01-03  15201.000000  1.206345
2018-01-04  15599.200195  1.201043
2018-01-05  17429.500000  1.206884
2018-01-06  17527.000000       NaN
2018-01-07  16477.599609       NaN
2018-01-08  15170.099609  1.203746
2018-01-09  14595.400391  1.197046
2018-01-10  14973.299805  1.192933

Data coverage per asset (rows):
Price   Ticker  
BTC     BTC-USD     2784
EURUSD  EURUSD=X    1986
dtype: int64


In [19]:

logret = np.log(prices / prices.shift(1)).dropna()

print("Log returns head:")
print(logret.head(10))

def adf_summary(series, name):
    res = adfuller(series.dropna(), autolag="AIC")
    out = {
        "asset": name,
        "adf_stat": res[0],
        "p_value": res[1],
        "usedlag": res[2],
        "nobs": res[3]
    }
    return out

print("\nADF stationarity tests on returns:")
adf_rows = [adf_summary(logret[col], col) for col in logret.columns]
print(pd.DataFrame(adf_rows).set_index("asset"))


Log returns head:
Price            BTC    EURUSD
Ticker       BTC-USD  EURUSD=X
Date                          
2018-01-02  0.092589  0.000552
2018-01-03  0.014505  0.004309
2018-01-04  0.025858 -0.004406
2018-01-05  0.110945  0.004852
2018-01-09 -0.038620 -0.005582
2018-01-10  0.025562 -0.003442
2018-01-11 -0.110581  0.002245
2018-01-12  0.041983  0.007163
2018-01-16 -0.184582  0.006225
2018-01-17 -0.026625  0.000233

ADF stationarity tests on returns:
                     adf_stat  p_value  usedlag  nobs
asset                                                
(BTC, BTC-USD)     -43.043536      0.0        0  1585
(EURUSD, EURUSD=X) -40.070071      0.0        0  1585


In [1]:
!pip install --force-reinstall --no-cache-dir "numpy<2.1" pmdarima yfinance tensorflow cupy-cuda12x opencv-python numba


Collecting numpy<2.1
  Downloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pmdarima
  Downloading pmdarima-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Collecting yfinance
  Downloading yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting tensorflow
  Downloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting cupy-cuda12x
  Downloading cupy_cuda12x-13.5.1-cp311-cp311-manylinux2014_x86_64.whl.metadata (2.4 kB)
Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Collecting numba
 

In [4]:
!pip install pmdarima




In [11]:
import yfinance as yf
import numpy as np
import pandas as pd

tickers = ["AAPL", "MSFT", "GOOGL"]


data = yf.download(tickers, start="2020-01-01", end="2025-01-01", auto_adjust=False)

adj_close = data["Adj Close"]

logret = np.log(adj_close / adj_close.shift(1))


[*********************100%***********************]  3 of 3 completed


In [26]:
!pip uninstall -y numpy pmdarima

!pip install --no-cache-dir "numpy==1.26.4"

!pip install --no-cache-dir pmdarima

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: pmdarima 2.0.4
Uninstalling pmdarima-2.0.4:
  Successfully uninstalled pmdarima-2.0.4
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m51.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m176.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Downloading pmdarima-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pmdarima
Successfully installed pmdarima-2.0.4


In [1]:
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

In [5]:
import yfinance as yf
import numpy as np
import pandas as pd
tickers = ["AAPL", "MSFT", "GOOGL"]

data = yf.download(tickers, start="2020-01-01", end="2025-01-01", auto_adjust=False)

adj_close = data["Adj Close"]

logret = np.log(adj_close / adj_close.shift(1))

[*********************100%***********************]  3 of 3 completed


In [None]:
arima_orders = {}
arima_models  = {}

for col in logret.columns:
    series = logret[col].dropna()

    step = auto_arima(series, start_p=0, start_q=0, max_p=MAX_PQ, max_q=MAX_PQ,
                      d=0, seasonal=False, stepwise=True, trace=False,
                      error_action="ignore", suppress_warnings=True)
    order = step.order
    arima_orders[col] = order

    mdl = ARIMA(series, order=order).fit()
    arima_models[col] = mdl

print("Selected ARIMA orders (p,d,q):")
for k, v in arima_orders.items():
    print(f"  {k}: {v}, AIC={arima_models[k].aic:.2f}")


In [None]:
def ljung_box(resid, lags=LJUNG_LAGS):
    lb = acorr_ljungbox(resid.dropna(), lags=[lags], return_df=True)
    return float(lb["lb_pvalue"].iloc[0])

def arch_lm(resid, lags=5):
    stat, pval, _, _ = het_arch(resid.dropna(), maxlag=lags)
    return float(pval)

print("Residual diagnostics (p-values):")
diag_rows = []
for col, mdl in arima_models.items():
    resid = pd.Series(mdl.resid, index=logret.index[-len(mdl.resid):], name=f"{col}_resid")
    lb_p = ljung_box(resid)
    arch_p = arch_lm(resid)
    diag_rows.append({"asset": col, "ljung_box_p": lb_p, "arch_lm_p": arch_p})
print(pd.DataFrame(diag_rows).set_index("asset"))


In [8]:
garch_results = {}
garch_params  = {}

for col, mdl in arima_models.items():
    resid = pd.Series(mdl.resid, index=logret.index[-len(mdl.resid):])

    am = arch_model(resid, mean="Zero", vol="GARCH", p=1, q=1, dist="t")
    res = am.fit(disp="off")
    garch_results[col] = res

    pars = res.params
    alpha = pars.get("alpha[1]", np.nan)
    beta  = pars.get("beta[1]", np.nan)
    omega = pars.get("omega", np.nan)
    dof   = pars.get("nu", np.nan)
    persistence = alpha + beta if (not math.isnan(alpha) and not math.isnan(beta)) else np.nan

    garch_params[col] = {
        "omega": float(omega),
        "alpha": float(alpha),
        "beta":  float(beta),
        "nu (Student-t dof)": float(dof),
        "alpha+beta": float(persistence)
    }

print("GARCH(1,1)-t parameter summary:")
print(pd.DataFrame(garch_params).T)


GARCH(1,1)-t parameter summary:
Empty DataFrame
Columns: []
Index: []


In [None]:
def one_step_forecast(arima_mdl, garch_res):

    mean_f = arima_mdl.get_forecast(steps=1).predicted_mean.iloc[0]


    vf = garch_res.forecast(horizon=1, reindex=False)
    var_1 = vf.variance.values[-1, 0]
    vol_1 = float(np.sqrt(var_1))

    return float(mean_f), vol_1

print("One-step-ahead forecasts:")
rows = []
for col in logret.columns:
    m, v = one_step_forecast(arima_models[col], garch_results[col])
    rows.append({"asset": col, "mean_forecast": m, "vol_forecast_daily": v})
print(pd.DataFrame(rows).set_index("asset"))


In [None]:
def rolling_arima_garch_forecasts(returns: pd.Series,
                                  train_window=TRAIN_WINDOW,
                                  steps=ROLLING_STEPS,
                                  max_pq=MAX_PQ):
    """
    Rolling ARIMA->GARCH 1-step forecasts for the last `steps` points.
    Returns DataFrame with columns ['mean_fc', 'vol_fc'] aligned to forecasted dates.
    """
    series = returns.dropna().copy()
    if len(series) < train_window + steps + 5:
        train_window = max(100, len(series) - steps - 5)

    out_idx = series.index[-steps:]
    mean_fc = pd.Series(index=out_idx, dtype=float)
    vol_fc  = pd.Series(index=out_idx, dtype=float)


    for i, dt in enumerate(out_idx):
        end_loc = series.index.get_loc(dt)
        start_loc = max(0, end_loc - train_window)
        train = series.iloc[start_loc:end_loc]


        step = auto_arima(train, start_p=0, start_q=0, max_p=max_pq, max_q=max_pq,
                          d=0, seasonal=False, stepwise=True, trace=False,
                          error_action="ignore", suppress_warnings=True)
        order = step.order
        arima_mdl = ARIMA(train, order=order).fit()
        resid = pd.Series(arima_mdl.resid, index=train.index)

        am = arch_model(resid, mean="Zero", vol="GARCH", p=1, q=1, dist="t")
        garch_res = am.fit(disp="off")

        m, v = one_step_forecast(arima_mdl, garch_res)
        mean_fc.iloc[i] = m
        vol_fc.iloc[i]  = v

        if (i+1) % 10 == 0:
            print(f"[{returns.name}] Rolling step {i+1}/{steps} done...")

    return pd.DataFrame({"mean_fc": mean_fc, "vol_fc": vol_fc})

def backtest_directional(returns: pd.Series, forecasts: pd.DataFrame,
                         vol_cap=0.04, tx_cost=0.0002):
    """
    positions = sign(mean_fc); optionally shrink when vol_fc > cap.
    """
    fc = forecasts.copy()
    pos = np.sign(fc["mean_fc"]).replace(0, 0.0)


    scale = (vol_cap / fc["vol_fc"]).clip(upper=1.0)
    pos = pos * scale

    ret = returns.loc[fc.index]
    strat_ret = pos.shift(1).fillna(0) * ret

    strat_ret = strat_ret - tx_cost * pos.diff().abs().fillna(0)

    eq = (1 + strat_ret).cumprod()
    return strat_ret, eq, pos

def perf_metrics(simple_returns: pd.Series, periods_per_year=252):
    mu = simple_returns.mean() * periods_per_year
    sigma = simple_returns.std() * np.sqrt(periods_per_year)
    sharpe = (mu / sigma) if sigma > 0 else np.nan

    cum = (1 + simple_returns).cumprod()
    roll_max = cum.cummax()
    dd = (roll_max - cum) / roll_max
    max_dd = dd.max()

    def cagr():
        if len(cum) < 2:
            return np.nan
        total = cum.iloc[-1]
        years = len(cum) / periods_per_year
        return total**(1/years) - 1 if total > 0 else np.nan

    return {"AnnualReturn": mu, "AnnualVol": sigma, "Sharpe": sharpe, "MaxDD": float(max_dd), "CAGR": cagr()}

results_table = []
for col in logret.columns:
    fc = rolling_arima_garch_forecasts(logret[col], train_window=TRAIN_WINDOW, steps=ROLLING_STEPS)
    strat_ret, eq, pos = backtest_directional(logret[col], fc, vol_cap=0.04, tx_cost=0.0002)
    met = perf_metrics(strat_ret)
    met["asset"] = col
    results_table.append(met)

    print(f"\n[{col}] First few forecasts:")
    print(fc.head())
    print(f"\n[{col}] First few strategy returns:")
    print(strat_ret.head())
    print(f"\n[{col}] Equity curve last 5:")
    print(eq.tail())

print("\n==== OOS Performance Summary (last", ROLLING_STEPS, "days) ====")
print(pd.DataFrame(results_table).set_index("asset"))


In [None]:
def backtest_vol_target(returns: pd.Series, forecasts: pd.DataFrame,
                        target_vol=0.02, max_leverage=2.0, tx_cost=0.0002):
    """
    position = clip( mean_fc / vol_fc * target_vol_scale )
    """
    fc = forecasts.copy()
    raw = (fc["mean_fc"] / fc["vol_fc"].replace(0, np.nan)).fillna(0)
    pos = raw * (target_vol / (fc["vol_fc"].median() + 1e-8))
    pos = pos.clip(-max_leverage, max_leverage)

    ret = returns.loc[fc.index]
    strat_ret = pos.shift(1).fillna(0) * ret
    strat_ret = strat_ret - tx_cost * pos.diff().abs().fillna(0)
    eq = (1 + strat_ret).cumprod()
    return strat_ret, eq, pos

asset = "BTC" if "BTC" in logret.columns else logret.columns[0]
fc = rolling_arima_garch_forecasts(logret[asset], TRAIN_WINDOW, ROLLING_STEPS)
strat_ret2, eq2, pos2 = backtest_vol_target(logret[asset], fc, target_vol=0.02)

print(f"[{asset}] Vol-target strategy metrics:")
print(perf_metrics(strat_ret2))
print("\nPosition sample:", pos2.head().to_string())


In [None]:
OUT_DIR = "/content"
for col in logret.columns:
    fc = rolling_arima_garch_forecasts(logret[col], TRAIN_WINDOW, ROLLING_STEPS)
    strat_ret, eq, pos = backtest_directional(logret[col], fc)

    fc.to_csv(f"{OUT_DIR}/{col}_forecasts.csv")
    pd.DataFrame({"strat_ret": strat_ret, "equity": eq, "position": pos}).to_csv(f"{OUT_DIR}/{col}_bt_directional.csv")

print("Saved CSVs to:", OUT_DIR)
