In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from scipy.stats import boxcox
import yfinance as yf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima

# --- Data selection -------------------------------------------------------
ticker = "TSLA"
start, end = "2020-01-01", "2025-01-01"

# --- Non-seasonal ARIMA order (p, d, q) -----------------------------------
p, d, q = 1, 0, 1
order = (p, d, q)

# --- Seasonal order (P, D, Q, s); s is the seasonal period ----------------
P, D, Q, s = 1, 0, 1, 5
seasonal_order = (P, D, Q, s)

# --- Columns of the data frame passed to the models as exogenous inputs ---
exog_vars_list = ["Volume", "VIX"]

In [None]:
def get_data(ticker):
  """Fetch price history for ``ticker`` through yfinance.

  The date window is taken from the module-level ``start`` and ``end``.
  For "^VIX" the history is indexed with the single key "Close"; for any
  other ticker it is indexed with the list ["Close", "Volume"].
  """
  if ticker == "^VIX":
    columns = "Close"
  else:
    columns = ["Close", "Volume"]
  history = yf.Ticker(ticker).history(start=start, end=end)
  return history[columns]

In [None]:
# Load the target ticker and the VIX close separately.
data = get_data(ticker)
vix_close = get_data("^VIX")

# Column assignment aligns on the index. NOTE(review): yfinance appears to
# stamp rows in each exchange's local timezone, so midnight timestamps of two
# instruments listed on different exchanges may not match and "VIX" would end
# up all-NaN -- confirm against the live data. Reducing both indexes to
# tz-naive calendar dates makes the alignment depend on the trading day only
# and is a no-op for indexes that already line up.
data.index = data.index.tz_localize(None).normalize()
vix_close.index = vix_close.index.tz_localize(None).normalize()
data["VIX"] = vix_close

In [None]:
def apply_boxcox(df):
  """Box-Cox transform ``df["Close"]`` and add its first difference.

  Prints the second element of ``adfuller``'s result (the ADF p-value) for
  the raw close and for the differenced transformed close, plus the lambda
  chosen by ``boxcox``.

  Args:
    df: DataFrame with a "Close" column. ``scipy.stats.boxcox`` requires the
      values to be strictly positive. The input frame is not modified.

  Returns:
    A tuple ``(frame, y)``:
      * ``frame`` -- copy of ``df`` with a "Transformed_Close_diff" column,
        with every row that contains a NaN in any column removed (this
        always removes the first row, whose difference is undefined).
      * ``y`` -- the un-differenced Box-Cox series on the full original
        index of ``df`` (it is NOT trimmed to match ``frame``).
  """
  df = df.copy()

  # The p-values are pulled into locals so the f-strings do not nest double
  # quotes, which is a SyntaxError on Python versions before 3.12.
  adf_before = adfuller(df["Close"])[1]
  print(f"ADF before box-cox transform {adf_before:.3f}")

  box_cox_transform, lmbd = boxcox(df["Close"])
  y = pd.Series(box_cox_transform, index=df.index)
  df["Transformed_Close_diff"] = y.diff()

  adf_after = adfuller(df["Transformed_Close_diff"].dropna())[1]
  print(f"ADF after box-cox transform {adf_after:.3f}")
  print(f"Lambda: {lmbd}")
  return df.dropna(), y

def plot(data):
  """Plot "Close" and "Transformed_Close_diff", one figure per column.

  Args:
    data: DataFrame containing both columns; each is plotted against the
      frame's index.
  """
  for col in ["Close", "Transformed_Close_diff"]:
    # The figure is created inside the loop because plt.show() is called once
    # per column: a single figure opened before the loop would only size the
    # first plot, leaving the second on an implicit default-size figure.
    plt.figure(figsize=(10,4))
    plt.plot(data[col], label=f"{col} Closing Prices")
    plt.title("Closing Prices")
    plt.xlabel("date")
    plt.ylabel("price")
    plt.legend()
    plt.show()

def sarimax_statsmodels(y, exog_vars, order, seasonal_order, testsize=30):
  """Fit SARIMAX on all but the last ``testsize`` points and plot rolling
  one-step-ahead forecasts for the held-out tail.

  The model is estimated once on the training window. Its parameters are then
  applied, without re-estimation, to the full series, and the one-step-ahead
  predictions for the last ``testsize`` positions are compared with the
  actual values. Each of those predictions is conditioned on the observations
  that precede it, so the forecast really does roll forward through the test
  window.

  Args:
    y: Endogenous series (sliceable, same length as ``exog_vars``).
    exog_vars: Exogenous regressors aligned row-for-row with ``y``, or None.
    order: Non-seasonal ``(p, d, q)`` order.
    seasonal_order: Seasonal ``(P, D, Q, s)`` order.
    testsize: Number of trailing observations held out for evaluation.

  Returns:
    The statsmodels results object of the model fitted on the training
    window (the model that produced the plotted forecasts).

  Raises:
    ValueError: If ``testsize`` is not in ``1 .. len(y) - 1`` or if
      ``exog_vars`` does not have the same length as ``y``.
  """
  n_obs = len(y)
  if not 0 < testsize < n_obs:
    raise ValueError(
      f"testsize must be between 1 and {n_obs - 1}, got {testsize}"
    )
  if exog_vars is not None and len(exog_vars) != n_obs:
    raise ValueError(
      f"exog_vars has {len(exog_vars)} rows but y has {n_obs}"
    )

  n_train = n_obs - testsize
  y_train = y[:-testsize]
  y_test = y[-testsize:]
  exog_train = exog_vars[:-testsize] if exog_vars is not None else None

  model = SARIMAX(
    y_train,
    order=order,
    seasonal_order=seasonal_order,
    exog=exog_train,
    enforce_stationarity=False,
    enforce_invertibility=False
  )
  results = model.fit(disp=False)

  # Carry the training-window parameters over to the full sample. Calling
  # get_forecast(steps=1) repeatedly on `results` would predict the same first
  # out-of-sample step every time; in-sample one-step-ahead predictions on the
  # extended results use the actual values observed up to each step instead.
  extended = results.apply(y, exog=exog_vars)
  prediction = extended.get_prediction(start=n_train, end=n_obs - 1)
  forecasts = np.asarray(prediction.predicted_mean)

  def visualize(y_true, y_pred):
    """Plot actual vs forecast values against their position in the window."""
    plt.figure(figsize=(10,5))
    plt.plot(range(len(y_true)), y_true, marker='o', label='Actual')
    plt.plot(range(len(y_pred)), y_pred, marker='x', label='Forecast')
    plt.title('SARIMAX Rolling 1-Step Forecast vs Actual')
    plt.xlabel('Time (last points)')
    plt.ylabel('Transformed Closing Price')
    plt.legend()
    plt.grid(True)
    plt.show()

  visualize(y_test, forecasts)

  return results

def auto_sarimax(raw_y, y, exog_vars, s, testsize=30):
  """Select a seasonal ARIMA with ``auto_arima`` and plot rolling one-step
  forecasts for the last ``testsize`` points of ``raw_y``.

  After each one-step forecast the observed value (and its exogenous row) is
  fed back with ``model.update`` so the next forecast starts from the actual
  history rather than repeating the first out-of-sample step.
  NOTE(review): per the pmdarima docs ``update`` also re-estimates the
  parameters for a few iterations -- confirm this is acceptable here.

  Args:
    raw_y: Series/array the model is fitted on and evaluated against.
    y: Unused; kept so existing calls keep working.
    exog_vars: Exogenous regressors aligned row-for-row with ``raw_y``, or
      None.
    s: Seasonal period passed to ``auto_arima`` as ``m``.
    testsize: Number of trailing observations held out for evaluation.

  Returns:
    The fitted pmdarima model, updated with the whole test window.

  Raises:
    ValueError: If ``testsize`` is not in ``1 .. len(raw_y) - 1``.
  """
  n_obs = len(raw_y)
  if not 0 < testsize < n_obs:
    raise ValueError(
      f"testsize must be between 1 and {n_obs - 1}, got {testsize}"
    )

  # Plain float arrays: the plot is positional anyway, and arrays sidestep any
  # requirement that the index of appended observations extend the training
  # index when the model is updated.
  endog = np.asarray(raw_y, dtype=float)
  y_train, y_test = endog[:-testsize], endog[-testsize:]

  if exog_vars is not None:
    exog = np.asarray(exog_vars, dtype=float)
    if exog.ndim == 1:
      exog = exog.reshape(-1, 1)  # a single regressor still needs 2-D shape
    exog_train, exog_test = exog[:-testsize], exog[-testsize:]
  else:
    exog_train = exog_test = None

  model = auto_arima(
    y=y_train,
    X=exog_train,
    m=s,
    seasonal=True,
    suppress_warnings=True,
    error_action='ignore'
  )

  forecasts = np.empty(testsize)
  for i in range(testsize):
    x_next = exog_test[i:i+1] if exog_test is not None else None
    step = model.predict(n_periods=1, X=x_next)
    forecasts[i] = np.asarray(step, dtype=float).ravel()[0]
    # Reveal the true observation before forecasting the following step.
    model.update(y_test[i:i+1], X=x_next)

  def visualize(y_true, y_pred):
    """Plot actual vs forecast values against their position in the window."""
    plt.figure(figsize=(10,5))
    plt.plot(range(len(y_true)), y_true, marker='o', label='Actual')
    plt.plot(range(len(y_pred)), y_pred, marker='x', label='Forecast')
    plt.title('SARIMAX Forecast vs Actual')
    plt.xlabel(f'Time (last {testsize} points)')
    plt.ylabel('Transformed Closing Price')
    plt.legend()
    plt.grid(True)
    plt.show()

  visualize(y_test, forecasts)

  return model






In [None]:
# NOTE: this cell rebinds `data`. apply_boxcox drops the first row (its
# difference is NaN) on every call, so re-running this cell without first
# re-running the data-loading cell trims one more row each time.
data, raw_y = apply_boxcox(data)
assert not data.empty, "no rows left after dropna -- check the input columns for NaNs"

# `data` has already had every NaN row removed, so no further dropna is needed.
y = data["Transformed_Close_diff"]
exog_vars = data[exog_vars_list]

# raw_y still covers the full original index; select the surviving rows by
# label instead of assuming that exactly one leading row was dropped.
raw_y_aligned = raw_y.loc[data.index]
print(len(exog_vars), len(raw_y_aligned))

results = sarimax_statsmodels(y, exog_vars, order, seasonal_order)
#auto_sarimax_results = auto_sarimax(raw_y_aligned, y, exog_vars, s)

#print(results.summary())