## Project: Bayesian Structural Time Series Model for Stock Prediction

#### Team Members:
- Asteria Xu (56376452)
- Yufei Shen (29116514)

In [None]:
library(dplyr)
library(readr)
library(lubridate)
library(bsts)
library(forecast)
library(ggplot2)
library(PerformanceAnalytics)

In [None]:
# Load and preprocess data
# Read the TSLA price file, coerce the Date/Close columns, and keep only the
# most recent 18 months of observations in chronological order.
tsla <- read_csv("TSLA.csv") %>%
  mutate(Date = as_date(Date),
         Close = as.numeric(Close)) %>%
  # `%m-%` rolls back to the last valid day of the month. Plain
  # `- months(18)` returns NA when the target day does not exist
  # (e.g. Aug 31 -> "Feb 31"), which would silently filter out every row.
  # `na.rm = TRUE` keeps one unparseable date from turning the cutoff into NA.
  filter(Date >= max(Date, na.rm = TRUE) %m-% months(18)) %>%
  arrange(Date)

# Plain vectors used by the modelling steps that follow.
y <- tsla$Close
dates <- tsla$Date

In [None]:
# Create train and test data
# Chronological 80/20 split: the first `train_size` observations are used for
# fitting, the remainder is held out. The test series keeps its position on
# the time axis by starting at index `train_size + 1`.
train_size <- floor(length(y) * 0.8)
train <- ts(y[seq_len(train_size)])
test <- ts(y[-seq_len(train_size)], start = train_size + 1)

In [None]:
# ARIMA Model
# Exhaustive (non-stepwise, non-approximated) order search on the training
# series, then a forecast covering exactly the held-out test span.
arima_model <- auto.arima(
  train,
  approximation = FALSE,
  stepwise = FALSE
)
arima_pred <- forecast(
  arima_model,
  h = length(test)
)

In [None]:
# BSTS Model
# State specification: local linear trend plus a 252-season seasonal component.
# NOTE(review): nseasons = 252 looks like one season per trading day of a
# year; if the training window is not several times longer than that, this
# component is weakly identified and slow to sample -- confirm it is intended.
ss <- AddLocalLinearTrend(list(), train)
ss <- AddSeasonal(ss, train, nseasons = 252)

# bsts uses its own C++ random number generator, which is seeded from the
# clock unless `seed` is supplied. Fix the seed so that the fit and the
# forecast (and therefore the reported metrics) are reproducible.
bsts_seed <- 42L
bsts_model <- bsts(train, state.specification = ss, niter = 1000, ping = 0,
                   seed = bsts_seed)
# Discard the first 100 MCMC draws as burn-in; forecast the full test span.
bsts_pred <- predict(bsts_model, horizon = length(test), burn = 100,
                     seed = bsts_seed)

In [None]:
# Evaluation Metrics
# Compute point-forecast error and interval coverage for one forecast object.
#
# Arguments:
#   pred       Forecast object. For model_type "arima" it must provide `$mean`
#              plus `$lower` / `$upper` matrices with a "95%" column. For any
#              other model_type it must provide `$mean` plus a two-row
#              `$interval` matrix (row 1 = lower bound, row 2 = upper bound).
#   actual     Observed values for the forecast horizon (numeric or ts).
#   model_type "arima" selects the `$lower` / `$upper` layout; every other
#              value selects the `$interval` layout.
#
# Returns a list with:
#   MAE      mean absolute error of the point forecast
#   RMSE     root mean squared error of the point forecast
#   Coverage share of actual values that fall inside [lower, upper]
#
# Stops with an error if the forecast and `actual` differ in length, instead
# of silently recycling the shorter vector.
calculate_metrics <- function(pred, actual, model_type = "arima") {
  pred_mean <- as.numeric(pred$mean)
  actual <- as.numeric(actual)

  if (model_type == "arima") {
    lower <- as.numeric(pred$lower[, "95%"])
    upper <- as.numeric(pred$upper[, "95%"])
  } else {
    lower <- as.numeric(pred$interval[1, ])
    upper <- as.numeric(pred$interval[2, ])
  }

  # Without this guard, R recycles mismatched vectors and returns metrics
  # computed on misaligned data.
  stopifnot(
    length(pred_mean) == length(actual),
    length(lower) == length(actual),
    length(upper) == length(actual)
  )

  errors <- pred_mean - actual
  list(
    MAE = mean(abs(errors)),
    RMSE = sqrt(mean(errors^2)),
    Coverage = mean(actual >= lower & actual <= upper)
  )
}

# Score each forecast against the held-out test observations.
arima_metrics <- calculate_metrics(pred = arima_pred, actual = test,
                                   model_type = "arima")
bsts_metrics <- calculate_metrics(pred = bsts_pred, actual = test,
                                  model_type = "bsts")

In [None]:
# Results Comparison
# One row per model (BSTS first, then ARIMA), one column per metric.
results <- local({
  fits <- list(bsts_metrics, arima_metrics)
  # Pull a single metric out of every fit, in row order.
  metric_column <- function(field) {
    vapply(fits, function(fit) fit[[field]], numeric(1))
  }
  data.frame(
    Model = c("BSTS", "ARIMA"),
    MAE = metric_column("MAE"),
    RMSE = metric_column("RMSE"),
    Coverage = metric_column("Coverage")
  )
})
print(results)

In [None]:
# Create a time series object for the full data
# Reuse the training series' start time and sampling frequency so the full
# series and the forecasts share one time axis.
full_ts <- ts(
  data = y,
  start = tsp(train)[1],
  frequency = tsp(train)[3]
)

In [None]:
# Convert predictions to time series objects with proper time indices
# Both forecasts start one sampling step after the last training observation.
arima_pred_ts <- ts(
  data = arima_pred$mean,
  start = tsp(train)[2] + deltat(train),
  frequency = frequency(train)
)

bsts_pred_ts <- ts(
  data = bsts_pred$mean,
  start = tsp(train)[2] + deltat(train),
  frequency = frequency(train)
)

In [None]:
# Create the plot
# Observed series as the base layer, with the two forecasts overlaid as
# separately coloured series.
autoplot(full_ts) +
  autolayer(bsts_pred_ts, series = "BSTS") +
  autolayer(arima_pred_ts, series = "ARIMA") +
  labs(
    x = "Date",
    y = "Price",
    title = "TSLA Closing Price Forecast Comparison"
  ) +
  scale_colour_manual(values = c(ARIMA = "red", BSTS = "blue"))