In [15]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error
import math
import warnings
warnings.filterwarnings('ignore')

In [12]:
# Load Cleaned Data
data = pd.read_csv('../data/cleaned_data.csv')

# Set Date column as index
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
tsla_close = data['Close_TSLA']

In [13]:
data.head()

Unnamed: 0_level_0,Close_BND,Close_SPY,Close_TSLA,High_BND,High_SPY,High_TSLA,Low_BND,Low_SPY,Low_TSLA,Open_BND,Open_SPY,Open_TSLA,Volume_BND,Volume_SPY,Volume_TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-01-02,62.573135,172.592834,14.620667,62.603419,173.811068,14.883333,62.399003,171.542642,14.217333,62.406576,173.390991,14.858,2218800,121465900,71466000
2015-01-05,62.754814,169.475922,14.006,62.777526,171.702325,14.433333,62.610966,169.165069,13.810667,62.64125,171.534297,14.303333,5820100,169632600,80527500
2015-01-06,62.936531,167.879639,14.085333,63.125802,170.316081,14.28,62.860824,167.073085,13.614,62.860824,169.78678,14.004,3887600,209151400,93928500
2015-01-07,62.974373,169.971603,14.063333,63.05008,170.31607,14.318667,62.875954,168.770189,13.985333,62.944089,169.223867,14.223333,2433400,125346700,44526000
2015-01-08,62.875938,172.987762,14.041333,62.921362,173.206211,14.253333,62.81537,171.383078,14.000667,62.921362,171.399871,14.187333,1873400,147217800,51637500


In [17]:
# Splitting Data into Training and Testing Sets
train_size = int(len(tsla_close) * 0.8)
train, test = tsla_close[:train_size], tsla_close[train_size:]

# ARIMA Model
model_arima = ARIMA(train, order=(5, 1, 0))
model_arima_fit = model_arima.fit()
forecast_arima = model_arima_fit.forecast(steps=len(test))

# SARIMA Model
model_sarima = SARIMAX(train, order=(5, 1, 0), seasonal_order=(1, 1, 1, 12))
model_sarima_fit = model_sarima.fit()
forecast_sarima = model_sarima_fit.forecast(steps=len(test))

# Evaluation Metrics
def evaluate_forecast(true, predicted):
    mae = mean_absolute_error(true, predicted)
    rmse = math.sqrt(mean_squared_error(true, predicted))
    mape = np.mean(np.abs((true - predicted) / true)) * 100
    return mae, rmse, mape

print("Evaluation Metrics: MAE, RMSE, MAPE")
print("ARIMA:", evaluate_forecast(test, forecast_arima))
print("SARIMA:", evaluate_forecast(test, forecast_sarima))

# Save Forecasts
forecast_df = pd.DataFrame({
    'Date': test.index,
    'Actual': test.values,
    'ARIMA_Forecast': forecast_arima.values,
    'SARIMA_Forecast': forecast_sarima.values,
})
forecast_df.to_csv('../data/model_forecasts.csv', index=False)
print("Model forecasts saved successfully as model_forecasts.csv")

Evaluation Metrics: MAE, RMSE, MAPE
ARIMA: (55.47153137397432, 81.64328055820296, nan)
SARIMA: (46.916356551890864, 65.60665025567543, nan)
Model forecasts saved successfully as model_forecasts.csv
