# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Read the preprocessed dataset. The first CSV column becomes the index and
# pandas is asked to parse it as dates.
df = pd.read_csv(
    "../results/preprocessed_data.csv",
    index_col=0,
    parse_dates=True,
)

# The TSLA column is the series being forecast.
ts = df["TSLA"]

# Chronological hold-out (no shuffling): the leading 80% of the rows are used
# for fitting and the trailing 20% are kept back for evaluation.
train_size = int(0.8 * len(ts))
train = ts[:train_size]
test = ts[train_size:]

# ARIMA order (p, d, q). These are hand-picked values; adjust them based on
# ACF/PACF analysis of the series.
order = (5, 1, 2)
p, d, q = order
model = ARIMA(train, order=order)
arima_model = model.fit()

# Forecast exactly as many steps as there are held-out observations.
horizon = len(test)
forecast = arima_model.forecast(steps=horizon)
# Relabel the forecast with the test index so both series share the same
# x-values when they are plotted together.
forecast.index = test.index

# Score the forecast against the held-out observations using mean absolute
# error and mean squared error.
mae = mean_absolute_error(y_true=test, y_pred=forecast)
mse = mean_squared_error(y_true=test, y_pred=forecast)

print(f"ARIMA Model - MAE: {mae}, MSE: {mse}")

# Draw the training data, the held-out actual values and the ARIMA forecast
# on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train, label="Training Data", color="blue")
ax.plot(test.index, test, label="Actual Prices", color="green")
ax.plot(
    forecast.index,
    forecast,
    label="ARIMA Forecast",
    color="red",
    linestyle="dashed",
)
ax.set_title("ARIMA Model - Actual vs Forecasted TSLA Prices")
ax.set_xlabel("Date")
ax.set_ylabel("Normalized Price")
ax.grid()
ax.legend()
plt.show()
