# 02 ARIMA Modeling
- ARIMA/SARIMA model for TSLA
- Parameter tuning (auto_arima)
- Model evaluation (MAE, RMSE, MAPE)

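## Overview
This notebook demonstrates how to build, tune, and evaluate an ARIMA model for forecasting the TSLA adjusted close price. The (p, d, q) order is selected with a non-seasonal `auto_arima` search and the model is then refit with statsmodels' `SARIMAX`, which leaves room to add seasonal (SARIMA) terms later.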

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Read the processed TSLA CSV, convert its 'Date' column to datetimes and
# use it as a chronologically sorted index.
# The relative path assumes the preprocessing step has already written the file.
file_path = '../data/processed/TSLA_processed.csv'
df = (
    pd.read_csv(file_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index()
)
df.head()

In [None]:
# Line chart of the 'Adj Close' column against the date index
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['Adj Close'], label='TSLA Adj Close')
ax.set_title('TSLA Adjusted Close Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()

In [None]:
# Chronological hold-out split of the 'Adj Close' series:
# rows dated before split_date go to train, rows on or after it go to test.
split_date = '2024-01-01'
before_split = df.index < split_date
from_split = df.index >= split_date
train = df.loc[before_split, 'Adj Close']
test = df.loc[from_split, 'Adj Close']
print(f'Train shape: {train.shape}, Test shape: {test.shape}')

In [None]:
# Stepwise auto_arima search for a non-seasonal (p, d, q) order on the training series
print('Running auto_arima for parameter selection...')
search_options = dict(
    start_p=1, start_q=1,          # starting point of the search
    max_p=3, max_q=3,              # upper bounds on the AR / MA orders
    m=1, start_P=0, seasonal=False,
    d=None,
    trace=True,                    # log each candidate model as it is tried
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
stepwise_model = auto_arima(train, **search_options)
print(f'Best ARIMA order: {stepwise_model.order}')

In [None]:
# Refit the order chosen by auto_arima with statsmodels' SARIMAX,
# with the stationarity and invertibility constraints switched off.
order = stepwise_model.order
relaxed_constraints = {
    'enforce_stationarity': False,
    'enforce_invertibility': False,
}
model = SARIMAX(train, order=order, **relaxed_constraints)
model_fit = model.fit(disp=False)
print(model_fit.summary())

In [None]:
# Forecast one step per observation in the test set
n_test = len(test)
forecast = model_fit.forecast(steps=n_test)

# Attach the test dates *positionally*.
# model_fit.forecast() returns a pandas Series carrying its own index. Passing a
# Series to pd.Series(..., index=...) reindexes by label instead of relabelling,
# so any forecast label that is not present in test.index turns into NaN
# (which would leave the forecast line empty and break the metrics below).
# Converting to a plain array first keeps the values and simply assigns the dates.
forecast = pd.Series(np.asarray(forecast), index=test.index)

# Plot actual vs forecast
plt.figure(figsize=(12,6))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test', color='orange')
plt.plot(forecast.index, forecast, label='Forecast', color='green')
plt.title('ARIMA Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Adj Close')
plt.legend()
plt.show()

In [None]:
# Score the forecast against the held-out prices.
# MAPE is the mean absolute error relative to the actual value, in percent.
abs_pct_error = np.abs((test - forecast) / test)

mae = mean_absolute_error(y_true=test, y_pred=forecast)
rmse = np.sqrt(mean_squared_error(y_true=test, y_pred=forecast))
mape = abs_pct_error.mean() * 100

print(f'MAE: {mae:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'MAPE: {mape:.2f}%')

In [None]:
# Forecast errors on the test period (actual minus forecast)
residuals = test - forecast

# Errors over time: a visible drift or trend indicates structure the model missed
fig_ts, ax_ts = plt.subplots(figsize=(12, 4))
ax_ts.plot(residuals)
ax_ts.set_title('Forecast Residuals (Test - Forecast)')
ax_ts.set_xlabel('Date')
ax_ts.set_ylabel('Residual')
plt.show()

# Histogram of the same errors
fig_hist, ax_hist = plt.subplots(figsize=(6, 4))
ax_hist.hist(residuals, bins=30, edgecolor='k')
ax_hist.set_title('Distribution of Residuals')
ax_hist.set_xlabel('Residual')
ax_hist.set_ylabel('Frequency')
plt.show()

## Discussion
- Summarize the model's performance and discuss any patterns or issues observed in the residuals.
- Consider next steps: further tuning, adding exogenous variables, or comparing with LSTM.