In [None]:
# 02_model_development.ipynb

"""
This Jupyter Notebook demonstrates time series forecasting for TSLA
using an ARIMA model (auto_arima from pmdarima).
"""
import copy
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error




In [None]:
# 1. Load Data
# Read the TSLA price file with 'Date' as a parsed, sorted index and keep the
# non-missing 'Close' values as the series to model.
df = pd.read_csv('../data/raw/TSLA.csv', parse_dates=['Date'], index_col='Date').sort_index()
ts = df['Close'].dropna()

# 2. Train-Test Split
# Everything up to and including `train_end` is used for fitting. The test set
# is derived as the remainder of the (sorted) series, so the two windows can
# never overlap or leave a gap when `train_end` is changed.
train_end = '2023-12-31'
train_data = ts.loc[:train_end]
test_data = ts.iloc[len(train_data):]

# Fail early with a clear message instead of a cryptic error further down
# (e.g. forecasting zero periods) when the cutoff falls outside the data.
assert not train_data.empty, f"No observations on or before {train_end}"
assert not test_data.empty, f"No observations after {train_end}"



In [None]:
# 3. Fit ARIMA using auto_arima
# Non-seasonal search over ARIMA orders on the training window only,
# using the stepwise strategy and no per-candidate trace output.
arima_search_options = dict(seasonal=False, trace=False, stepwise=True)
model = auto_arima(train_data, **arima_search_options)



In [None]:
# 4. Forecast on Test Set
# Produce one forecast per held-out observation, starting right after the
# end of the training window.
n_test = len(test_data)
forecast_test = model.predict(n_periods=n_test)
forecast_index = test_data.index

# Depending on the pmdarima version, `predict` returns either a NumPy array or
# a pandas Series carrying its own index. Passing a Series together with
# `index=` makes pandas *reindex* (align by label) rather than relabel, which
# silently yields NaN wherever the labels differ. Converting to a plain array
# first guarantees the forecasts are assigned positionally to the test dates.
forecast_test_series = pd.Series(np.asarray(forecast_test), index=forecast_index)

# Guard against silently evaluating/plotting an all-NaN forecast.
assert not forecast_test_series.isna().any(), "Forecast contains NaN values"



In [None]:
# 5. Evaluate
# Compare the held-out actuals with the forecast using three error measures:
# RMSE and MAE (in price units) and MAPE (in percent of the actual value).
mse = mean_squared_error(y_true=test_data, y_pred=forecast_test_series)
rmse = math.sqrt(mse)
mae = mean_absolute_error(y_true=test_data, y_pred=forecast_test_series)

relative_error = (test_data - forecast_test_series) / test_data
mape = relative_error.abs().mean() * 100

print(f"Test RMSE: {rmse:.2f}")
print(f"Test MAE:  {mae:.2f}")
print(f"Test MAPE: {mape:.2f}%")



In [None]:
# 6. Visualize
# Overlay the training history, the held-out actuals and the model forecast
# on a single axis so the fit over the test window can be judged by eye.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train_data.index, train_data, label='Train Data')
ax.plot(test_data.index, test_data, label='Test Data')
ax.plot(forecast_test_series.index, forecast_test_series, label='Predicted', color='red')
ax.set(title='TSLA ARIMA Forecast', xlabel='Date', ylabel='Price')
ax.legend()
plt.show()



In [None]:
# 7. Forecast Future
future_periods = 30

# `model` was fit on the training window only, so calling `predict` on it
# directly would return the steps immediately after the last *training*
# observation -- not the days after the end of the data. Feed the test
# observations into a copy of the model first so the forecast origin is the
# last known price. Working on a deep copy leaves `model` untouched, which
# keeps the earlier evaluation cells reproducible when re-run.
future_model = copy.deepcopy(model)
# Pass plain values so the update does not depend on pandas index alignment.
future_model.update(test_data.to_numpy())

future_preds = future_model.predict(n_periods=future_periods)

# Label the forecasts with the next business days after the last observation.
future_dates = pd.date_range(start=test_data.index[-1] + pd.Timedelta(days=1),
                             periods=future_periods, freq='B')

# `predict` may return a pandas Series with its own index; convert to an array
# so the values are assigned positionally instead of being reindexed to NaN.
future_series = pd.Series(np.asarray(future_preds), index=future_dates)

print("Future Forecast:")
print(future_series.head(10))