# 03 — Task 2: Modeling (ARIMA vs LSTM) for TSLA

In [None]:
# === Colab Setup ===
!pip install -U pip --quiet
!pip install "numpy<2.0" "scipy<1.11" --quiet
!pip install "pmdarima==2.0.3" --quiet
!pip install pandas numpy matplotlib seaborn scikit-learn statsmodels yfinance PyPortfolioOpt arch tensorflow --quiet
import sys, os, platform
print("Python:", sys.version)
print("Platform:", platform.platform())
print("CWD:", os.getcwd())

In [None]:
# === Configuration ===
# Date range of the price history.
START_DATE, END_DATE = "2015-07-01", "2025-07-31"
TICKERS = ["TSLA", "BND", "SPY"]

# BT_* presumably delimit a backtest window used by other notebooks — TODO confirm.
BT_START, BT_END = "2024-08-01", "2025-07-31"
TRADING_DAYS = 252
RISK_FREE_DAILY = 0.0

# Input data location and output folders for plots, models and metrics.
DATA_DIR = "data"
OUT_PLOTS = "outputs/plots"
OUT_MODELS = "outputs/models"
OUT_METRICS = "outputs/metrics"

# Create every folder up front; exist_ok makes re-running this cell harmless.
for folder in (DATA_DIR, OUT_PLOTS, OUT_MODELS, OUT_METRICS):
    os.makedirs(folder, exist_ok=True)


In [None]:
# === Load data ===
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Read the adjusted-close table; column 0 becomes the (date-parsed) index.
csv_path = f"{DATA_DIR}/prices_adj_close.csv"
prices = pd.read_csv(csv_path, index_col=0, parse_dates=True)

# TSLA price series with missing observations dropped.
tsla = prices["TSLA"].dropna()

# Chronological split: everything up to end-2023 trains, 2024-01-01..2025-07-31 tests.
train = tsla.loc[slice(None, "2023-12-31")]
test = tsla.loc[slice("2024-01-01", "2025-07-31")]

print(f"{train.index.min()} → {train.index.max()} | test: {test.index.min()} → {test.index.max()}")



In [None]:
# === ARIMA (pmdarima.auto_arima) ===
import pmdarima as pm
from joblib import dump

# Stepwise, non-seasonal order search (p, q <= 5; d <= 2) fitted on the training prices only.
arima_model = pm.auto_arima(train, seasonal=False, stepwise=True, error_action="ignore", suppress_warnings=True,
                            max_p=5, max_q=5, max_d=2, trace=False)
print(arima_model.summary())

# Forecast one step per test observation, together with the confidence band.
n_test = len(test)
arima_pred, arima_conf = arima_model.predict(n_periods=n_test, return_conf_int=True)

# predict() can hand back a pandas Series carrying its own index. Passing a Series
# to pd.Series(..., index=...) re-indexes by label, which would turn every forecast
# into NaN, so strip to raw values first and assign them positionally to the test dates.
arima_fc = pd.Series(np.asarray(arima_pred), index=test.index, name="ARIMA_FC")
arima_conf = np.asarray(arima_conf)

mae = mean_absolute_error(test, arima_fc)
# RMSE via sqrt(MSE): the `squared=False` keyword is no longer accepted by recent scikit-learn.
rmse = np.sqrt(mean_squared_error(test, arima_fc))
mape = (np.abs((test - arima_fc)/test)).mean()*100
print(f"ARIMA → MAE: {mae:.2f} | RMSE: {rmse:.2f} | MAPE: {mape:.2f}%")

plt.figure(figsize=(12,5))
plt.plot(train.index, train, label="Train")
plt.plot(test.index, test, label="Test", alpha=0.8)
plt.plot(arima_fc.index, arima_fc, label="ARIMA Forecast")
# Columns 0 and 1 of the interval array are the lower and upper bounds.
plt.fill_between(test.index, arima_conf[:,0], arima_conf[:,1], alpha=0.15, label="95% CI")
plt.title("TSLA — ARIMA forecast vs actual (Test)"); plt.legend(); plt.savefig(f"{OUT_PLOTS}/arima_test.png"); plt.show()

# Save ARIMA model
dump(arima_model, f"{OUT_MODELS}/tsla_arima.pkl")


In [None]:
# === LSTM baseline ===
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers

# Number of leading observations that belong to the training period.
split_idx = len(train)
# Length of the input window (in observations) fed to the LSTM.
lookback = 60

# Fit the scaler on the training prices ONLY, then apply it to the whole series.
# Fitting on the full series would leak the test period's min/max into training.
# Test values may therefore fall outside [0, 1] after scaling.
scaler = MinMaxScaler()
scaler.fit(train.values.reshape(-1, 1))
ts_scaled = scaler.transform(tsla.values.reshape(-1, 1))

def make_seq(arr, lookback=60):
    """Turn a 2-D column array into supervised (window, next value) pairs.

    Only column 0 of ``arr`` is used. Sample ``k`` consists of the
    ``lookback`` consecutive values starting at row ``k``; its target is the
    value in the row immediately after that window.

    Parameters
    ----------
    arr : array-like, shape (n_rows, n_cols)
        Input series; anything ``np.asarray`` can convert to a 2-D array.
    lookback : int, default 60
        Number of past rows in each input window. Must be >= 0.

    Returns
    -------
    X : np.ndarray, shape (n_samples, lookback, 1)
    y : np.ndarray, shape (n_samples,)
        ``n_samples = max(n_rows - lookback, 0)``. When the input is too short
        to form a single window, ``X`` still has shape ``(0, lookback, 1)``.

    Raises
    ------
    ValueError
        If ``lookback`` is negative.
    """
    if lookback < 0:
        raise ValueError(f"lookback must be non-negative, got {lookback}")

    column = np.asarray(arr)[:, 0]
    n_samples = max(len(column) - lookback, 0)

    # Row k of `window_idx` holds the positions k, k+1, ..., k+lookback-1.
    window_idx = np.arange(n_samples)[:, None] + np.arange(lookback)[None, :]
    X = column[window_idx]          # fancy indexing -> fresh (n_samples, lookback) array
    y = column[lookback:].copy()    # the value right after each window

    # Trailing axis of size 1 is the per-timestep feature dimension.
    return X[..., None], y

# Seed Python, NumPy and TensorFlow RNGs so weight init, dropout and batch
# shuffling are repeatable under Restart & Run All.
SEED = 42
keras.utils.set_random_seed(SEED)

# Training windows come from the training slice only. The test slice starts
# `lookback` rows early so that the first test target already has a full window.
X_train, y_train = make_seq(ts_scaled[:split_idx], lookback)
X_test, y_test   = make_seq(ts_scaled[split_idx-lookback:], lookback)

# Two stacked LSTM layers with dropout, followed by a single-unit regression head.
model = keras.Sequential([
    layers.Input(shape=(lookback,1)),
    layers.LSTM(64, return_sequences=True),
    layers.Dropout(0.2),
    layers.LSTM(32),
    layers.Dropout(0.2),
    layers.Dense(1)
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# NOTE: the test windows are passed as validation_data for monitoring only; no
# callback in this cell consumes the validation loss. Do not add early stopping
# or checkpointing on it without first carving a validation set out of train.
hist = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Predict in scaled space, then map back to price units.
yhat_scaled = model.predict(X_test).ravel()
yhat = scaler.inverse_transform(yhat_scaled.reshape(-1,1)).ravel()

# X_test was built from ts_scaled[split_idx - lookback:], so its first target is
# the observation at position split_idx. There is exactly one prediction per row
# from split_idx onwards; slicing off another `lookback` rows here would make the
# index shorter than the predictions and raise a length-mismatch error.
pred_index = tsla.index[split_idx:]
assert len(yhat) == len(pred_index), "prediction / index length mismatch"
lstm_fc = pd.Series(yhat, index=pred_index, name="LSTM_FC")
test_aligned = tsla.iloc[split_idx:]

mae = mean_absolute_error(test_aligned, lstm_fc)
# RMSE via sqrt(MSE): the `squared=False` keyword is no longer accepted by recent scikit-learn.
rmse = np.sqrt(mean_squared_error(test_aligned, lstm_fc))
mape = (np.abs((test_aligned - lstm_fc)/test_aligned)).mean()*100
print(f"LSTM → MAE: {mae:.2f} | RMSE: {rmse:.2f} | MAPE: {mape:.2f}%")

# Overlay the training history, the actual test prices and the LSTM forecast.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(train.index, train, label="Train")
ax.plot(test.index, test, label="Test", alpha=0.8)
ax.plot(lstm_fc.index, lstm_fc, label="LSTM Forecast")
ax.set_title("TSLA — LSTM forecast vs actual (Test)")
ax.legend()
fig.savefig(f"{OUT_PLOTS}/lstm_test.png")
plt.show()

# Persist the trained network alongside the other model artifacts (optional).
lstm_path = f"{OUT_MODELS}/tsla_lstm.h5"
model.save(lstm_path)


> Both models were evaluated on the 2024–2025 test set. Proceed to Notebook 04 to generate a 12-month forecast using ARIMA (or load the saved LSTM model if you prefer).