# Task 2 - Develop Time Series Forecasting Models

In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed
warnings.filterwarnings("ignore")

2025-08-13 22:24:37.050876: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-13 22:24:37.181807: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-13 22:24:37.372972: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755113077.729259   32892 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755113077.798858   32892 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755113078.359712   32892 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

### 1. Data Splitting
Split chronologically to preserve time order.

In [None]:
# Load the TSLA closing-price series.
# NOTE: `closing_prices` is not created anywhere in this notebook; it has to be
# in the kernel already (e.g. carried over from Task 1 or re-loaded from its
# CSV) before this cell runs on a fresh kernel.
tsla_close = closing_prices['TSLA'].sort_index()  # guarantee chronological order

# Split: train = everything up to and including 2023-12-31, test = the rest.
train_end = '2023-12-31'
train = tsla_close.loc[:train_end]
# Take the remainder positionally instead of slicing with `[train_end:]`:
# label slices are inclusive on both ends, so a row dated exactly `train_end`
# would otherwise appear in both train and test.
test = tsla_close.iloc[len(train):]

# Fail early with a clear message; the models below break in obscure ways when
# either partition is empty.
if train.empty or test.empty:
    raise ValueError(
        f"Chronological split at {train_end} produced an empty partition "
        f"(train={len(train)}, test={len(test)})")

print(f"Train shape: {train.shape}, Test shape: {test.shape}")

### 2. Implement Classical Model: ARIMA/SARIMA
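Use `pmdarima.auto_arima` to select the non-seasonal orders (p, q) and the seasonal orders automatically. Because prices are non-stationary, first-order differencing is fixed at d=1 rather than searched, and a weekly seasonal period of m=5 trading days is assumed.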

In [None]:
# Auto-ARIMA search for the best (p, q) and seasonal orders; d is fixed at 1.
model_auto = auto_arima(train, seasonal=True, m=5,  # m=5 for weekly seasonality in trading days
                        start_p=0, start_q=0, max_p=5, max_q=5,
                        d=1, trace=True, error_action='ignore')

# Refit with statsmodels SARIMAX using the orders chosen above
best_order = model_auto.order
best_seasonal_order = model_auto.seasonal_order
sarima_model = SARIMAX(train, order=best_order,
                       seasonal_order=best_seasonal_order)
sarima_fit = sarima_model.fit(disp=False)

# Forecast one step per test observation
sarima_forecast = sarima_fit.get_forecast(steps=len(test))
sarima_pred = sarima_forecast.predicted_mean
sarima_conf_int = sarima_forecast.conf_int()

# The forecast does not come back labelled with the test dates: a trading-day
# index has no regular frequency, so statsmodels cannot extend it onto the
# actual test dates. Relabel by position so that pandas
# arithmetic and plots line up with `test` instead of aligning to NaN.
sarima_pred.index = test.index
sarima_conf_int.index = test.index

# Evaluate on plain arrays so no index alignment can interfere
test_values = test.to_numpy()
sarima_values = sarima_pred.to_numpy()
mae_sarima = mean_absolute_error(test_values, sarima_values)
rmse_sarima = np.sqrt(mean_squared_error(test_values, sarima_values))
mape_sarima = np.mean(np.abs((test_values - sarima_values) / test_values)) * 100

print(f"SARIMA - MAE: {mae_sarima}, RMSE: {rmse_sarima}, MAPE: {mape_sarima}%")

### 3. Implement Deep Learning Model: LSTM
An LSTM can learn non-linear patterns directly from the price level without explicit differencing, but it requires scaled inputs and fixed-length input sequences (lookback windows). Use Keras/TensorFlow.

In [None]:
# Fit the min-max scaler on the training prices only (as a single-column 2-D
# array, which scikit-learn expects) and keep the scaled training data.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train.to_numpy().reshape(-1, 1))

# Create sequences (lookback=60 days)


def create_sequences(data, lookback=60):
    """Build supervised (window, next value) pairs from a single-column array.

    Args:
        data: 2-D array-like of shape (n_samples, n_columns); only column 0
            is used.
        lookback: Number of consecutive past values in each input window.
            Must be a positive integer.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples - lookback,
        lookback)`` and ``y`` has shape ``(n_samples - lookback,)``;
        ``y[k]`` is the value that immediately follows window ``X[k]``.
        When ``data`` has ``lookback`` rows or fewer, both arrays are empty
        but ``X`` still has ``lookback`` columns, so callers can reshape it.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be a positive integer")

    series = np.asarray(data)[:, 0]
    n_windows = len(series) - lookback
    if n_windows <= 0:
        # Not enough history for even one window: return correctly shaped
        # empties instead of a 1-D array that breaks a later `X.shape[1]`.
        return (np.empty((0, lookback), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    # sliding_window_view yields len - lookback + 1 windows; the last one has
    # no following target value, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(series, lookback)
    # Copy: the view is read-only and shares memory with `data`.
    return windows[:n_windows].copy(), series[lookback:].copy()


# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
# batch shuffling are repeatable across "Restart & Run All".
SEED = 42
set_random_seed(SEED)

lookback = 60
X_train, y_train = create_sequences(train_scaled, lookback)
X_train = X_train.reshape(
    (X_train.shape[0], X_train.shape[1], 1))  # (samples, timesteps, features) for LSTM

# Build LSTM: two stacked 50-unit layers with dropout, one linear output
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(lookback, 1)))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Train with early stopping on the validation loss
early_stop = EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, batch_size=32,
                    validation_split=0.2, callbacks=[early_stop])

# Prepare test inputs: prepend the last `lookback` training points so the
# first test day has a full history window. The scaler fitted on train is
# reused (transform only), so no test statistics leak into the scaling.
test_inputs = np.concatenate(
    (train_scaled[-lookback:], scaler.transform(test.values.reshape(-1, 1))))
X_test, _ = create_sequences(test_inputs, lookback)
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Predict and inverse scale. The model output has shape (n, 1); flatten it to
# (n,) so it lines up element-wise with the 1-D test series.
lstm_pred_scaled = model.predict(X_test)
lstm_pred = scaler.inverse_transform(lstm_pred_scaled).ravel()

# Evaluate on plain 1-D arrays. The original `test - lstm_pred` mixed a 1-D
# Series with an (n, 1) array, which does not give per-day errors.
test_actual = test.to_numpy().ravel()
mae_lstm = mean_absolute_error(test_actual, lstm_pred)
rmse_lstm = np.sqrt(mean_squared_error(test_actual, lstm_pred))
mape_lstm = np.mean(np.abs((test_actual - lstm_pred) / test_actual)) * 100

print(f"LSTM - MAE: {mae_lstm}, RMSE: {rmse_lstm}, MAPE: {mape_lstm}%")

### 4. Compare Models and Visualize

Plot predictions vs actuals for both models.

In [None]:
# Overlay both models' forecasts on the actual test-period prices.
# Every series is plotted against `test.index`, so values are matched to
# dates by position.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(test.index, test, label='Actual')
ax.plot(test.index, sarima_pred, label='SARIMA Pred')
ax.plot(test.index, lstm_pred, label='LSTM Pred')
ax.set_title('TSLA Price Forecasts vs Actual')
# Axis labels so the figure stands alone when the notebook is skimmed
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.legend()
plt.show()