# Task 2: Initial Time Series Forecasting Model

## Objective
Develop, train, and evaluate time series forecasting models to predict Tesla's future stock prices.

## Models
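- ARIMA/SARIMA (this notebook fits a non-seasonal ARIMA; the order is chosen by `auto_arima` with `seasonal=False`)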
- LSTM (Long Short-Term Memory)

## Steps
1. Load Processed Data
2. Train/Test Split (Chronological)
3. ARIMA Implementation
4. LSTM Implementation
5. Evaluation & Comparison

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import warnings

# Notebook-wide settings: ignore every Python warning and use a wide default figure size.
warnings.filterwarnings(action='ignore')
plt.rcParams.update({"figure.figsize": (12, 6)})

## 1. Load Processed Data

In [None]:
# Load the processed TSLA table; the first CSV column becomes the index and is parsed as dates.
df = pd.read_csv('../data/processed/TSLA_processed.csv', index_col=0, parse_dates=True)
# Ensure no NaNs: drop every row that contains a missing value.
df.dropna(inplace=True)
# Ensure a proper index: the models below treat row order as time order, so sort chronologically.
df.sort_index(inplace=True)
print(df.head())
# DataFrame.info() prints its report itself and returns None, so it must not be wrapped in
# print() (that emitted a stray "None" line after the report).
df.info()

## 2. Train/Test Split

In [None]:
# Chronological split: rows dated before the cutoff are used for training,
# rows on or after it are held out for testing.
split_date = '2025-01-01'
train_data = df.loc[df.index < split_date]
test_data = df.loc[df.index >= split_date]

print(f"Train size: {len(train_data)}")
print(f"Test size: {len(test_data)}")

# Draw both segments of the closing price on the current axes.
ax = plt.gca()
ax.plot(train_data.index, train_data['Close'], label='Train')
ax.plot(test_data.index, test_data['Close'], label='Test')
ax.legend()
ax.set_title('Train vs Test Split')
plt.show()

## 3. ARIMA Model

In [None]:
# Search for the best ARIMA order on the training closing prices.
# The search is non-seasonal; a failure is reported and leaves `model_auto` unset.
auto_arima_options = dict(
    seasonal=False,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
try:
    model_auto = auto_arima(train_data['Close'], **auto_arima_options)
    print(model_auto.summary())
except Exception as exc:
    print(f"ARIMA failed: {exc}")

In [None]:
# Train ARIMA
# Use the order found by the auto_arima search when it is available; otherwise (search cell
# failed, was not run, or left None behind) fall back to a fixed default order.
_auto_model = locals().get('model_auto')
order = _auto_model.order if _auto_model is not None else (5, 1, 0)  # Default fallback

model_arima = ARIMA(train_data['Close'], order=order)
fit_arima = model_arima.fit()

# Forecast one step per test row.
forecast_arima = fit_arima.forecast(steps=len(test_data))
# Attach the test dates positionally. forecast() returns a Series with its own index; handing
# that Series to pd.Series(..., index=...) would reindex it by label, and whenever the forecast
# labels differ from the test dates (e.g. an integer index because the training dates carry no
# frequency) every value would silently become NaN.
forecast_arima = pd.Series(np.asarray(forecast_arima), index=test_data.index)

plt.plot(train_data.index, train_data['Close'], label='Train')
plt.plot(test_data.index, test_data['Close'], label='Test')
plt.plot(test_data.index, forecast_arima, label='ARIMA Forecast')
plt.legend()
plt.show()

## 4. LSTM Model

In [None]:
# Scaling: fit the min-max scaler on the training closes only, then apply the same
# transformation to both the training and the test closes.
scaler = MinMaxScaler()
scaler.fit(train_data[['Close']])
scaled_train = scaler.transform(train_data[['Close']])
scaled_test = scaler.transform(test_data[['Close']])

def create_sequences(data, seq_length=60):
    """Slice ``data`` into overlapping windows, each paired with the value that follows it.

    Args:
        data: Sliceable sequence of observations supporting ``len()``.
        seq_length: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is ``data[i:i + seq_length]`` and
        ``y[i]`` is ``data[i + seq_length]``. Both arrays are empty when ``data`` holds
        no more than ``seq_length`` items.
    """
    window_count = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(window_count)]
    targets = [data[start + seq_length] for start in range(window_count)]
    return np.array(windows), np.array(targets)

seq_length = 60
X_train, y_train = create_sequences(scaled_train, seq_length)

# Prefix the test rows with the last `seq_length` training rows so that the very first
# test value already has a full window of history in front of it.
test_with_history = np.concatenate([scaled_train[-seq_length:], scaled_test], axis=0)
X_test, y_test = create_sequences(test_with_history, seq_length)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

In [None]:
# Build LSTM
# Two stacked LSTM layers followed by two dense layers ending in a single output value.
# The first LSTM returns its full sequence so the second LSTM has a sequence to consume.
model_lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1)
])

model_lstm.compile(optimizer='adam', loss='mean_squared_error')
# Validate against y_test, the targets that create_sequences paired with X_test. The raw
# scaled_test array only matches X_test row-for-row when the training set has at least
# seq_length rows.
# NOTE(review): the validation set is the test set, so the reported val_loss is not
# independent of the final evaluation below.
model_lstm.fit(X_train, y_train, batch_size=32, epochs=5, validation_data=(X_test, y_test))

In [None]:
# Predict on the test windows, then map the scaled outputs back to price units.
scaled_predictions = model_lstm.predict(X_test)
predictions = scaler.inverse_transform(scaled_predictions)

# Flatten the predictions into one value per test date.
forecast_lstm = pd.Series(predictions.flatten(), index=test_data.index)

# Overlay the forecast on the train and test closing prices.
ax = plt.gca()
ax.plot(train_data.index, train_data['Close'], label='Train')
ax.plot(test_data.index, test_data['Close'], label='Test')
ax.plot(test_data.index, forecast_lstm, label='LSTM Forecast')
ax.legend()
plt.show()

## 5. Evaluation

In [None]:
def evaluate(y_true, y_pred, model_name):
    """Compute MAE, RMSE and MAPE for one model's predictions.

    Args:
        y_true: Observed values (array-like or pandas Series).
        y_pred: Predicted values, in the same order and of the same length as ``y_true``.
        model_name: Label stored under the ``'Model'`` key of the result.

    Returns:
        Dict with keys ``'Model'``, ``'MAE'``, ``'RMSE'`` and ``'MAPE'`` (MAPE in percent).
    """
    # Compare values strictly by position. Subtracting pandas objects aligns them by index
    # label (and a column vector would broadcast against a 1-D array), which could make MAPE
    # disagree with the MAE/RMSE computed from the same inputs.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    return {'Model': model_name, 'MAE': mae, 'RMSE': rmse, 'MAPE': mape}

# Score both forecasts against the held-out closing prices.
actual_close = test_data['Close']
res_arima = evaluate(actual_close, forecast_arima, 'ARIMA')
res_lstm = evaluate(actual_close, forecast_lstm, 'LSTM')

# One row per model; show the table and write it to a CSV in the working directory.
results = pd.DataFrame([res_arima, res_lstm])
print(results)
output_path = 'model_comparison.csv'
results.to_csv(output_path, index=False)
print("Results saved to model_comparison.csv")