# 1. Import Required Libraries and Load Data

### Train-Test Split for ARIMA Model

## ➡️ ARIMA Model

## ➡️ LSTM Model

## ➡️ Model Comparison

In [1]:
# %% [markdown]
# # Time Series Forecasting
# ## Task 2: Hybrid ARIMA-LSTM Modeling

# %%
# 1. Setup
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.callbacks import EarlyStopping  # Added missing import

# Set project root
# The project root is taken to be the parent of the current working directory.
# NOTE(review): this presumes the notebook is launched from a direct
# sub-directory of the project (e.g. notebooks/) — confirm.
PROJECT_ROOT = Path.cwd().parent

# Make the project's `src` directory importable for the custom modules below.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
SRC_DIR = str(PROJECT_ROOT / 'src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Import custom modules
from data_processing import load_merged_data, get_feature
from models import (train_arima, forecast_arima,
                   prepare_lstm_data, build_lstm_model,
                   create_hybrid_model, evaluate_forecasts)

# Visualization settings
# Global plotting defaults for every figure drawn later in the notebook.
# NOTE(review): sns.set_theme() runs after plt.style.use(), so wherever both
# touch the same rcParams the seaborn values presumably win — confirm which of
# the two looks ('ggplot' vs. 'whitegrid') is actually intended.
plt.style.use('ggplot')
sns.set_theme(style="whitegrid")
# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output. Recent Jupyter versions do this by default.
%matplotlib inline

# %%
# 2. Data Loading
# Loads the merged price table and the engineered features through the
# project's data_processing helpers, and keeps only the 'TSLA' column of each.
# Defines: df, tsla (float32), returns, volatility.
print("Loading processed data...")
try:
    df = load_merged_data()
    tsla = df['TSLA'].astype('float32')

    # Load engineered features
    returns = get_feature('daily_returns')['TSLA']
    volatility = get_feature('rolling_volatility')['TSLA']

    print(f"Data loaded successfully ({len(tsla)} points)")
    print(f"Date range: {tsla.index.min()} to {tsla.index.max()}")

except Exception:
    # Print a hint for the reader, then propagate the failure unchanged.
    print("❌ Data loading failed. Verify Task 1 completed successfully.")
    # Bare `raise` re-raises the active exception as-is; no need to bind it
    # to a name just to raise it again.
    raise

# %%
# 3. Train-Test Split
# Positional 80/20 cut of the TSLA series: rows before `train_size` are used
# for fitting, rows from `train_size` onward are held out for testing.
train_size = int(0.8 * len(tsla))
train = tsla.iloc[:train_size]
test = tsla.iloc[train_size:]

# %%
# 4. ARIMA Modeling - Ignore date warnings
import warnings
from statsmodels.tools.sm_exceptions import ValueWarning

print("\n🔧 Training ARIMA model...")
# Silence statsmodels' ValueWarning only while fitting and forecasting.
# catch_warnings() snapshots the warning-filter state and restores it on exit,
# including when training raises. The previous approach added a new "default"
# filter afterwards instead of restoring the earlier state.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=ValueWarning)
    # Fit on the training slice, then forecast over the held-out slice.
    arima_model = train_arima(train)
    # forecast_arima returns a pair; only its first element is used here.
    arima_forecast, _ = forecast_arima(arima_model, test)

# %%
# 5. LSTM Modeling
print("\n🧠 Training LSTM model...")

# Build the model inputs from the price series plus the two engineered
# features. The helper also returns the scaler that is used further down to
# map predictions back to the original scale.
X, y, scaler = prepare_lstm_data(
    tsla,
    additional_features=[returns, volatility],
)

# First 80% of the samples are used for training, the remainder for testing.
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Build model
# The per-sample input shape is read from axes 1 and 2 of the training array.
sample_shape = (X_train.shape[1], X_train.shape[2])
lstm_model = build_lstm_model(input_shape=sample_shape)

# Train with early stopping
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
history = lstm_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1,
)

# Generate forecasts
lstm_pred = lstm_model.predict(X_test)
# Reshape predictions to a single column, undo the scaling, flatten to 1-D.
lstm_prices = scaler.inverse_transform(lstm_pred.reshape(-1, 1)).flatten()

# Attach the dates the predictions belong to.
# NOTE(review): the offset 60 is presumably the look-back window used inside
# prepare_lstm_data — confirm, and keep the two in sync if it ever changes.
forecast_start = 60 + split
lstm_index = tsla.index[forecast_start:forecast_start + len(lstm_pred)]
lstm_forecast = pd.Series(lstm_prices, index=lstm_index)

# %%
# 6. Hybrid Model
print("\n🤖 Creating hybrid ensemble...")
# create_hybrid_model returns a callable; it is applied to the LSTM test
# inputs and the result is indexed with the same dates as the LSTM forecast.
# NOTE(review): the recorded run of this notebook stops right after this
# step's banner with "ValueError: operands could not be broadcast together
# with shapes (60,) (497,)". That looks like two forecasts of different
# lengths being combined, presumably inside the hybrid predictor — confirm
# against models.create_hybrid_model before relying on this output.
hybrid_predict = create_hybrid_model(arima_model, lstm_model)
hybrid_forecast = pd.Series(hybrid_predict(X_test), index=lstm_index)

# %%
# 7. Evaluation
# Score every model's forecast against the held-out series.
# NOTE(review): `test` is cut at `train_size`, while the LSTM and Hybrid
# forecasts are indexed by `lstm_index` (cut at 60 + split). Whether
# evaluate_forecasts aligns them by index is not visible here — confirm.
forecasts_by_model = {
    'ARIMA': arima_forecast,
    'LSTM': lstm_forecast,
    'Hybrid': hybrid_forecast,
}
results = evaluate_forecasts(test, forecasts_by_model)

print("\n📊 Model Performance:")
# Shade the results table with a blue gradient for quick visual comparison.
styled_results = results.style.background_gradient(cmap='Blues')
display(styled_results)

# %%
# 8. Visualization
# One figure overlaying the tail of the training data, the held-out actuals
# and the three model forecasts. Series are drawn in the listed order.
fig, ax = plt.subplots(figsize=(14, 7))

plot_specs = [
    (train.iloc[-100:], {'label': 'Training Data', 'alpha': 0.6}),  # last 100 training points
    (test, {'label': 'Actual', 'color': 'black', 'linewidth': 2}),
    (arima_forecast, {'label': 'ARIMA', 'linestyle': '--'}),
    (lstm_forecast, {'label': 'LSTM', 'linestyle': ':'}),
    (hybrid_forecast, {'label': 'Hybrid', 'linewidth': 2}),
]
for series, line_style in plot_specs:
    ax.plot(series, **line_style)

ax.set_title('TSLA Price Forecasting Comparison')
ax.legend()
ax.grid(True)
plt.show()

# %%
# 9. Save Results
# Write actuals and all forecasts side by side to <project root>/outputs.
output_dir = PROJECT_ROOT / 'outputs'
output_dir.mkdir(parents=True, exist_ok=True)

# Actuals and the ARIMA forecast are restricted to the LSTM forecast dates so
# that all four columns refer to the same rows.
# NOTE(review): this needs arima_forecast to be labelled with the same dates
# as `tsla` — confirm what index forecast_arima returns.
forecast_columns = {
    'Actual': test.loc[lstm_index],
    'ARIMA': arima_forecast.loc[lstm_index],
    'LSTM': lstm_forecast,
    'Hybrid': hybrid_forecast,
}
forecast_df = pd.DataFrame(forecast_columns)
forecast_df.to_csv(output_dir / 'forecasts.csv')

print(f"\n✅ Results saved to {output_dir / 'forecasts.csv'}")

Loading processed data...
Data loaded successfully (2541 points)
Date range: 2015-07-01 00:00:00 to 2025-08-07 00:00:00

🔧 Training ARIMA model...

🧠 Training LSTM model...


  super().__init__(**kwargs)


Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 105ms/step - loss: 0.1801 - val_loss: 5.5154e-04
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 77ms/step - loss: 0.1487 - val_loss: 0.0048
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 99ms/step - loss: 0.1233 - val_loss: 0.0124
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 80ms/step - loss: 0.1034 - val_loss: 0.0223
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 78ms/step - loss: 0.0883 - val_loss: 0.0339
Epoch 6/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 78ms/step - loss: 0.0769 - val_loss: 0.0459
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step

🤖 Creating hybrid ensemble...
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


ValueError: operands could not be broadcast together with shapes (60,) (497,) 