In [4]:
# Core libraries
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning - Preprocessing
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Machine Learning - Models
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import xgboost as xgb

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Model interpretation
import shap

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Display settings
pd.set_option('display.max_columns', None)
plt.style.use('seaborn-v0_8-darkgrid')

print("‚úÖ All libraries imported successfully!")
print(f"üì¶ TensorFlow version: {tf.__version__}")
print(f"üì¶ XGBoost version: {xgb.__version__}")
print(f"üéØ Ready to build models!")

‚úÖ All libraries imported successfully!
üì¶ TensorFlow version: 2.20.0
üì¶ XGBoost version: 3.1.2
üéØ Ready to build models!


In [5]:
# Load the processed data with technical indicators
print("="*60)
print("LOADING PROCESSED DATA")
print("="*60)

# Define file paths
data_path = '../data/processed/'

# Load data for all three indices
INDICES = ['SPY', 'IWM', 'QQQ']
data_dict = {}

for ticker in INDICES:
    filename = f'{data_path}{ticker}_with_indicators.csv'
    
    if os.path.exists(filename):
        df = pd.read_csv(filename, index_col=0, parse_dates=True)
        data_dict[ticker] = df
        print(f"‚úÖ Loaded {ticker}: {df.shape[0]} rows, {df.shape[1]} columns")
    else:
        print(f"‚ùå File not found: {filename}")
        print(f"   Make sure you completed Phase 3 (Feature Engineering)")

if len(data_dict) == 0:
    print("\n‚ö†Ô∏è ERROR: No data files found!")
    print("   Please complete Phase 3 first to generate processed data.")
else:
    print(f"\n‚úÖ Successfully loaded {len(data_dict)} datasets")
    print(f"üìä Total features available: {data_dict['SPY'].shape[1]}")
```

**Run:** Shift + Enter

---

### üéì **EXPLANATION:**

- **Loads data from Phase 3** (data with technical indicators)
- **Checks if files exist** before trying to load
- **Stores in dictionary** for easy access

**Expected Output:**
```
============================================================
LOADING PROCESSED DATA
============================================================
‚úÖ Loaded SPY: 2517 rows, 35 columns
‚úÖ Loaded IWM: 2517 rows, 35 columns
‚úÖ Loaded QQQ: 2517 rows, 35 columns

‚úÖ Successfully loaded 3 datasets
üìä Total features available: 35

SyntaxError: invalid character '‚úÖ' (U+2705) (3206852515.py, line 47)