In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.feature_selection import mutual_info_classif
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Bidirectional, LSTM, Dense, Dropout, BatchNormalization, Multiply, Flatten, Activation, RepeatVector, Permute
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import logging

# Configure root logging at INFO level and create this notebook's named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Seed the NumPy and TensorFlow global generators with one shared value so
# repeated runs start from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Data Loading and Preprocessing ---
print("Loading data...")
df = pd.read_csv('sp500_master_data.csv', parse_dates=['Date'], index_col='Date')
# pct_change, shift(-1), time-based interpolation and the later windowing all
# assume rows are in chronological order, so enforce it here. mergesort is
# stable, so rows sharing a date keep their original relative order.
df = df.sort_index(kind='mergesort')
print("Data loaded successfully.")

print("Preprocessing data...")
# Calculate Returns (simple day-over-day percentage change of the close).
df['Returns'] = df['Close'].pct_change()

# Handle missing values with time-based interpolation, then drop any row that
# still contains a NaN (interpolation does not fill leading gaps, e.g. the
# first 'Returns' value produced by pct_change).
df = df.interpolate(method='time')
df = df.dropna()

# Create target variable: 0 (Down), 1 (Neutral), 2 (Up)
# The label at row t describes the return realised at row t+1.
price_change_threshold = 0.005
next_day_return = df['Returns'].shift(-1)
df['target'] = np.select(
    [next_day_return > price_change_threshold,
     next_day_return < -price_change_threshold],
    [2, 0],
    default=1,
)
# The final row has no next-day return. Comparisons against NaN are False, so
# it would silently be labelled Neutral; remove it instead of keeping a
# fabricated label.
df = df.loc[next_day_return.notna()].copy()
df['target'] = df['target'].astype(int)

# --- Time Series Split (decided first, so nothing below is fitted on test rows) ---
sequence_length = 30
# One window per possible end row: window k covers rows k .. k+sequence_length-1.
n_sequences = len(df) - sequence_length + 1
tscv = TimeSeriesSplit(n_splits=5)
if n_sequences <= tscv.n_splits:
    raise ValueError(
        f"Need more than {sequence_length + tscv.n_splits - 1} rows to build "
        f"{tscv.n_splits} time-series folds of {sequence_length}-step windows; "
        f"got {len(df)}."
    )

# Use the last split for training and testing: it has the largest training
# span and the most recent test block. (Breaking out of the loop on the first
# iteration would pick the *first*, smallest fold instead.)
train_idx, test_idx = list(tscv.split(np.arange(n_sequences)))[-1]

# Rows touched by any training window; feature selection and scaling are
# fitted on these rows only to avoid leaking test-period information.
train_row_end = int(train_idx[-1]) + sequence_length
train_rows = df.iloc[:train_row_end]

# --- Feature Selection using Mutual Information ---
# Select numerical features for MI calculation
features = df.select_dtypes(include=[np.number]).columns.drop('target')
mi_scores = mutual_info_classif(train_rows[features], train_rows['target'], random_state=42)
mi_df = pd.DataFrame({'Feature': features, 'MI Score': mi_scores}).sort_values('MI Score', ascending=False)
selected_features = mi_df.head(15)['Feature'].tolist()  # Top 15 features
print(f"Selected features: {selected_features}")

# --- Normalize Features ---
# Fit the min/max on training rows, then apply the same transform everywhere.
# Test-period values may therefore fall slightly outside [0, 1]; that is
# expected and preferable to peeking at future extremes.
scaler = MinMaxScaler()
scaler.fit(train_rows[selected_features])
df[selected_features] = scaler.transform(df[selected_features])

# --- Prepare Sequence Data ---
# Pull the arrays out once instead of slicing the DataFrame on every iteration.
feature_matrix = df[selected_features].to_numpy()
target_values = df['target'].to_numpy()
X = np.stack([feature_matrix[start:start + sequence_length] for start in range(n_sequences)])
# df['target'] at row t already encodes the move from t to t+1 (it was built
# with shift(-1)), so each window is paired with the label of its own last
# row. Using the row *after* the window would shift the target a second time
# and make the model skip a day.
y = tf.keras.utils.to_categorical(target_values[sequence_length - 1:], num_classes=3)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# --- Build CNN-BiLSTM Model with Attention ---
def attention_layer(inputs):
    """Collapse a sequence tensor into one vector via learned soft attention.

    A single-unit tanh Dense layer scores every time step, the scores are
    softmax-normalised across time, broadcast over the feature axis, used to
    weight ``inputs`` element-wise, and the weighted steps are summed.

    Args:
        inputs: Keras tensor indexed as (batch, timesteps, features); the
            feature size is read from ``inputs.shape[2]``.

    Returns:
        Keras tensor of shape (batch, features): the attention-weighted sum
        over the time axis.
    """
    feature_dim = inputs.shape[2]

    # One scalar score per time step -> (batch, timesteps, 1) -> (batch, timesteps).
    step_scores = Dense(1, activation='tanh')(inputs)
    step_scores = Flatten()(step_scores)

    # Normalise the scores so they sum to 1 across the time axis.
    attn = Activation('softmax')(step_scores)

    # Tile the weights across features: (batch, features, timesteps), then
    # swap axes back to (batch, timesteps, features) to line up with `inputs`.
    attn = RepeatVector(feature_dim)(attn)
    attn = Permute([2, 1])(attn)

    weighted_steps = Multiply()([inputs, attn])

    # Sum out the time axis to obtain the context vector.
    sum_over_time = tf.keras.layers.Lambda(lambda t: tf.keras.backend.sum(t, axis=1))
    return sum_over_time(weighted_steps)

def build_model(input_shape, num_classes=3):
    """Build and compile the CNN -> BiLSTM -> attention classifier.

    Architecture, in order:
      * two Conv1D(kernel 3, 'same', ReLU) + BatchNormalization + MaxPooling1D(2)
        stages with 64 and 128 filters;
      * two bidirectional LSTM layers (64 then 32 units per direction), both
        returning full sequences and each followed by Dropout(0.2);
      * attention pooling over time (``attention_layer``);
      * Dense(64, ReLU) + Dropout(0.2) and a softmax output layer.

    Args:
        input_shape: Shape of one sample without the batch axis, passed
            straight to ``Input``; the notebook calls this with
            (sequence_length, number_of_features).
        num_classes: Width of the softmax output layer.

    Returns:
        A ``Model`` compiled with Adam (learning rate 0.001), categorical
        cross-entropy loss and the accuracy metric.
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor; each pooling step uses pool size 2.
    net = inputs
    for n_filters in (64, 128):
        net = Conv1D(n_filters, 3, padding='same', activation='relu')(net)
        net = BatchNormalization()(net)
        net = MaxPooling1D(2)(net)

    # Recurrent stack; sequences are kept so attention can pool over time.
    for n_units in (64, 32):
        net = Bidirectional(LSTM(n_units, return_sequences=True))(net)
        net = Dropout(0.2)(net)

    # Attention pooling followed by the classification head.
    net = attention_layer(net)
    net = Dense(64, activation='relu')(net)
    net = Dropout(0.2)(net)
    outputs = Dense(num_classes, activation='softmax')(net)

    classifier = Model(inputs, outputs)
    classifier.compile(
        optimizer=Adam(0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


Loading data...
Data loaded successfully.
Preprocessing data...


  df = df.interpolate(method='time')


Selected features: ['Market_Return', 'VIX_MA_10', 'Market_Volatility', 'VIX', 'Price_Range_Pct', 'Volatility_60d', 'Lower_Channel_50', 'Volatility_20d', 'Lower_Channel_20', 'Channel_Width_50', 'Upper_Channel_20', 'Volatility_5d', 'Channel_Width_20', 'Upper_Channel_50', 'BB_Width_20']


In [None]:

# --- Train Model ---
model = build_model((sequence_length, len(selected_features)))

# Hold out the most recent slice of the training windows for validation.
# Early stopping, LR scheduling and best-weight restoration all key off
# val_loss; driving them with the test set would tune the model on the very
# data it is later scored on and inflate the reported metrics.
val_fraction = 0.2
if len(X_train) < 2:
    raise ValueError("Need at least 2 training windows to carve out a validation set.")
n_val = min(len(X_train) - 1, max(1, int(len(X_train) * val_fraction)))
X_fit, X_val = X_train[:-n_val], X_train[-n_val:]
y_fit, y_val = y_train[:-n_val], y_train[-n_val:]

# Balanced class weights, computed from the labels the model is actually fitted on.
y_train_labels = np.argmax(y_train, axis=1)
y_fit_labels = y_train_labels[:-n_val]
present_classes = np.unique(y_fit_labels)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_fit_labels)
# Key each weight by its real class id. enumerate() would renumber the weights
# 0..k-1 and attach them to the wrong classes whenever a class is absent.
class_weight_dict = {int(label): float(weight) for label, weight in zip(present_classes, class_weights)}

history = model.fit(
    X_fit, y_fit,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    class_weight=class_weight_dict,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
               ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)]
)

# --- Evaluate Model ---
# Fix the class ids and display names explicitly so the report and the
# confusion matrix always have three aligned rows/columns, even when a class
# is missing from both the predictions and the ground truth (without `labels`
# the report raises and the matrix no longer matches the tick labels).
class_ids = [0, 1, 2]
class_names = ['Down', 'Neutral', 'Up']

y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)
print("Classification Report:")
print(classification_report(y_true, y_pred, labels=class_ids,
                            target_names=class_names, zero_division=0))

# Plot confusion matrix (rows = actual class, columns = predicted class).
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=cm_ax)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
cm_ax.set_title('Confusion Matrix')
plt.show()

# Plot training history: loss on the left panel, accuracy on the right, each
# showing the per-epoch training and validation curves recorded by fit().
history_fig, history_axes = plt.subplots(1, 2, figsize=(12, 4))
history_panels = (
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
)
for panel_ax, (train_key, val_key, train_label, val_label, panel_title, y_label) in zip(history_axes, history_panels):
    panel_ax.plot(history.history[train_key], label=train_label)
    panel_ax.plot(history.history[val_key], label=val_label)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend()
history_fig.tight_layout()
plt.show()

print("Workflow completed.")