In [None]:
# Enhanced feature importance analysis
def analyze_feature_importance_for_symbol(model, X_test, y_test, feature_names, symbol):
    """Rank input features by permutation importance and plot the top 20.

    For every feature channel (last axis of ``X_test``) the values are
    shuffled across samples, all timesteps together, and the drop in accuracy
    relative to the unshuffled baseline is recorded. The shuffle is repeated
    10 times per feature with a fixed seed.

    Args:
        model: Object whose ``predict(X)`` returns per-class scores of shape
            ``(n_samples, n_classes)``.
        X_test: Array of shape ``(n_samples, timesteps, n_features)``.
        y_test: One-hot encoded labels of shape ``(n_samples, n_classes)``.
        feature_names: One name per feature channel of ``X_test``.
        symbol: Label used in the plot title.

    Returns:
        DataFrame with columns ``Feature``, ``Importance`` (mean accuracy
        drop) and ``Std``, sorted by descending importance.

    Raises:
        ValueError: If ``X_test`` is not 3-D or ``feature_names`` does not
            match its last axis.
    """
    X_test = np.asarray(X_test)
    if X_test.ndim != 3:
        raise ValueError(
            f"X_test must be 3-D (samples, timesteps, features); got {X_test.ndim}-D"
        )
    feature_names = list(feature_names)
    if len(feature_names) != X_test.shape[2]:
        raise ValueError(
            f"Expected {X_test.shape[2]} feature names, got {len(feature_names)}"
        )

    y_test_classes = np.argmax(y_test, axis=1)

    def _accuracy(inputs):
        # Fraction of samples whose arg-max prediction matches the true class.
        predicted = np.argmax(model.predict(inputs), axis=1)
        return float(np.mean(predicted == y_test_classes))

    baseline_accuracy = _accuracy(X_test)
    print(f"Baseline accuracy: {baseline_accuracy:.4f}")

    # sklearn.inspection.permutation_importance is not usable here: it needs an
    # estimator object rather than a bare callable, and on the flattened input
    # it would yield one score per (timestep, feature) cell, which cannot be
    # paired with feature_names. Permute whole feature channels instead.
    n_repeats = 10
    rng = np.random.RandomState(42)
    n_samples = X_test.shape[0]
    accuracy_drops = np.zeros((len(feature_names), n_repeats))
    for feature_idx in range(len(feature_names)):
        for repeat in range(n_repeats):
            shuffled = X_test.copy()
            order = rng.permutation(n_samples)
            shuffled[:, :, feature_idx] = X_test[order, :, feature_idx]
            accuracy_drops[feature_idx, repeat] = baseline_accuracy - _accuracy(shuffled)

    importance_df = pd.DataFrame({
        'Feature': feature_names,
        'Importance': accuracy_drops.mean(axis=1),
        'Std': accuracy_drops.std(axis=1),
    })
    importance_df = importance_df.sort_values('Importance', ascending=False)

    # Plot the 20 most important features.
    plt.figure(figsize=(12, 8))
    plt.barh(importance_df['Feature'][:20], importance_df['Importance'][:20])
    plt.xlabel('Permutation Importance (decrease in accuracy)')
    plt.title(f'Feature Importance for {symbol} (Top 20)')
    plt.tight_layout()
    plt.show()

    return importance_df
def detect_market_regime(df, window=60):
    """Label each row with a volatility/trend market regime.

    Adds three columns to ``df`` in place and returns the same frame:

    * ``volatility`` - rolling standard deviation of ``Returns`` over
      ``window`` rows, scaled by ``sqrt(252)``.
    * ``trend`` - slope of a linear regression of ``Close`` against
      ``0..window-1`` over the trailing ``window`` rows.
    * ``regime`` - one of ``'Bull Volatile'``, ``'Bear Volatile'``,
      ``'Bull Stable'``, ``'Bear Stable'``; NaN where volatility or trend is
      not yet defined (the rolling warm-up rows).

    A row is "Volatile" when its volatility exceeds the column mean plus half
    a standard deviation, and "Bull" when its trend slope is positive.

    Args:
        df: DataFrame with ``Returns`` and ``Close`` columns.
        window: Number of rows in each rolling window.

    Returns:
        The input DataFrame with the three columns added.
    """
    df['volatility'] = df['Returns'].rolling(window=window).std() * np.sqrt(252)

    # Rolling regression slope; windows that are incomplete yield NaN.
    x = np.arange(window)
    df['trend'] = df['Close'].rolling(window=window).apply(
        lambda y: linregress(x, y).slope, raw=True
    )

    volatility_threshold = df['volatility'].mean() + 0.5 * df['volatility'].std()

    is_volatile = (df['volatility'] > volatility_threshold).to_numpy()
    is_uptrend = (df['trend'] > 0).to_numpy()
    regime = np.where(
        is_volatile,
        np.where(is_uptrend, 'Bull Volatile', 'Bear Volatile'),
        np.where(is_uptrend, 'Bull Stable', 'Bear Stable')
    ).astype(object)

    # Comparisons against NaN are False, which would silently classify the
    # warm-up rows as 'Bear Stable'; mark them as undefined instead.
    undefined = (df['volatility'].isna() | df['trend'].isna()).to_numpy()
    regime[undefined] = np.nan
    df['regime'] = regime

    return df
def evaluate_signal_quality(signals, prediction_horizon=5, significant_move=0.02):
    """Score buy/sell signals against the price move that followed them.

    Adds ``future_return_{d}d`` columns (d = 1..``prediction_horizon``) plus
    boolean ``significant_up`` / ``significant_down`` columns to ``signals``
    in place, prints per-horizon directional accuracy, and returns summary
    metrics.

    Signals whose future return is unknown (the last ``d`` rows for horizon
    ``d``) are left out of the accuracy for that horizon rather than being
    counted as wrong.

    Args:
        signals: DataFrame with ``price``, ``position`` (1 = buy, -1 = sell)
            and ``strategy_return`` columns.
        prediction_horizon: Largest look-ahead, in rows, to evaluate.
        significant_move: Absolute return that counts as a significant move.

    Returns:
        Dict with ``buy_precision`` / ``sell_precision`` (1-row-ahead
        directional accuracy), signal counts and win rates. Rates are 0 when
        there is nothing to evaluate.

    Raises:
        ValueError: If ``prediction_horizon`` is smaller than 1.
    """
    if prediction_horizon < 1:
        raise ValueError("prediction_horizon must be at least 1")

    horizons = range(1, prediction_horizon + 1)
    for days in horizons:
        signals[f'future_return_{days}d'] = signals['price'].pct_change(periods=days).shift(-days)

    def _directional_accuracy(rows, expect_rise):
        # Share of rows whose known future return moved in the expected direction.
        accuracy = {}
        for days in horizons:
            known = rows[f'future_return_{days}d'].dropna()
            if known.empty:
                accuracy[days] = 0.0
            elif expect_rise:
                accuracy[days] = float((known > 0).mean())
            else:
                accuracy[days] = float((known < 0).mean())
        return accuracy

    # Evaluate buy signals
    buy_signals = signals[signals['position'] == 1]
    buy_accuracy = {}
    if len(buy_signals) > 0:
        buy_accuracy = _directional_accuracy(buy_signals, expect_rise=True)
        print("Buy Signal Quality:")
        for days, acc in buy_accuracy.items():
            print(f"  {days}-day accuracy: {acc:.4f}")

    # Evaluate sell signals
    sell_signals = signals[signals['position'] == -1]
    sell_accuracy = {}
    if len(sell_signals) > 0:
        sell_accuracy = _directional_accuracy(sell_signals, expect_rise=False)
        print("Sell Signal Quality:")
        for days, acc in sell_accuracy.items():
            print(f"  {days}-day accuracy: {acc:.4f}")

    # Flag rows followed by a significant move at any horizon.
    signals['significant_up'] = np.any(
        [signals[f'future_return_{days}d'] > significant_move for days in horizons], axis=0)
    signals['significant_down'] = np.any(
        [signals[f'future_return_{days}d'] < -significant_move for days in horizons], axis=0)

    # strategy_return is read as supplied by the caller, from the signal rows.
    signal_metrics = {
        'buy_precision': buy_accuracy[1] if len(buy_signals) > 0 else 0,
        'sell_precision': sell_accuracy[1] if len(sell_signals) > 0 else 0,
        'buy_signals_count': len(buy_signals),
        'sell_signals_count': len(sell_signals),
        'buy_win_rate': np.mean(buy_signals['strategy_return'] > 0) if len(buy_signals) > 0 else 0,
        'sell_win_rate': np.mean(sell_signals['strategy_return'] > 0) if len(sell_signals) > 0 else 0
    }

    return signal_metrics
def hyperparameter_tuning(X_train, y_train, X_val, y_val):
    """Exhaustively search a fixed hyperparameter grid.

    Every combination of filters, LSTM units, dropout rates and learning rate
    is used to build a model via ``build_model_with_params``, which is then
    fitted for up to 50 epochs with early stopping on ``val_accuracy``. The
    combination with the highest validation accuracy seen in any epoch wins.

    Args:
        X_train: Training inputs; axes 1 and 2 give the model input shape.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.

    Returns:
        Tuple ``(best_params, results_df)``: the winning parameter dict (None
        if no trial scored above 0) and a DataFrame with one row per trial.
    """
    search_space = {
        'filters': [[64, 128, 256], [128, 256, 512]],
        'lstm_units': [[64, 32], [128, 64]],
        'dropout_rates': [[0.3, 0.4, 0.5], [0.2, 0.3, 0.4]],
        'learning_rates': [0.001, 0.0005, 0.0001]
    }

    # Flatten the grid in the same order nested loops would visit it.
    trials = [
        (filters, lstm_units, dropout_rates, lr)
        for filters in search_space['filters']
        for lstm_units in search_space['lstm_units']
        for dropout_rates in search_space['dropout_rates']
        for lr in search_space['learning_rates']
    ]

    trial_records = []
    best_params = None
    best_val_accuracy = 0

    for filters, lstm_units, dropout_rates, lr in trials:
        print(f"Testing: filters={filters}, lstm_units={lstm_units}, "
              f"dropout={dropout_rates}, lr={lr}")

        model = build_model_with_params(
            (X_train.shape[1], X_train.shape[2]),
            filters=filters,
            lstm_units=lstm_units,
            dropout_rates=dropout_rates,
            learning_rate=lr
        )

        stopper = EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True
        )

        # Fewer epochs than a full training run to keep the search affordable.
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=50,
            batch_size=32,
            callbacks=[stopper],
            verbose=0
        )

        val_accuracy = max(history.history['val_accuracy'])
        print(f"Validation accuracy: {val_accuracy:.4f}")

        trial_params = {
            'filters': filters,
            'lstm_units': lstm_units,
            'dropout_rates': dropout_rates,
            'learning_rate': lr
        }
        trial_records.append({**trial_params, 'val_accuracy': val_accuracy})

        # Strict comparison: the first trial reaching a score keeps the lead.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_params = trial_params

    print(f"Best parameters: {best_params}")
    print(f"Best validation accuracy: {best_val_accuracy:.4f}")

    return best_params, pd.DataFrame(trial_records)
def optimize_decision_thresholds(model, X_val, y_val, df_val):
    """Grid-search the up/down probability thresholds that maximise Sharpe.

    Predictions are read as columns 0 = down, 1 = neutral, 2 = up. For every
    threshold pair in 0.50, 0.55, ... below 0.95, a position of +1 is taken
    where the up probability exceeds the up threshold and -1 where the down
    probability exceeds the down threshold (short wins on overlap). Each
    position is applied to the next row's log return, and the annualised
    Sharpe ratio of the resulting series is compared.

    Args:
        model: Object whose ``predict(X_val)`` returns class probabilities.
        X_val: Validation inputs passed to ``model.predict``.
        y_val: Unused.
        df_val: DataFrame with a ``Close`` column; its first rows are paired
            with the predictions. If it has fewer rows than there are
            predictions, no pair is evaluated and the defaults are returned.

    Returns:
        Tuple ``(best_threshold_up, best_threshold_down)``; both 0.5 when no
        pair achieved a Sharpe ratio above 0.
    """
    probabilities = model.predict(X_val)
    n_rows = len(probabilities)
    down_prob = probabilities[:, 0]
    up_prob = probabilities[:, 2]

    best_sharpe = 0
    best_threshold_up = 0.5
    best_threshold_down = 0.5

    if len(df_val) >= n_rows:
        # Prices and market returns do not depend on the thresholds.
        price = pd.Series(df_val['Close'].values[:n_rows])
        market_return = np.log(price / price.shift(1))

        for threshold_up in np.arange(0.5, 0.95, 0.05):
            for threshold_down in np.arange(0.5, 0.95, 0.05):
                # Short takes precedence when both conditions hold.
                position = pd.Series(
                    np.where(down_prob > threshold_down, -1,
                             np.where(up_prob > threshold_up, 1, 0))
                )
                strategy_return = position.shift(1) * market_return

                dispersion = strategy_return.std()
                if dispersion > 0:
                    sharpe_ratio = np.sqrt(252) * strategy_return.mean() / dispersion
                else:
                    sharpe_ratio = 0

                if sharpe_ratio > best_sharpe:
                    best_sharpe = sharpe_ratio
                    best_threshold_up = threshold_up
                    best_threshold_down = threshold_down

    print(f"Best thresholds: Up={best_threshold_up:.2f}, Down={best_threshold_down:.2f}")
    print(f"Best Sharpe ratio: {best_sharpe:.4f}")

    return best_threshold_up, best_threshold_down

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import linregress
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Conv1D, MaxPooling1D, LSTM, Bidirectional, concatenate, BatchNormalization, Activation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Seed NumPy's and TensorFlow's global random generators with a fixed value
# so that repeated runs draw the same random numbers.
np.random.seed(42)
tf.random.set_seed(42)

# Dynamic Class Weight Callback (assumed from original code)
class DynamicClassWeightCallback(tf.keras.callbacks.Callback):
    """Keras callback that inflates class weights by per-class error rate.

    After each epoch the model predicts on the stored training data; every
    class weight is multiplied by ``1 + adjustment_factor * error_rate``,
    where ``error_rate`` is the share of that class's samples that were
    misclassified (0 when the class has no samples). Weights therefore never
    decrease.
    """

    def __init__(self, X_train, y_train, initial_weights, adjustment_factor=0.05):
        """Store the training data and a private copy of the starting weights.

        Args:
            X_train: Training inputs handed to ``model.predict`` every epoch.
            y_train: One-hot training labels.
            initial_weights: Mapping of class index to starting weight.
            adjustment_factor: Scale applied to the error rate per update.
        """
        super().__init__()
        self.X_train = X_train
        self.y_train = y_train
        self.adjustment_factor = adjustment_factor
        # Copy so the caller's dict is not modified by the updates below.
        self.class_weights = initial_weights.copy()

    def on_epoch_end(self, epoch, logs=None):
        """Recompute the class weights from the current training-set errors."""
        predicted = np.argmax(self.model.predict(self.X_train), axis=1)
        actual = np.argmax(self.y_train, axis=1)
        misclassified = actual != predicted

        for label, weight in list(self.class_weights.items()):
            members = (actual == label)
            if np.any(members):
                miss_rate = np.mean(misclassified[members])
            else:
                miss_rate = 0
            self.class_weights[label] = weight * (1 + self.adjustment_factor * miss_rate)

        # NOTE(review): this only sets an attribute on the model object;
        # confirm that the training loop actually reads it, otherwise the
        # updated weights never influence the loss.
        self.model.class_weight = self.class_weights

# Preprocess Stock Data
def preprocess_stock_data(df):
    """Impute missing values and attach next-row direction labels.

    Missing values are forward-filled, then backward-filled, then any
    remainder is set to 0. The ``target`` column encodes the following row's
    ``Returns``: 2 (up) above +0.3%, 0 (down) below -0.3%, otherwise
    1 (neutral). Rows with no following return (the last row) are dropped
    because they cannot be labelled.

    The input frame is not modified; a new DataFrame is returned.

    Args:
        df: DataFrame containing at least a ``Returns`` column.

    Returns:
        Imputed copy of ``df`` with an integer ``target`` column and without
        unlabelled rows.
    """
    # NOTE(review): the backward fill copies later observations into earlier
    # rows; confirm this is acceptable for the leading gaps in this dataset.
    df = df.ffill().bfill().fillna(0)

    threshold = 0.003  # Threshold for classifying Up/Down movements
    next_return = df['Returns'].shift(-1)
    df['target'] = np.where(next_return > threshold, 2,  # Up
                            np.where(next_return < -threshold, 0, 1))  # Down, Neutral

    # np.where never yields NaN, so rows without a next return must be removed
    # explicitly; otherwise they would be kept with a bogus Neutral label.
    df = df[next_return.notna().to_numpy()].copy()
    df['target'] = df['target'].astype(int)
    return df

# Feature Selection with Mutual Information
def select_features_with_mi(df, features, target_col='target', n_select=30):
    """Pick the features with the highest mutual information with the target.

    Args:
        df: DataFrame holding the candidate features and the target column.
        features: Iterable of candidate column names. The target column is
            ignored if it appears here, so passing ``df.columns`` is safe.
        target_col: Name of the label column.
        n_select: Maximum number of feature names to return.

    Returns:
        List of up to ``n_select`` feature names, best first.

    Raises:
        ValueError: If no candidate features remain.
    """
    # A target that scores itself would always rank first and leak the label.
    candidates = [name for name in features if name != target_col]
    if not candidates:
        raise ValueError("No candidate features to score")

    data = df.dropna(subset=[target_col])
    mi_scores = mutual_info_classif(data[candidates], data[target_col])
    mi_df = pd.DataFrame({'Feature': candidates, 'MI Score': mi_scores})
    mi_df = mi_df.sort_values('MI Score', ascending=False)
    print("Top 15 features by mutual information:")
    print(mi_df.head(15))
    return mi_df.head(n_select)['Feature'].tolist()

# Prepare Multi-Sequence Data
def prepare_multi_sequence_data(df, sequence_lengths, target_column, feature_columns):
    """Build aligned look-back windows of several lengths for each sample.

    Sample ``k`` corresponds to row ``i = max(sequence_lengths) + k`` of
    ``df``. For every requested length ``L`` its window holds rows
    ``i - L .. i - 1`` of the feature columns, and its label is
    ``df[target_column]`` at row ``i``. All lengths share the same samples,
    so the arrays line up along axis 0.

    Args:
        df: Source DataFrame.
        sequence_lengths: Non-empty iterable of window lengths.
        target_column: Name of the label column.
        feature_columns: List of feature column names.

    Returns:
        Tuple ``(X_dict, y)`` where ``X_dict[L]`` has shape
        ``(n_samples, L, n_features)`` and ``y`` has shape ``(n_samples,)``.
        When ``df`` has too few rows, ``n_samples`` is 0 and the arrays keep
        their remaining dimensions.
    """
    max_length = max(sequence_lengths)

    # Extract the arrays once instead of re-selecting columns per window.
    feature_values = df[feature_columns].to_numpy()
    target_values = df[target_column].to_numpy()
    n_features = feature_values.shape[1]
    window_ends = range(max_length, len(df))

    X_dict = {}
    for length in sequence_lengths:
        windows = [feature_values[end - length:end] for end in window_ends]
        if windows:
            X_dict[length] = np.stack(windows)
        else:
            # Keep the 3-D shape so callers can still read .shape[1] / .shape[2].
            X_dict[length] = np.empty((0, length, n_features), dtype=feature_values.dtype)

    y = np.array(target_values[max_length:])
    return X_dict, y

# Build Multi-Timeframe CNN-BiLSTM Model
def build_multi_timeframe_model(sequence_lengths, num_features, num_classes=3):
    """Assemble and compile a multi-input CNN-BiLSTM classifier.

    One input branch is created per entry of ``sequence_lengths``. Each
    branch applies two Conv1D / BatchNorm / ReLU / MaxPool / Dropout stages
    (128 then 256 filters) followed by two bidirectional LSTM layers
    (128 then 64 units). Branch outputs are concatenated when there is more
    than one, then passed through a 64-unit dense layer and a softmax head.

    Args:
        sequence_lengths: Window length of each input branch.
        num_features: Number of features per timestep.
        num_classes: Size of the softmax output.

    Returns:
        Keras ``Model`` compiled with Adam (learning rate 0.0005),
        categorical cross-entropy loss and an accuracy metric.
    """
    # (filters, dropout rate) for each convolutional stage.
    conv_stages = ((128, 0.2), (256, 0.3))
    # (units, return_sequences) for each recurrent stage.
    recurrent_stages = ((128, True), (64, False))

    model_inputs = []
    branch_outputs = []
    for length in sequence_lengths:
        branch_input = Input(shape=(length, num_features))
        model_inputs.append(branch_input)

        x = branch_input
        for n_filters, drop_rate in conv_stages:
            x = Conv1D(filters=n_filters, kernel_size=3, padding='same')(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = MaxPooling1D(pool_size=2)(x)
            x = Dropout(drop_rate)(x)

        for n_units, keep_sequence in recurrent_stages:
            x = Bidirectional(LSTM(units=n_units, return_sequences=keep_sequence))(x)
            x = Dropout(0.4)(x)

        branch_outputs.append(x)

    # A single branch is used directly; concatenation needs at least two.
    merged = concatenate(branch_outputs) if len(branch_outputs) > 1 else branch_outputs[0]

    hidden = Dense(64, activation='relu')(merged)
    hidden = Dropout(0.5)(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    model = Model(inputs=model_inputs, outputs=outputs)
    model.compile(
        optimizer=Adam(learning_rate=0.0005),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Train Model with Advanced Techniques
def train_model_with_advanced_techniques(model, X_train, y_train, X_val, y_val, epochs=100, batch_size=32, symbol=None):
    """Fit the model with class weighting, early stopping and LR reduction.

    Starting class weights are the balanced weights
    ``n_samples / (n_present_classes * class_count)``, multiplied by 1.5 for
    every class except class 1. The same weights are passed to ``fit`` and,
    as a starting point, to a ``DynamicClassWeightCallback``.

    Args:
        model: Compiled Keras model.
        X_train: Training inputs.
        y_train: One-hot training labels.
        X_val: Validation inputs.
        y_val: One-hot validation labels.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.
        symbol: Unused.

    Returns:
        Tuple ``(history, model)``.
    """
    labels = np.argmax(y_train, axis=1)
    samples_per_class = np.bincount(labels)
    n_samples = len(labels)
    n_present_classes = len(np.unique(labels))

    class_weights = {}
    for cls, count in enumerate(samples_per_class):
        balanced = n_samples / (n_present_classes * count)
        # Every class other than index 1 gets an extra 1.5x emphasis.
        class_weights[cls] = balanced if cls == 1 else balanced * 1.5

    weight_updater = DynamicClassWeightCallback(
        X_train, y_train, initial_weights=class_weights, adjustment_factor=0.05
    )
    stop_early = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    shrink_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        class_weight=class_weights,
        callbacks=[stop_early, shrink_lr, weight_updater],
        verbose=1
    )
    return history, model

# Generate Advanced Trading Signals
def generate_advanced_trading_signals(model, X_test, df_test, confidence_threshold=0.6):
    """Turn model probabilities into positions and back-test them.

    Predictions are read as columns 0 = down, 1 = neutral, 2 = up. A long
    (+1) or short (-1) position is opened when the predicted class is up or
    down and its probability exceeds ``confidence_threshold``. The position
    size is the non-negative magnitude ``|2 * (confidence - 0.5)|``,
    optionally divided by ``1 + 10 * Volatility_20d``. Each position is
    applied to the next row's log return; the sized return keeps the sign of
    the position, so shorts profit from falling prices.

    Args:
        model: Object whose ``predict(X_test)`` returns class probabilities.
        X_test: Inputs passed to ``model.predict``.
        df_test: DataFrame with a ``Close`` column (and optionally
            ``Volatility_20d``); its first rows are paired with the
            predictions.
        confidence_threshold: Minimum winning-class probability to trade.

    Returns:
        Tuple ``(signals, performance_metrics)``: the per-row signal frame
        and a dict with ``total_return``, ``annual_return``,
        ``sharpe_ratio``, ``max_drawdown``, ``win_rate`` and
        ``profit_factor``.

    Raises:
        ValueError: If ``df_test`` has fewer rows than there are predictions.
    """
    y_pred_proba = np.asarray(model.predict(X_test))
    n_rows = len(y_pred_proba)
    if len(df_test) < n_rows:
        raise ValueError(
            f"df_test has {len(df_test)} rows but the model produced {n_rows} predictions"
        )

    signals = pd.DataFrame(index=df_test.index[:n_rows])
    signals['pred_down_prob'] = y_pred_proba[:, 0]
    signals['pred_neutral_prob'] = y_pred_proba[:, 1]
    signals['pred_up_prob'] = y_pred_proba[:, 2]
    signals['predicted_class'] = np.argmax(y_pred_proba, axis=1)
    signals['confidence'] = np.max(y_pred_proba, axis=1)

    confident = signals['confidence'] > confidence_threshold
    signals['position'] = 0
    signals.loc[(signals['predicted_class'] == 2) & confident, 'position'] = 1  # Long
    signals.loc[(signals['predicted_class'] == 0) & confident, 'position'] = -1  # Short

    # position_size is a magnitude; the direction lives in 'position'.
    signals['position_size'] = (signals['position'] * (signals['confidence'] - 0.5) * 2).abs()
    signals['price'] = df_test['Close'].values[:n_rows]
    if 'Volatility_20d' in df_test.columns:
        signals['volatility'] = df_test['Volatility_20d'].values[:n_rows]
        signals['position_size'] = signals['position_size'] / (1 + signals['volatility'] * 10)

    signals['market_return'] = np.log(signals['price'] / signals['price'].shift(1))
    signals['strategy_return'] = signals['position'].shift(1) * signals['market_return']
    # Re-apply the position sign: multiplying by the bare magnitude would book
    # every short as if it were a long.
    signed_size = signals['position'] * signals['position_size']
    signals['sized_strategy_return'] = signed_size.shift(1) * signals['market_return']

    signals['cumulative_market_return'] = np.exp(signals['market_return'].cumsum()) - 1
    signals['cumulative_strategy_return'] = np.exp(signals['sized_strategy_return'].cumsum()) - 1
    signals['drawdown'] = signals['cumulative_strategy_return'] - signals['cumulative_strategy_return'].cummax()

    sized_returns = signals['sized_strategy_return']
    total_return = np.exp(sized_returns.sum()) - 1
    annual_return = np.exp(sized_returns.mean() * 252) - 1

    # Guard against a flat or too-short return series, matching
    # optimize_decision_thresholds.
    return_std = sized_returns.std()
    sharpe_ratio = np.sqrt(252) * sized_returns.mean() / return_std if return_std > 0 else 0.0
    max_drawdown = signals['drawdown'].min()

    # Only rows with a defined, non-zero return count as trades; the leading
    # NaN row produced by shift() is not a losing trade.
    traded = sized_returns[sized_returns.notna() & (sized_returns != 0)]
    win_rate = float((traded > 0).mean()) if len(traded) > 0 else 0.0

    gross_profits = sized_returns[sized_returns > 0].sum()
    gross_losses = abs(sized_returns[sized_returns < 0].sum())
    profit_factor = gross_profits / gross_losses if gross_losses != 0 else float('inf')

    performance_metrics = {
        'total_return': total_return,
        'annual_return': annual_return,
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown,
        'win_rate': win_rate,
        'profit_factor': profit_factor
    }
    return signals, performance_metrics


# Visualize Trading Performance
def visualize_trading_performance(signals, performance_metrics, symbol_name):
    """Show a three-panel figure summarising a back-test.

    Panels, top to bottom: cumulative market vs. strategy return, strategy
    drawdown, and the price series with markers where the position increased
    (buy) or decreased (sell). A text box with the headline metrics is placed
    in the bottom-left corner of the figure.

    Args:
        signals: DataFrame with ``cumulative_market_return``,
            ``cumulative_strategy_return``, ``drawdown``, ``price`` and
            ``position`` columns.
        performance_metrics: Dict with ``total_return``, ``annual_return``,
            ``sharpe_ratio``, ``max_drawdown``, ``win_rate`` and
            ``profit_factor``.
        symbol_name: Label used in titles and the legend.
    """
    _, (returns_ax, drawdown_ax, price_ax) = plt.subplots(3, 1, figsize=(15, 12))

    # Panel 1: cumulative returns.
    signals['cumulative_market_return'].plot(
        ax=returns_ax, label=f'{symbol_name} Return', color='blue', alpha=0.7)
    signals['cumulative_strategy_return'].plot(
        ax=returns_ax, label='Strategy Return', color='green')
    returns_ax.set_title(f'Cumulative Returns Comparison - {symbol_name}')
    returns_ax.set_ylabel('Return (%)')
    returns_ax.legend()
    returns_ax.grid(True)

    # Panel 2: drawdown.
    signals['drawdown'].plot(ax=drawdown_ax, color='red')
    drawdown_ax.set_title('Strategy Drawdown')
    drawdown_ax.set_ylabel('Drawdown (%)')
    drawdown_ax.grid(True)

    # Panel 3: price with markers wherever the position changed.
    price_ax.plot(signals.index, signals['price'], color='black', alpha=0.7)
    position_change = signals['position'].diff()
    entries = signals[position_change > 0]
    exits = signals[position_change < 0]
    price_ax.scatter(entries.index, entries['price'], marker='^', color='green', s=100, label='Buy')
    price_ax.scatter(exits.index, exits['price'], marker='v', color='red', s=100, label='Sell')
    price_ax.set_title(f'Trading Signals - {symbol_name}')
    price_ax.set_ylabel('Price')
    price_ax.legend()

    summary_text = f"""
    {symbol_name} Performance Metrics:
    - Total Return: {performance_metrics['total_return']*100:.2f}%
    - Annual Return: {performance_metrics['annual_return']*100:.2f}%
    - Sharpe Ratio: {performance_metrics['sharpe_ratio']:.2f}
    - Max Drawdown: {performance_metrics['max_drawdown']*100:.2f}%
    - Win Rate: {performance_metrics['win_rate']*100:.2f}%
    - Profit Factor: {performance_metrics['profit_factor']:.2f}
    """
    plt.figtext(0.01, 0.01, summary_text, fontsize=12, bbox=dict(facecolor='white', alpha=0.8))

    # Leave a margin at the bottom for the metrics box.
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.show()


def run_trading_system_for_symbol(data_path, symbol, confidence_threshold=0.6, sequence_lengths=[30]):
    """Run the trading system for a given symbol with multiple sequence lengths.

    End-to-end pipeline: load the CSV, preprocess and label it, select
    features by mutual information, build look-back windows, split them
    chronologically 70/15/15, oversample the training split with SMOTE,
    train the multi-timeframe model, report classification metrics and
    plots, then generate and visualise trading signals on the test split.

    Args:
        data_path: Path of the CSV file to read.
        symbol: Symbol to analyse; used to filter a 'Symbol' column when the
            file has one, and in printed output and plot titles.
        confidence_threshold: Passed to generate_advanced_trading_signals.
        sequence_lengths: Window lengths, one model input branch each.
            NOTE(review): the default is a shared list object; it is only
            read here, never modified.

    Returns:
        Tuple (model, signals, performance_metrics), or (None, None, None)
        when no rows remain for the symbol.
    """
    print(f"Loading data for {symbol}...")
    df = pd.read_csv(data_path)
    if 'Symbol' in df.columns:
        df = df[df['Symbol'] == symbol].copy()
        print(f"Filtered data for {symbol}. Count of rows: {len(df)}")
    else:
        print(f"No Symbol column found. Assuming data is already for {symbol}.")
    if len(df) == 0:
        print(f"No data found for symbol {symbol}.")
        return None, None, None
    # Use the parsed dates as the index when a 'Date' column is available.
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)

    # Preprocess data
    print("Preprocessing data...")
    df = preprocess_stock_data(df)
    print(f"Missing values after imputation: {df.isna().sum().sum()}")

    # Define feature categories
    price_indicators = ['Close', 'Returns', 'Log_Returns', 'Price_Range', 'Price_Range_Pct']
    moving_averages = [col for col in df.columns if col.startswith(('MA_', 'EMA_', 'Returns_'))]
    volatility_metrics = [col for col in df.columns if col.startswith(('Volatility_', 'Volume_MA_', 'BB_Width_'))]
    technical_indicators = ['RSI_9', 'RSI_14', 'RSI_25', 'MACD', 'Signal_Line', 'MACD_Histogram',
                            'Momentum_14', 'ROC_14', 'MFI_14', 'MFI_28'] + \
                           [col for col in df.columns if col.startswith('Channel_Width_')]
    volume_indicators = ['OBV', 'Volume_Ratio', 'Volume_StdDev']
    fundamental_features = ['PE_Ratio', 'PB_Ratio', 'Dividend_Yield', 'Profit_Margin', 'Beta', 
                            'Enterprise_Value', 'Forward_EPS', 'Trailing_EPS']
    market_features = ['Market_Return', 'Market_Volatility', 'Rolling_Beta', 'VIX', 'VIX_MA_10']
    all_features = (price_indicators + moving_averages + volatility_metrics +
                    technical_indicators + volume_indicators + fundamental_features + market_features)
    # Keep only the candidate names that actually exist in this file.
    features = [f for f in all_features if f in df.columns]
    print(f"Using {len(features)} features")

    # Feature selection
    # NOTE(review): selection runs on the full frame, i.e. before the
    # chronological split below, so rows that later form the validation and
    # test sets contribute to the mutual-information scores.
    selected_features = select_features_with_mi(df, features, 'target', n_select=30)
    print(f"Selected {len(selected_features)} features")

    # Prepare sequence data
    print("Preparing sequence data...")
    X_dict, y = prepare_multi_sequence_data(df, sequence_lengths, 'target', selected_features)
    print(f"X_dict shapes: {{ {', '.join(f'{length}: {X_dict[length].shape}' for length in sequence_lengths)} }}, y shape: {y.shape}")
    y_onehot = tf.keras.utils.to_categorical(y, num_classes=3)

    # Split data chronologically
    # First 70% of samples train, next 15% validate, the remainder tests.
    print("Splitting data...")
    num_samples = len(y)
    train_size = int(0.7 * num_samples)
    val_size = int(0.15 * num_samples)
    X_train_dict = {length: X_dict[length][:train_size] for length in sequence_lengths}
    X_val_dict = {length: X_dict[length][train_size:train_size + val_size] for length in sequence_lengths}
    X_test_dict = {length: X_dict[length][train_size + val_size:] for length in sequence_lengths}
    y_train = y_onehot[:train_size]
    y_val = y_onehot[train_size:train_size + val_size]
    y_test = y_onehot[train_size + val_size:]
    print(f"Training set: {{ {', '.join(f'{length}: {X_train_dict[length].shape}' for length in sequence_lengths)} }}, {y_train.shape}")
    print(f"Validation set: {{ {', '.join(f'{length}: {X_val_dict[length].shape}' for length in sequence_lengths)} }}, {y_val.shape}")
    print(f"Testing set: {{ {', '.join(f'{length}: {X_test_dict[length].shape}' for length in sequence_lengths)} }}, {y_test.shape}")
    class_counts = np.sum(y_train, axis=0)
    print(f"Class distribution in training set: Down: {class_counts[0]}, Neutral: {class_counts[1]}, Up: {class_counts[2]}")

    # Apply SMOTE to training data
    # SMOTE works on 2-D input, so every branch's windows are flattened and
    # placed side by side in one row per sample.
    print("Applying SMOTE to training data...")
    num_features = X_dict[sequence_lengths[0]].shape[2]
    # NOTE(review): total_features is computed but not used anywhere below.
    total_features = sum(length * num_features for length in sequence_lengths)
    X_train_flattened = np.hstack([X_train_dict[length].reshape(train_size, -1) for length in sequence_lengths])
    y_train_labels = np.argmax(y_train, axis=1)
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train_flattened, y_train_labels)

    # Reshape back to sequences
    # Walk the flattened columns in the same branch order used by hstack above.
    X_train_resampled_dict = {}
    start = 0
    for length in sequence_lengths:
        seq_features = length * num_features
        X_train_resampled_dict[length] = X_train_resampled[:, start:start + seq_features].reshape(-1, length, num_features)
        start += seq_features
    y_train_resampled_onehot = tf.keras.utils.to_categorical(y_train_resampled, num_classes=3)
    print(f"Resampled training set: {{ {', '.join(f'{length}: {X_train_resampled_dict[length].shape}' for length in sequence_lengths)} }}, {y_train_resampled_onehot.shape}")

    # Build and train model
    # Inputs are passed as a list ordered like sequence_lengths, matching the
    # order in which build_multi_timeframe_model creates its input layers.
    print("Building model...")
    model = build_multi_timeframe_model(sequence_lengths, num_features, num_classes=3)
    model.summary()
    print("Training model...")
    history, model = train_model_with_advanced_techniques(
        model,
        [X_train_resampled_dict[length] for length in sequence_lengths],
        y_train_resampled_onehot,
        [X_val_dict[length] for length in sequence_lengths],
        y_val,
        epochs=100,
        batch_size=32,
        symbol=symbol
    )

    # Evaluate model
    # Metrics are computed on the untouched (non-resampled) test split.
    print("Evaluating model...")
    y_test_labels = np.argmax(y_test, axis=1)
    y_pred_proba = model.predict([X_test_dict[length] for length in sequence_lengths])
    y_pred = np.argmax(y_pred_proba, axis=1)
    accuracy = accuracy_score(y_test_labels, y_pred)
    precision = precision_score(y_test_labels, y_pred, average='weighted')
    recall = recall_score(y_test_labels, y_pred, average='weighted')
    f1 = f1_score(y_test_labels, y_pred, average='weighted')
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test_labels, y_pred))

    # Plot confusion matrix
    cm = confusion_matrix(y_test_labels, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Down', 'Neutral', 'Up'], yticklabels=['Down', 'Neutral', 'Up'])
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix - {symbol}')
    plt.show()

    # Plot training history
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'Model Accuracy - {symbol}')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'Model Loss - {symbol}')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.tight_layout()
    plt.show()

    # Generate trading signals
    # Sample k was built from row max_length + k of df, so the first test
    # sample sits max_length rows after the train and validation samples.
    print("Generating trading signals...")
    max_length = max(sequence_lengths)
    start_index = max_length + train_size + val_size
    df_test = df.iloc[start_index:start_index + len(y_test)]
    signals, performance_metrics = generate_advanced_trading_signals(
        model, 
        [X_test_dict[length] for length in sequence_lengths], 
        df_test, 
        confidence_threshold
    )
    visualize_trading_performance(signals, performance_metrics, symbol)

    return model, signals, performance_metrics

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

def _run_trading_system_for_symbol(data_path, symbol, confidence_threshold=0.6, sequence_lengths=None, train_on_all_data=False):
    """
    Run the trading system for a given symbol with multiple sequence lengths.

    Parameters:
    - data_path (str): Path to the CSV file containing stock data.
    - symbol (str): Stock symbol to analyze (e.g., 'AAPL').
    - confidence_threshold (float): Threshold forwarded to
      generate_advanced_trading_signals.
    - sequence_lengths (list): List of sequence lengths for multi-timeframe analysis.
                               Defaults to [30] when omitted.
    - train_on_all_data (bool): If True, train the model on all available data without splitting.
                                If False, split chronologically into training (70%),
                                validation (15%), and testing (remaining rows) sets.

    Returns:
    - model: Trained TensorFlow model.
    - signals: Trading signals DataFrame (or None if train_on_all_data=True).
    - performance_metrics: Dictionary of performance metrics (or None if train_on_all_data=True).
    """
    # A list literal as a default would be shared across calls; build it per call.
    if sequence_lengths is None:
        sequence_lengths = [30]

    def as_model_inputs(x_by_length):
        # The model is fed one array per sequence length, in sequence_lengths order.
        return [x_by_length[length] for length in sequence_lengths]

    # Load and preprocess data
    print(f"Loading data for {symbol}...")
    df = pd.read_csv(data_path)
    if 'Symbol' in df.columns:
        df = df[df['Symbol'] == symbol].copy()
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    # preprocess_stock_data and select_features_with_mi are defined elsewhere
    df = preprocess_stock_data(df)
    selected_features = select_features_with_mi(df, df.columns, 'target', n_select=30)

    # Prepare sequence data
    print("Preparing sequence data...")
    X_dict, y = prepare_multi_sequence_data(df, sequence_lengths, 'target', selected_features)
    y_onehot = tf.keras.utils.to_categorical(y, num_classes=3)

    # Data splitting based on train_on_all_data
    if train_on_all_data:
        print("Training on all available data...")
        X_train_dict = X_dict
        y_train = y_onehot
        val_inputs = None
        y_val = None
    else:
        print("Splitting data into train, validation, and test sets...")
        num_samples = len(y)
        train_size = int(0.7 * num_samples)
        val_size = int(0.15 * num_samples)
        test_start = train_size + val_size
        X_train_dict = {length: X_dict[length][:train_size] for length in sequence_lengths}
        X_val_dict = {length: X_dict[length][train_size:test_start] for length in sequence_lengths}
        X_test_dict = {length: X_dict[length][test_start:] for length in sequence_lengths}
        y_train = y_onehot[:train_size]
        y_val = y_onehot[train_size:test_start]
        y_test = y_onehot[test_start:]
        val_inputs = as_model_inputs(X_val_dict)

    # Build model (build_multi_timeframe_model is defined elsewhere)
    num_features = X_dict[sequence_lengths[0]].shape[2]
    model = build_multi_timeframe_model(sequence_lengths, num_features, num_classes=3)

    # Train model; validation inputs/labels are None when training on all data
    print("Training model...")
    _, model = train_model_with_advanced_techniques(
        model,
        as_model_inputs(X_train_dict),
        y_train,
        val_inputs,
        y_val,
        epochs=100,
        batch_size=32,
        symbol=symbol
    )

    if train_on_all_data:
        print("Training complete. Returning model only (no evaluation or signals).")
        return model, None, None

    # Evaluate model on the held-out test slice
    test_inputs = as_model_inputs(X_test_dict)
    y_pred_proba = model.predict(test_inputs)
    y_pred = np.argmax(y_pred_proba, axis=1)
    y_test_labels = np.argmax(y_test, axis=1)
    print(f"Test accuracy: {accuracy_score(y_test_labels, y_pred):.4f}")

    # NOTE(review): the offset assumes sample k of prepare_multi_sequence_data
    # maps to df row max(sequence_lengths) + k -- confirm against that helper.
    max_length = max(sequence_lengths)
    start_index = max_length + train_size + val_size
    df_test = df.iloc[start_index:start_index + len(y_test)]
    signals, performance_metrics = generate_advanced_trading_signals(
        model,
        test_inputs,
        df_test,
        confidence_threshold
    )
    return model, signals, performance_metrics

# Example helper function adjustment (must handle None validation data)
def _train_model_with_advanced_techniques(model, X_train, y_train, X_val, y_val, epochs, batch_size, symbol):
    if X_val is not None and y_val is not None:
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1
        )
    else:
        history = model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            verbose=1
        )
    return history, model

# Example usage
if __name__ == "__main__":
    # Train on the whole dataset for AAPL; the underscore-prefixed variant in
    # this cell returns (model, None, None) in that mode.
    # NOTE(review): this calls run_trading_system_for_symbol (no leading
    # underscore), not _run_trading_system_for_symbol defined above in this
    # cell -- confirm the intended function and that it accepts
    # train_on_all_data.
    model, signals, performance = run_trading_system_for_symbol(
        'stock_data.csv',
        'AAPL',
        train_on_all_data=True  # Train on all data
    )

In [None]:

# Example Usage
if __name__ == "__main__":
    # Run the trading system for AAPL with multiple sequence lengths
    model, signals, performance = run_trading_system_for_symbol(
        'sp500_master_data.csv', 
        'AAPL', 
        confidence_threshold=0.85, 
        sequence_lengths=[7, 30, 60]  # Short-term, medium-term, long-term
    )
    # performance is falsy (e.g. None or empty) when no metrics were produced;
    # only print the summary when there is something to report.
    if performance:
        print("\nTrading Performance Metrics:")
        # Ratios are stored as fractions, so scale by 100 for percentage display.
        print(f"Total Return: {performance['total_return']*100:.2f}%")
        print(f"Annual Return: {performance['annual_return']*100:.2f}%")
        print(f"Sharpe Ratio: {performance['sharpe_ratio']:.2f}")
        print(f"Maximum Drawdown: {performance['max_drawdown']*100:.2f}%")
        print(f"Win Rate: {performance['win_rate']*100:.2f}%")
        print(f"Profit Factor: {performance['profit_factor']:.2f}")

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight

# Technical Indicator Functions
def calculate_moving_averages(df, windows=(50, 200)):
    """Add simple moving averages of 'Close' for each requested window.

    Args:
        df (pd.DataFrame): Price data with a 'Close' column. Modified in place.
        windows (iterable of int): Rolling window sizes. One column named
            ``MA_<window>`` is written per entry. The default is an immutable
            tuple so it cannot be accidentally shared and mutated across calls.

    Returns:
        pd.DataFrame: The same ``df`` object, with the new columns added.
    """
    close = df['Close']
    for window in windows:
        # The first (window - 1) rows are NaN because the window is incomplete.
        df[f'MA_{window}'] = close.rolling(window=window).mean()
    return df

def calculate_rsi(df, window=14):
    """Add an 'RSI' column computed from simple rolling means of gains and losses.

    Args:
        df (pd.DataFrame): Price data with a 'Close' column. Modified in place.
        window (int): Number of rows in each rolling average.

    Returns:
        pd.DataFrame: The same ``df`` object, with the 'RSI' column added.
    """
    price_change = df['Close'].diff()

    # Keep only the upward moves (everything else becomes 0) ...
    upward_moves = price_change.where(price_change > 0, 0)
    # ... and only the downward moves, flipped to positive magnitudes.
    downward_moves = -price_change.where(price_change < 0, 0)

    avg_gain = upward_moves.rolling(window=window).mean()
    avg_loss = downward_moves.rolling(window=window).mean()

    # A window with no losses divides by zero here; pandas float arithmetic
    # yields inf/NaN rather than raising.
    relative_strength = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + relative_strength))
    return df

def calculate_macd(df, short_window=12, long_window=26, signal_window=9):
    """Add 'MACD' and 'Signal_Line' columns derived from 'Close'.

    MACD is the short-span EMA minus the long-span EMA of 'Close';
    'Signal_Line' is an EMA of the MACD column itself. All EMAs use
    ``adjust=False``.

    Args:
        df (pd.DataFrame): Price data with a 'Close' column. Modified in place.
        short_window (int): Span of the fast EMA.
        long_window (int): Span of the slow EMA.
        signal_window (int): Span of the EMA applied to MACD.

    Returns:
        pd.DataFrame: The same ``df`` object, with both columns added.
    """
    def ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    close = df['Close']
    df['MACD'] = ema(close, short_window) - ema(close, long_window)
    df['Signal_Line'] = ema(df['MACD'], signal_window)
    return df

def calculate_bollinger_bands(df, window=20, num_std=2):
    """Add Bollinger Band columns computed from 'Close'.

    Args:
        df (pd.DataFrame): Price data with a 'Close' column. Modified in place.
        window (int): Rolling window size for the mean and standard deviation.
        num_std (float): Distance of the upper/lower bands from the middle
            band, in rolling standard deviations (default 2).

    Returns:
        pd.DataFrame: The same ``df`` object, with 'BB_Middle', 'BB_Upper' and
        'BB_Lower' added.
    """
    rolling_close = df['Close'].rolling(window=window)
    # Compute the rolling standard deviation once and reuse it for both bands.
    band_offset = num_std * rolling_close.std()

    df['BB_Middle'] = rolling_close.mean()
    df['BB_Upper'] = df['BB_Middle'] + band_offset
    df['BB_Lower'] = df['BB_Middle'] - band_offset
    return df

# Load and Preprocess Data
def load_and_preprocess_data(file_path, symbol):
    """Load price data for one symbol, add indicators, and scale the features.

    Args:
        file_path (str): Path to the CSV file; must contain 'Date' plus the
            price columns used below ('Close', 'Volume').
        symbol (str): Value matched against the 'Symbol' column when the file
            has one; files without that column are used as-is.

    Returns:
        tuple: ``(scaled_data, targets, features, df)`` where ``scaled_data``
        is the MinMax-scaled feature matrix, ``targets`` the 0/1 'Target'
        values, ``features`` the list of feature column names, and ``df`` the
        processed, Date-indexed DataFrame the first two were derived from.
    """
    raw = pd.read_csv(file_path)
    if 'Symbol' in raw.columns:
        raw = raw[raw['Symbol'] == symbol].copy()
    raw['Date'] = pd.to_datetime(raw['Date'])
    df = raw.set_index('Date')

    # Each indicator helper adds its columns to the frame and hands it back.
    indicator_steps = (
        calculate_moving_averages,
        calculate_rsi,
        calculate_macd,
        calculate_bollinger_bands,
    )
    for add_indicator in indicator_steps:
        df = add_indicator(df)

    # Forward-fill gaps; leading NaNs (indicator warm-up) remain and are
    # removed by the dropna() below.
    df = df.ffill()

    # shift(-1) places the NEXT row's return on the current row, so the
    # target answers "does the price rise on the following row?".
    df['Return'] = df['Close'].pct_change().shift(-1)
    df['Target'] = np.where(df['Return'] > 0, 1, 0)
    df = df.dropna()

    features = ['Close', 'Volume', 'Return', 'MA_50', 'MA_200', 'RSI', 'MACD', 'Signal_Line', 'BB_Middle', 'BB_Upper', 'BB_Lower']
    # NOTE(review): the scaler is fit on every remaining row, i.e. before any
    # train/validation/test split happens downstream.
    scaled_data = MinMaxScaler().fit_transform(df[features])
    return scaled_data, df['Target'].values, features, df

# Create Sequences
def create_sequences(data, targets, seq_length):
    """Build sliding-window inputs and their aligned targets.

    For every position ``end`` from ``seq_length`` up to ``len(data) - 1``
    the input is ``data[end - seq_length:end]`` (the rows strictly before
    ``end``) and the label is ``targets[end]``.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays; both are empty when ``data`` has
        no more than ``seq_length`` rows.
    """
    window_ends = range(seq_length, len(data))
    windows = [data[end - seq_length:end] for end in window_ends]
    labels = [targets[end] for end in window_ends]
    return np.array(windows), np.array(labels)

# Build Model
def build_model(seq_length, num_features):
    """Build and compile a stacked three-LSTM binary classifier.

    Args:
        seq_length (int): Number of time steps per input sequence.
        num_features (int): Number of features per time step.

    Returns:
        Sequential: Model compiled with Adam (learning rate 0.001),
        binary cross-entropy loss and an accuracy metric.
    """
    model = Sequential()

    # The first two LSTM layers return full sequences so the next LSTM can
    # consume them; the last one returns only its final state.
    model.add(LSTM(128, input_shape=(seq_length, num_features), return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(32))
    model.add(Dropout(0.2))

    # Single sigmoid unit: the output is read as a probability for class 1.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Simulate Trades
def simulate_trades(signals, df, stop_loss_pct=0.02, take_profit_pct=0.05, max_holding_days=30, initial_capital=10000):
    """Simulate a long-only strategy with stop-loss, take-profit and a time stop.

    The whole portfolio is invested on each 'Buy' signal while flat. An open
    position is closed at the row's close when the price falls to the
    stop-loss level, rises to the take-profit level, or the holding period
    reaches ``max_holding_days`` (checked in that order). A position still
    open after the last signal is not closed, so its unrealised gain or loss
    is not reflected in the results.

    Args:
        signals (pd.DataFrame): Indexed like ``df`` with a 'Signal' column;
            the value 'Buy' opens a position, anything else is ignored.
        df (pd.DataFrame): Price data with a 'Close' column, looked up by the
            index labels of ``signals``.
        stop_loss_pct (float): Fractional loss from entry that triggers an exit.
        take_profit_pct (float): Fractional gain from entry that triggers an exit.
        max_holding_days (int): Maximum number of rows a position is held.
        initial_capital (float): Starting portfolio value (default 10000).

    Returns:
        dict: 'total_return', 'sharpe_ratio' (annualised with sqrt(252),
        0 when it cannot be computed) and 'max_drawdown' (largest fractional
        decline from the running peak).
    """
    portfolio = initial_capital
    in_position = False
    entry_price = 0
    days_held = 0
    portfolio_values = [portfolio]

    for date, row in signals.iterrows():
        signal = row['Signal']
        close_price = df.loc[date, 'Close']

        if not in_position:
            if signal == 'Buy':
                in_position = True
                entry_price = close_price
                days_held = 0
                print(f"Entering position at {close_price} on {date}")
        else:
            days_held += 1
            # Decide which exit rule (if any) fires; order matters and matches
            # stop-loss -> take-profit -> time stop.
            if close_price <= entry_price * (1 - stop_loss_pct):
                exit_label = "Stop-loss triggered at"
            elif close_price >= entry_price * (1 + take_profit_pct):
                exit_label = "Take-profit triggered at"
            elif days_held >= max_holding_days:
                exit_label = "Max holding period reached, exiting at"
            else:
                exit_label = None

            if exit_label is not None:
                in_position = False
                trade_return = (close_price - entry_price) / entry_price
                portfolio *= (1 + trade_return)
                print(f"{exit_label} {close_price} on {date}, return: {trade_return:.2%}")

        # The portfolio value only changes when a trade is closed.
        portfolio_values.append(portfolio)

    # Calculate performance metrics
    equity = pd.Series(portfolio_values, dtype=float)
    daily_returns = equity.pct_change().dropna()
    total_return = (portfolio - initial_capital) / initial_capital

    # std() is NaN with fewer than two returns and 0 when the equity curve is
    # flat; report 0 instead of NaN/inf in both cases.
    volatility = daily_returns.std()
    if pd.notna(volatility) and volatility != 0:
        sharpe_ratio = daily_returns.mean() / volatility * np.sqrt(252)
    else:
        sharpe_ratio = 0

    # Drawdown is measured against the peak reached so far at each step, not
    # against the overall maximum of the series.
    running_peak = equity.cummax()
    max_drawdown = ((running_peak - equity) / running_peak).max()

    return {
        'total_return': total_return,
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown
    }

# Main Trading System Function
def run_trading_system(file_path, symbol='AAPL', seq_length=30, confidence_threshold=0.6):
    """Train the LSTM classifier for one symbol and back-test its signals.

    Args:
        file_path (str): Path to the CSV file with stock data.
        symbol (str): Stock symbol to analyse (default: 'AAPL').
        seq_length (int): Number of rows in each input sequence (default: 30).
        confidence_threshold (float): Predicted probability above which a
            sample is labelled 1 / 'Buy' (default: 0.6).

    Returns:
        tuple: ``(model, signals, performance_metrics)`` -- the trained model,
        a DataFrame of 'Buy'/'Hold' signals indexed by test dates, and the
        metrics dict returned by ``simulate_trades``.
    """
    # Load and preprocess data
    scaled_data, targets, feature_cols, df = load_and_preprocess_data(file_path, symbol)

    # Create sequences
    X, y = create_sequences(scaled_data, targets, seq_length)

    # Chronological split: 70% train, 15% validation, remainder test
    num_samples = len(X)
    train_size = int(0.7 * num_samples)
    val_size = int(0.15 * num_samples)
    test_start = train_size + val_size
    X_train, X_val, X_test = X[:train_size], X[train_size:test_start], X[test_start:]
    y_train, y_val, y_test = y[:train_size], y[train_size:test_start], y[test_start:]

    # compute_class_weight returns one weight per entry of `classes`, in that
    # order, so key the mapping by the actual labels rather than by position
    # (positions and labels differ if a class is missing from y_train).
    classes = np.unique(y_train)
    class_weights = compute_class_weight('balanced', classes=classes, y=y_train)
    class_weight_dict = {
        int(label): float(weight) for label, weight in zip(classes, class_weights)
    }

    # Build and train model
    model = build_model(seq_length, len(feature_cols))
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        class_weight=class_weight_dict,
        verbose=1
    )

    # Evaluate on test set. Flatten the model output to 1-D so predictions
    # line up element-wise with y_test and with the signal dates below.
    y_pred_proba = model.predict(X_test)
    y_pred = (np.ravel(y_pred_proba) > confidence_threshold).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Sequence k is labelled with row (seq_length + k) of df, so the first
    # test sample corresponds to row seq_length + test_start.
    start_index = seq_length + test_start
    test_dates = df.index[start_index:start_index + len(y_test)]
    signals = pd.DataFrame(index=test_dates)
    signals['Signal'] = np.where(y_pred == 1, 'Buy', 'Hold')

    # Simulate trades
    performance_metrics = simulate_trades(signals, df)

    return model, signals, performance_metrics

# Example Usage
if __name__ == "__main__":
    # Train and back-test on AAPL using 30-row input sequences; a prediction
    # counts as 'Buy' only when its probability exceeds 0.6.
    model, signals, performance = run_trading_system(
        file_path='sp500_master_data.csv',
        symbol='AAPL',
        seq_length=30,
        confidence_threshold=0.6
    )
    # performance is the metrics dict returned by simulate_trades.
    print("\nPerformance Metrics:", performance)
    

  super().__init__(**kwargs)


Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 179ms/step - accuracy: 0.4559 - loss: 0.6993 - val_accuracy: 0.5163 - val_loss: 0.6938
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 68ms/step - accuracy: 0.5363 - loss: 0.6925 - val_accuracy: 0.5163 - val_loss: 0.6931
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 58ms/step - accuracy: 0.5042 - loss: 0.6948 - val_accuracy: 0.5163 - val_loss: 0.6937
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 59ms/step - accuracy: 0.5107 - loss: 0.6960 - val_accuracy: 0.5359 - val_loss: 0.6932
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - accuracy: 0.5622 - loss: 0.6928 - val_accuracy: 0.5098 - val_loss: 0.6935
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step - accuracy: 0.5373 - loss: 0.6937 - val_accuracy: 0.5163 - val_loss: 0.6937
Epoch 7/100
[1m23/23[0m 

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Set seeds for reproducibility: fix both NumPy's and TensorFlow's global
# random generators to the same constant before any model is built.
np.random.seed(42)
tf.random.set_seed(42)

# Function to load and preprocess data
def load_and_preprocess_data(file_path, symbol, seq_length):
    """
    Load stock data from a CSV file, preprocess it, and create sequences for the specified symbol.
    
    Args:
        file_path (str): Path to the CSV file containing stock data.
        symbol (str): Stock symbol to filter (e.g., 'AAPL').
        seq_length (int): Number of time steps in each input sequence.
    
    Returns:
        np.array: Input sequences (X).
        np.array: Target values (y).
        list: List of feature column names.
    
    Raises:
        ValueError: If the file has no 'Symbol' column, or if no row matches
            ``symbol``.
    """
    # Load data
    df = pd.read_csv(file_path)
    no_data_message = f"No data found for symbol '{symbol}' in the dataset."
    if 'Symbol' not in df.columns:
        raise ValueError(no_data_message)
    df = df[df['Symbol'] == symbol].copy()
    # Fail early with a clear message instead of passing an empty frame on to
    # the scaler below.
    if df.empty:
        raise ValueError(no_data_message)
    
    # Convert 'Date' to datetime and set as index
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    
    # Handle missing values with forward fill
    df.ffill(inplace=True)
    
    # Define target: 1 if next day's return > 0, else 0.
    # shift(-1) puts the following row's return on the current row; the last
    # row therefore has no return and is removed by dropna().
    df['Return'] = df['Close'].pct_change().shift(-1)
    df['Target'] = np.where(df['Return'] > 0, 1, 0)
    df = df.dropna()
    
    # Select features (customize as needed)
    features = ['Close', 'Volume', 'Return']
    # NOTE(review): the scaler is fit on all remaining rows, before the
    # train/validation/test split is made by the caller.
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df[features])
    
    # Create sequences: rows [end - seq_length, end) predict the target at
    # row `end`. Pull the targets out once instead of using .iloc per row.
    targets = df['Target'].to_numpy()
    window_ends = range(seq_length, len(scaled_data))
    X = np.array([scaled_data[end - seq_length:end] for end in window_ends])
    y = np.array([targets[end] for end in window_ends])
    
    return X, y, features

# Function to build the CNN-BiLSTM model
def build_cnn_bilstm_model(seq_length, num_features, num_classes=1):
    """
    Build and compile a Conv1D + bidirectional-LSTM classifier.
    
    Args:
        seq_length (int): Number of time steps in each input sequence.
        num_features (int): Number of features in the input data.
        num_classes (int): Number of output units. With 1 (the default) the
            model uses a sigmoid output and binary cross-entropy; otherwise
            it uses softmax and categorical cross-entropy.
    
    Returns:
        Sequential: Compiled Keras model.
    """
    # Output activation and loss are chosen together from num_classes.
    single_output = num_classes == 1
    output_activation = 'sigmoid' if single_output else 'softmax'
    loss_name = 'binary_crossentropy' if single_output else 'categorical_crossentropy'

    model = Sequential([
        # Convolutional front end
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(seq_length, num_features)),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),
        # Two stacked bidirectional LSTMs; only the first returns sequences
        Bidirectional(LSTM(64, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(32)),
        Dropout(0.2),
        # Dense head
        Dense(32, activation='relu'),
        Dense(num_classes, activation=output_activation),
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss=loss_name,
        metrics=['accuracy'],
    )
    return model

# Function to split data into train, validation, and test sets
def split_data(X, y, train_ratio=0.7, val_ratio=0.15):
    """
    Split X and y, in order, into training, validation, and test parts.
    
    The first ``int(train_ratio * len(X))`` samples form the training set,
    the next ``int(val_ratio * len(X))`` the validation set, and whatever
    remains the test set. No shuffling is performed.
    
    Args:
        X (np.array): Input sequences.
        y (np.array): Target values.
        train_ratio (float): Proportion of data for training.
        val_ratio (float): Proportion of data for validation.
    
    Returns:
        tuple: X_train, X_val, X_test, y_train, y_val, y_test
    """
    sample_count = len(X)
    train_end = int(train_ratio * sample_count)
    val_end = train_end + int(val_ratio * sample_count)

    def three_way(values):
        # Same cut points for inputs and targets keeps them aligned.
        return values[:train_end], values[train_end:val_end], values[val_end:]

    X_train, X_val, X_test = three_way(X)
    y_train, y_val, y_test = three_way(y)
    return X_train, X_val, X_test, y_train, y_val, y_test

# Simulate Trades
def simulate_trades(signals, df, stop_loss_pct=0.02, take_profit_pct=0.05, max_holding_days=30, initial_capital=10000):
    """Simulate a long-only strategy with stop-loss, take-profit and a time stop.

    The whole portfolio is invested on each 'Buy' signal while flat. An open
    position is closed at the row's close when the price falls to the
    stop-loss level, rises to the take-profit level, or the holding period
    reaches ``max_holding_days`` (checked in that order). A position still
    open after the last signal is not closed, so its unrealised gain or loss
    is not reflected in the results.

    Args:
        signals (pd.DataFrame): Indexed like ``df`` with a 'Signal' column;
            the value 'Buy' opens a position, anything else is ignored.
        df (pd.DataFrame): Price data with a 'Close' column, looked up by the
            index labels of ``signals``.
        stop_loss_pct (float): Fractional loss from entry that triggers an exit.
        take_profit_pct (float): Fractional gain from entry that triggers an exit.
        max_holding_days (int): Maximum number of rows a position is held.
        initial_capital (float): Starting portfolio value (default 10000).

    Returns:
        dict: 'total_return', 'sharpe_ratio' (annualised with sqrt(252),
        0 when it cannot be computed) and 'max_drawdown' (largest fractional
        decline from the running peak).
    """
    portfolio = initial_capital
    in_position = False
    entry_price = 0
    days_held = 0
    portfolio_values = [portfolio]

    for date, row in signals.iterrows():
        signal = row['Signal']
        close_price = df.loc[date, 'Close']

        if not in_position:
            if signal == 'Buy':
                in_position = True
                entry_price = close_price
                days_held = 0
                print(f"Entering position at {close_price} on {date}")
        else:
            days_held += 1
            # Decide which exit rule (if any) fires; order matters and matches
            # stop-loss -> take-profit -> time stop.
            if close_price <= entry_price * (1 - stop_loss_pct):
                exit_label = "Stop-loss triggered at"
            elif close_price >= entry_price * (1 + take_profit_pct):
                exit_label = "Take-profit triggered at"
            elif days_held >= max_holding_days:
                exit_label = "Max holding period reached, exiting at"
            else:
                exit_label = None

            if exit_label is not None:
                in_position = False
                trade_return = (close_price - entry_price) / entry_price
                portfolio *= (1 + trade_return)
                print(f"{exit_label} {close_price} on {date}, return: {trade_return:.2%}")

        # The portfolio value only changes when a trade is closed.
        portfolio_values.append(portfolio)

    # Calculate performance metrics
    equity = pd.Series(portfolio_values, dtype=float)
    daily_returns = equity.pct_change().dropna()
    total_return = (portfolio - initial_capital) / initial_capital

    # std() is NaN with fewer than two returns and 0 when the equity curve is
    # flat; report 0 instead of NaN/inf in both cases.
    volatility = daily_returns.std()
    if pd.notna(volatility) and volatility != 0:
        sharpe_ratio = daily_returns.mean() / volatility * np.sqrt(252)
    else:
        sharpe_ratio = 0

    # Drawdown is measured against the peak reached so far at each step, not
    # against the overall maximum of the series.
    running_peak = equity.cummax()
    max_drawdown = ((running_peak - equity) / running_peak).max()

    return {
        'total_return': total_return,
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown
    }

# Main function to run the trading system
def run_trading_system(file_path, symbol='AAPL', seq_length=30, confidence_threshold=0.6):
    """
    Train the CNN-BiLSTM model for one symbol and back-test its signals.
    
    Args:
        file_path (str): Path to the CSV file with stock data.
        symbol (str): Stock symbol (default: 'AAPL').
        seq_length (int): Number of rows in each sequence (default: 30).
        confidence_threshold (float): Predicted probability above which a
            sample is labelled 1 / 'Buy' (default: 0.6).
    
    Returns:
        model: Trained CNN-BiLSTM model.
        signals (pd.DataFrame): 'Buy'/'Hold' signals indexed by test dates.
        performance_metrics (dict): Metrics returned by ``simulate_trades``.
    """
    # Sequences, chronological split, and model sized from the data
    X, y, feature_cols = load_and_preprocess_data(file_path, symbol, seq_length)
    X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)
    model = build_cnn_bilstm_model(seq_length, X.shape[2])
    
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        verbose=1,
    )
    
    # Threshold the predicted probabilities and score them on the test set
    test_probabilities = model.predict(X_test)
    y_pred = (test_probabilities > confidence_threshold).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {accuracy:.4f}")
    
    # Re-read the prices to recover the dates and closes of the test samples.
    # NOTE(review): load_and_preprocess_data drops rows with dropna(), while
    # this frame is the raw file; if any row other than the last was dropped
    # there, the positional slice below would be offset -- confirm.
    prices = pd.read_csv(file_path)
    if 'Symbol' in prices.columns:
        prices = prices[prices['Symbol'] == symbol].copy()
    prices['Date'] = pd.to_datetime(prices['Date'])
    prices.set_index('Date', inplace=True)

    first_test_row = seq_length + len(X_train) + len(X_val)
    test_dates = prices.index[first_test_row:first_test_row + len(y_test)]
    signals = pd.DataFrame(index=test_dates)
    signals['Signal'] = np.where(y_pred == 1, 'Buy', 'Hold')
    
    performance_metrics = simulate_trades(signals, prices)
    
    return model, signals, performance_metrics

# Example usage
if __name__ == "__main__":
    # Replace 'sp500_master_data.csv' with the path to your stock data CSV.
    # Trains and back-tests on AAPL with 30-row sequences; a prediction counts
    # as 'Buy' only when its probability exceeds 0.6.
    model, signals, performance = run_trading_system(
        file_path='sp500_master_data.csv',
        symbol='AAPL',
        seq_length=30,
        confidence_threshold=0.6
    )
    # performance is the metrics dict returned by simulate_trades.
    print("\nPerformance Metrics:", performance)
    print("\nFirst few trading signals:\n", signals.head())

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 84ms/step - accuracy: 0.4598 - loss: 0.6946 - val_accuracy: 0.5163 - val_loss: 0.6928
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.5044 - loss: 0.6941 - val_accuracy: 0.5163 - val_loss: 0.6923
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.4932 - loss: 0.6942 - val_accuracy: 0.5163 - val_loss: 0.6925
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.5020 - loss: 0.6945 - val_accuracy: 0.5163 - val_loss: 0.6931
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.5025 - loss: 0.6935 - val_accuracy: 0.5163 - val_loss: 0.6932
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.5012 - loss: 0.6941 - val_accuracy: 0.5163 - val_loss: 0.6942
Epoch 7/50
[1m23/23[0m [32m━━━