In [4]:
import pandas as pd
import numpy as np
from joblib import Parallel, delayed

# -------------------------------
# Helper Functions to Compute Indicators
# -------------------------------

def compute_indicators_for_stock(df, ma_periods, rsi_period=14):
    """
    Return a date-ordered copy of ``df`` enriched with moving averages and RSI.

    ``df`` must provide 'Date' and 'Price' columns. One "MA <period>" column is
    added for every entry of ``ma_periods`` (simple rolling mean of 'Price'),
    plus an 'RSI' column derived from the rolling means of gains and losses over
    ``rsi_period`` rows. Rows that lack a full look-back window hold NaN.
    The caller's DataFrame is not modified.
    """
    ordered = df.sort_values('Date').reset_index(drop=True)
    close = ordered['Price']

    # Simple moving averages, one column per requested window length.
    for window in ma_periods:
        ordered[f"MA {window}"] = close.rolling(window=window).mean()

    # RSI: split day-over-day changes into up-moves and (positive) down-moves,
    # average each over the look-back window and combine their ratio.
    change = close.diff()
    up_moves = change.clip(lower=0)
    down_moves = -change.clip(upper=0)
    mean_up = up_moves.rolling(window=rsi_period).mean()
    mean_down = down_moves.rolling(window=rsi_period).mean()
    relative_strength = mean_up / mean_down
    ordered['RSI'] = 100 - (100 / (1 + relative_strength))

    return ordered

# -------------------------------
# Simulation Functions for Strategies
# -------------------------------

def simulate_ma_strategy(df, short_window, long_window):
    """
    Simulate a long-only moving average crossover strategy:
      - Buy when the short MA crosses above the long MA.
      - Sell when the short MA crosses below the long MA.

    ``df`` must contain 'Price' plus the "MA <short_window>" and
    "MA <long_window>" columns. A position still open on the last row is closed
    at the last price. Returns a list of per-trade P&Ls expressed in price units
    (exit price minus entry price); the list is empty when no trade occurs.
    """
    # Extract the columns once as NumPy arrays: scalar ``Series.iloc[i]`` lookups
    # inside the loop are much slower and yield exactly the same values.
    ma_short = df[f"MA {short_window}"].to_numpy()
    ma_long = df[f"MA {long_window}"].to_numpy()
    price = df['Price'].to_numpy()

    trades = []
    in_position = False
    entry_price = 0.0

    # Start at 1 because a crossover needs the previous row for comparison.
    # Comparisons against NaN (MA warm-up rows) are False, so no signal fires there.
    for i in range(1, len(price)):
        if in_position:
            # Sell signal: short MA crosses below long MA
            if ma_short[i] < ma_long[i] and ma_short[i - 1] >= ma_long[i - 1]:
                trades.append(price[i] - entry_price)
                in_position = False
        elif ma_short[i] > ma_long[i] and ma_short[i - 1] <= ma_long[i - 1]:
            # Buy signal: short MA crosses above long MA
            in_position = True
            entry_price = price[i]

    # Close any open position at the end of the series
    if in_position:
        trades.append(price[-1] - entry_price)

    return trades

def simulate_rsi_strategy(df, rsi_buy_threshold, rsi_sell_threshold):
    """
    Simulate a long-only RSI-based strategy:
      - Buy when RSI exceeds the buy threshold.
      - Sell when RSI falls below the sell threshold.

    ``df`` must contain 'Price' and 'RSI' columns. A position still open on the
    last row is closed at the last price. Returns a list of per-trade P&Ls in
    price units (exit price minus entry price).
    """
    # Extract the columns once as NumPy arrays instead of doing a scalar
    # ``Series.iloc[i]`` lookup on every iteration.
    price = df['Price'].to_numpy()
    rsi = df['RSI'].to_numpy()

    trades = []
    in_position = False
    entry_price = 0.0

    # NaN RSI values compare False against both thresholds, so they never trade.
    for rsi_now, price_now in zip(rsi, price):
        if in_position:
            if rsi_now < rsi_sell_threshold:
                trades.append(price_now - entry_price)
                in_position = False
        elif rsi_now > rsi_buy_threshold:
            in_position = True
            entry_price = price_now

    # Close any open position at the end of the series
    if in_position:
        trades.append(price[-1] - entry_price)
    return trades

def simulate_combined_strategy(df, short_window, long_window, rsi_buy_threshold, rsi_sell_threshold):
    """
    Simulate a long-only combined strategy:
      - Buy when a short MA crosses above a long MA and RSI exceeds the buy threshold.
      - Sell when a short MA crosses below a long MA and RSI falls below the sell threshold.

    Both conditions must hold on the same row. ``df`` must contain 'Price',
    'RSI' and the "MA <short_window>" / "MA <long_window>" columns. A position
    still open on the last row is closed at the last price. Returns a list of
    per-trade P&Ls in price units (exit price minus entry price).
    """
    # Extract the columns once as NumPy arrays instead of doing scalar
    # ``Series.iloc[i]`` lookups on every iteration.
    price = df['Price'].to_numpy()
    ma_short = df[f"MA {short_window}"].to_numpy()
    ma_long = df[f"MA {long_window}"].to_numpy()
    rsi = df['RSI'].to_numpy()

    trades = []
    in_position = False
    entry_price = 0.0

    # Start at 1 because a crossover needs the previous row for comparison.
    for i in range(1, len(price)):
        if in_position:
            crossed_below = ma_short[i] < ma_long[i] and ma_short[i - 1] >= ma_long[i - 1]
            if crossed_below and rsi[i] < rsi_sell_threshold:
                trades.append(price[i] - entry_price)
                in_position = False
        else:
            crossed_above = ma_short[i] > ma_long[i] and ma_short[i - 1] <= ma_long[i - 1]
            if crossed_above and rsi[i] > rsi_buy_threshold:
                in_position = True
                entry_price = price[i]

    # Close any open position at the end of the series
    if in_position:
        trades.append(price[-1] - entry_price)
    return trades

# -------------------------------
# Wrapper Functions for Parallel Processing
# -------------------------------

def get_trades_for_ticker_ma(df, s, l):
    """
    Run the MA crossover simulation for one ticker's DataFrame.

    Returns an empty list when either the "MA <s>" or the "MA <l>" column is
    missing from ``df``; otherwise returns the trades from simulate_ma_strategy.
    """
    required = (f"MA {s}", f"MA {l}")
    if not all(column in df.columns for column in required):
        return []
    return simulate_ma_strategy(df, s, l)

def get_trades_for_ticker_rsi(df, rsi_buy, rsi_sell):
    """
    Run the RSI-only simulation for one ticker's DataFrame.

    Returns an empty list when ``df`` has no 'RSI' column; otherwise returns
    the trades from simulate_rsi_strategy.
    """
    has_rsi = 'RSI' in df.columns
    return simulate_rsi_strategy(df, rsi_buy, rsi_sell) if has_rsi else []

def get_trades_for_ticker_combined(df, s, l, rsi_buy, rsi_sell):
    """
    Run the combined MA + RSI simulation for one ticker's DataFrame.

    Returns an empty list when any of the "MA <s>", "MA <l>" or 'RSI' columns
    is missing from ``df``; otherwise returns the trades from
    simulate_combined_strategy.
    """
    for column in (f"MA {s}", f"MA {l}", 'RSI'):
        if column not in df.columns:
            return []
    return simulate_combined_strategy(df, s, l, rsi_buy, rsi_sell)

# -------------------------------
# Main Backtesting Process
# -------------------------------

# Read the price table and order it chronologically.
# The CSV is expected to hold "Date" as its first column, followed by one adjusted-close column per S&P 500 stock.
data = pd.read_csv('fully_cleaned_stock_data.csv', parse_dates=['Date']).sort_values('Date')

# Every column after the first one is treated as a ticker.
tickers = data.columns[1:]

# Moving average windows to evaluate; ma_periods is the sorted union of both lists.
short_windows = [5, 7, 10, 15, 20]
long_windows  = [50, 70, 100, 150, 200]
ma_periods = sorted(set(short_windows) | set(long_windows))

# Map each ticker to its own DataFrame ('Date', 'Price' and the computed indicators).
stock_dfs = {}
for ticker in tickers:
    df_stock = compute_indicators_for_stock(
        data[['Date', ticker]].rename(columns={ticker: 'Price'}),
        ma_periods,
        rsi_period=14,
    )
    stock_dfs[ticker] = df_stock

# -------------------------------
# 1. Evaluate the MA Crossover Strategy using Parallel Processing
# -------------------------------
ma_results = []
# Reuse one joblib worker pool for every (short, long) pair instead of
# constructing a new Parallel object on each iteration.
with Parallel(n_jobs=-1) as parallel:
    for s in short_windows:
        for l in long_windows:
            if s >= l:
                continue
            # One task per ticker; each returns that ticker's list of trade P&Ls.
            trades_list = parallel(
                delayed(get_trades_for_ticker_ma)(df, s, l) for df in stock_dfs.values()
            )
            # Flatten the per-ticker lists into a single list of trades
            all_trades = [trade for sublist in trades_list for trade in sublist]
            # NaN marks parameter pairs that produced no trade at all.
            avg_pnl = np.mean(all_trades) if all_trades else np.nan
            var_pnl = np.var(all_trades) if all_trades else np.nan
            ma_results.append({
                'Short_MA': s,
                'Long_MA': l,
                'Average_PnL': avg_pnl,
                'Variance_PnL': var_pnl
            })

ma_results_df = pd.DataFrame(ma_results)
print("Moving Average Crossover Strategy Results:")
print(ma_results_df)

# -------------------------------
# 2. Evaluate the RSI-only Strategy using Parallel Processing
# -------------------------------
# RSI threshold pairs to test: (RSI_buy_threshold, RSI_sell_threshold)
rsi_thresholds = [
    (70, 30),
    (75, 25),
    (65, 35),
    (80, 20),
    (60, 40)
]

rsi_results = []
# Reuse one joblib worker pool for every threshold pair instead of
# constructing a new Parallel object on each iteration.
with Parallel(n_jobs=-1) as parallel:
    for (rsi_buy, rsi_sell) in rsi_thresholds:
        # One task per ticker; each returns that ticker's list of trade P&Ls.
        trades_list = parallel(
            delayed(get_trades_for_ticker_rsi)(df, rsi_buy, rsi_sell) for df in stock_dfs.values()
        )
        all_trades = [trade for sublist in trades_list for trade in sublist]
        # NaN marks threshold pairs that produced no trade at all.
        avg_pnl = np.mean(all_trades) if all_trades else np.nan
        var_pnl = np.var(all_trades) if all_trades else np.nan
        rsi_results.append({
            'RSI_Buy': rsi_buy,
            'RSI_Sell': rsi_sell,
            'Average_PnL': avg_pnl,
            'Variance_PnL': var_pnl
        })

rsi_results_df = pd.DataFrame(rsi_results)
print("\nRSI Strategy Results:")
print(rsi_results_df)

# -------------------------------
# 3. Evaluate the Combined MA & RSI Strategy using Parallel Processing
# -------------------------------
combined_results = []
# Reuse one joblib worker pool for the whole parameter grid instead of
# constructing a new Parallel object for every (short, long, RSI pair) combination.
with Parallel(n_jobs=-1) as parallel:
    for s in short_windows:
        for l in long_windows:
            if s >= l:
                continue
            for (rsi_buy, rsi_sell) in rsi_thresholds:
                # One task per ticker; each returns that ticker's list of trade P&Ls.
                trades_list = parallel(
                    delayed(get_trades_for_ticker_combined)(df, s, l, rsi_buy, rsi_sell)
                    for df in stock_dfs.values()
                )
                all_trades = [trade for sublist in trades_list for trade in sublist]
                # NaN marks combinations that produced no trade at all.
                avg_pnl = np.mean(all_trades) if all_trades else np.nan
                var_pnl = np.var(all_trades) if all_trades else np.nan
                combined_results.append({
                    'Short_MA': s,
                    'Long_MA': l,
                    'RSI_Buy': rsi_buy,
                    'RSI_Sell': rsi_sell,
                    'Average_PnL': avg_pnl,
                    'Variance_PnL': var_pnl
                })

combined_results_df = pd.DataFrame(combined_results)
print("\nCombined MA & RSI Strategy Results:")
print(combined_results_df)


Moving Average Crossover Strategy Results:
    Short_MA  Long_MA  Average_PnL  Variance_PnL
0          5       50     7.871429  6.958341e+05
1          5       70     7.423369  7.307737e+05
2          5      100    15.178919  1.514370e+06
3          5      150    15.532509  1.714463e+06
4          5      200    20.153850  2.455918e+06
5          7       50     7.427176  7.856267e+05
6          7       70    10.152610  8.656064e+05
7          7      100    19.006505  1.689013e+06
8          7      150    21.636478  1.819870e+06
9          7      200    21.248463  2.320816e+06
10        10       50     9.248550  8.046415e+05
11        10       70    12.754032  1.051194e+06
12        10      100    20.331770  1.913846e+06
13        10      150    26.027039  2.050271e+06
14        10      200    24.428110  2.798834e+06
15        15       50     8.970257  8.972154e+05
16        15       70    10.264637  1.046650e+06
17        15      100    25.071525  2.236426e+06
18        15      150    3

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical

# -------------------------------
# 1. Compute Technical Indicators
# -------------------------------

def compute_indicators_for_stock(df, ma_periods, rsi_period=14):
    """
    Sort ``df`` by 'Date' and append technical-indicator columns to the result.

    Expects 'Date' and 'Price' columns. Adds a "MA <period>" column (simple
    rolling mean of 'Price') for each value in ``ma_periods`` and an 'RSI'
    column computed from rolling mean gains and losses over ``rsi_period`` rows.
    Rows without a complete look-back window hold NaN. A new DataFrame is
    returned; the argument is left untouched.
    """
    result = df.sort_values('Date').reset_index(drop=True)
    prices = result['Price']

    # Moving averages: one column per look-back length.
    for length in ma_periods:
        result[f"MA {length}"] = prices.rolling(window=length).mean()

    # RSI: average the positive and the (sign-flipped) negative price changes
    # separately, then map their ratio onto the 0-100 scale.
    daily_change = prices.diff()
    avg_gain = daily_change.clip(lower=0).rolling(window=rsi_period).mean()
    avg_loss = (-daily_change.clip(upper=0)).rolling(window=rsi_period).mean()
    result['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))

    return result

# -------------------------------
# 2. Create Target Signal
# -------------------------------

def add_target_signal(df, threshold=0.005):
    """
    Create a target signal based on the next row's return.
      - If future return > threshold: label as BUY (2)
      - If future return < -threshold: label as SELL (0)
      - Otherwise: HOLD (1)

    Works on a copy of ``df`` (which must contain 'Price'), adds the columns
    'FutureReturn' and 'Signal', and returns the result without its last row,
    because that row has no following price to compute a return from.
    """
    labeled = df.copy()
    future_return = labeled['Price'].shift(-1) / labeled['Price'] - 1
    labeled['FutureReturn'] = future_return

    # Vectorised labelling instead of a per-row Python lambda.
    # NaN returns compare False on both sides and therefore fall through to HOLD (1).
    is_buy = (future_return > threshold).to_numpy()
    is_sell = (future_return < -threshold).to_numpy()
    labeled['Signal'] = np.where(is_buy, 2, np.where(is_sell, 0, 1)).astype('int64')

    # Drop the last row (its FutureReturn is NaN)
    return labeled.iloc[:-1]

# -------------------------------
# 3. Load Data and Preprocess
# -------------------------------

# Read the price table and order it chronologically.
# The CSV is expected to hold "Date" as its first column, followed by one adjusted-close column per S&P 500 stock.
data = pd.read_csv('fully_cleaned_stock_data.csv', parse_dates=['Date']).sort_values('Date')

# Every column after the first one is treated as a ticker.
tickers = data.columns[1:]

# Moving average windows of interest; ma_periods is the sorted union of both lists.
short_windows = [5, 7, 10, 15, 20]
long_windows  = [50, 70, 100, 150, 200]
ma_periods = sorted(set(short_windows) | set(long_windows))

# Map each ticker to a fully prepared DataFrame.
stock_dfs = {}
for ticker in tickers:
    df_stock = (
        data[['Date', ticker]]
        .rename(columns={ticker: 'Price'})
        # Indicators first, then the label built with a 0.5% threshold (adjust as needed).
        .pipe(compute_indicators_for_stock, ma_periods, rsi_period=14)
        .pipe(add_target_signal, threshold=0.005)
        # Remove rows with any missing value (indicator warm-up rows, gaps, etc.).
        .dropna()
        .reset_index(drop=True)
    )
    stock_dfs[ticker] = df_stock

# -------------------------------
# 4. Split Data into Training and Testing
# -------------------------------

# Each stock's time series is cut chronologically: first 70% for training, last 30% for testing.
train_list = []
test_list = []

for ticker, df in stock_dfs.items():
    n = len(df)
    if n < 50:  # Too few data points for this stock; leave it out
        continue
    cutoff = int(0.7 * n)
    # assign() returns a new frame, so the per-ticker DataFrames stay untouched.
    train_list.append(df.iloc[:cutoff].assign(Ticker=ticker))
    test_list.append(df.iloc[cutoff:].assign(Ticker=ticker))

train_data = pd.concat(train_list, ignore_index=True)
test_data = pd.concat(test_list, ignore_index=True)

# Model inputs: Price, every moving average, and RSI.
feature_cols = ['Price', *(f"MA {period}" for period in ma_periods), 'RSI']

# Training features and integer labels.
X_train = train_data[feature_cols].values
y_train = train_data['Signal'].values

# One-hot encode the labels (3 classes: 0 = sell, 1 = hold, 2 = buy).
y_train_cat = to_categorical(y_train, num_classes=3)

# -------------------------------
# 5. Build and Train the Deep Learning Model(s)
# -------------------------------

def build_model(hidden_layers, input_dim):
    """
    Assemble and compile a fully connected classifier.
    :param hidden_layers: Iterable with the neuron count of each hidden layer, in order.
    :param input_dim: Number of input features.
    :return: Compiled Keras Sequential model with a 3-way softmax output.
    """
    network = Sequential()
    network.add(Input(shape=(input_dim,)))
    # ReLU hidden layers followed by the 3-neuron softmax output (sell, hold, buy).
    layer_specs = [(units, 'relu') for units in hidden_layers] + [(3, 'softmax')]
    for units, activation in layer_specs:
        network.add(Dense(units, activation=activation))
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network

# Architectures to compare: name -> neuron count of each hidden layer.
# E.g. "1_hidden_32" is a 3-layer network (input, one hidden layer of 32, output).
configs = {
    "1_hidden_32": [32],
    "1_hidden_64": [64],
    "2_hidden_32_16": [32, 16],
    "2_hidden_64_32": [64, 32]
}

# Number of input features, taken from the training matrix.
input_dim = X_train.shape[1]

# Trained models, keyed by configuration name.
results = {}

# NOTE(review): X_train is passed to fit() exactly as extracted; no feature scaling
# is visible in this cell - confirm that is intended.
for config_name, hidden_layers in configs.items():
    print(f"Training model: {config_name} with layers {hidden_layers}")
    model = build_model(hidden_layers, input_dim)
    # Epochs and batch size can be adjusted as needed.
    model.fit(
        X_train,
        y_train_cat,
        epochs=10,
        batch_size=32,
        validation_split=0.1,
        verbose=1,
    )

    # Keep the trained model for the trading simulation.
    results[config_name] = {"model": model}

# -------------------------------
# 6. Simulate Trading on Test Data
# -------------------------------

def simulate_ml_trading(df, model, feature_cols):
    """
    Given a test DataFrame for one stock and a trained model, generate predictions,
    then simulate long-only trading:
      - If not in position and predicted signal == BUY (2), enter a position.
      - If in position and predicted signal == SELL (0), exit the position.
      - Hold means no action.

    ``df`` must contain 'Price' and every column in ``feature_cols``; ``model``
    must expose ``predict(X)`` returning one row of class scores per input row.
    A position still open on the last row is closed at the last price.
    Returns a list of trade P&Ls in price units (exit price minus entry price);
    an empty ``df`` yields an empty list without calling the model.
    """
    if len(df) == 0:
        return []

    features = df[feature_cols].values
    # The predicted class is the index of the highest score in each row.
    predicted_signal = np.argmax(model.predict(features), axis=1)
    # Extract prices once instead of a column lookup plus ``.iloc[i]`` per row.
    prices = df['Price'].to_numpy()

    trades = []
    in_position = False
    entry_price = 0.0

    for signal, price in zip(predicted_signal, prices):
        if in_position:
            # Sell signal: predicted SELL (0) while in position.
            if signal == 0:
                trades.append(price - entry_price)
                in_position = False
        elif signal == 2:
            # Buy signal: predicted BUY (2) while not in position.
            in_position = True
            entry_price = price

    # If still in position at the end, close the trade at the last price.
    if in_position:
        trades.append(prices[-1] - entry_price)
    return trades

# Run the trading simulation for every trained configuration over each stock's
# test set and pool the resulting trade P&Ls.
performance = {}

for config_name, config in configs.items():
    model = results[config_name]["model"]
    all_trades = []

    # One simulation per ticker, on that ticker's rows in chronological order.
    for ticker, df in test_data.groupby('Ticker'):
        ordered = df.sort_values('Date').reset_index(drop=True)
        all_trades += simulate_ml_trading(ordered, model, feature_cols)

    # NaN marks configurations that produced no trade at all.
    if not all_trades:
        avg_pnl = var_pnl = np.nan
    else:
        avg_pnl, var_pnl = np.mean(all_trades), np.var(all_trades)
    performance[config_name] = {"Average_PnL": avg_pnl, "Variance_PnL": var_pnl}

# -------------------------------
# 7. Report Performance
# -------------------------------
# One row per configuration, one column per metric.
performance_df = pd.DataFrame.from_dict(performance, orient='index')
print("\nML Strategy Performance (Test Data):")
print(performance_df)
