# Imports

In [3]:
import pandas as pd
import numpy as np

In [4]:
prices = pd.read_csv('data/s&p500_data.csv', index_col=0, parse_dates=True)

# Calculate Signals

In [5]:
def calculate_signals(prices, windows=[21, 42, 63, 126, 252]):
    """
    Calculate both momentum and drawdown indicators for multiple timeframes
    
    Parameters:
    prices: pandas Series with datetime index
    windows: list of periods for calculation
    
    Returns:
    DataFrame with momentum and drawdown signals for each timeframe
    """
    # Initialize signals DataFrame
    signals = pd.DataFrame(index=prices.index)
    
    for window in windows:
        # Calculate momentum
        momentum = (prices / prices.shift(window)) - 1
        signals[f'momentum_{window}d'] = momentum
        
        # Add rolling standardization (z-score) to compare across timeframes
        rolling_mean = momentum.rolling(window=window).mean()
        rolling_std = momentum.rolling(window=window).std()
        signals[f'momentum_{window}d_zscore'] = (momentum - rolling_mean) / rolling_std
        
        # Calculate drawdown
        # First get rolling maximum price over the window
        rolling_max = prices.rolling(window=window, min_periods=1).max()
        # Calculate drawdown as percentage decline from peak
        drawdown = (prices - rolling_max) / rolling_max
        signals[f'drawdown_{window}d'] = drawdown
        
        # Add rolling standardization for drawdown
        rolling_mean_dd = drawdown.rolling(window=window).mean()
        rolling_std_dd = drawdown.rolling(window=window).std()
        signals[f'drawdown_{window}d_zscore'] = (drawdown - rolling_mean_dd) / rolling_std_dd
    
    return signals

In [6]:
reg_signals = calculate_signals(prices).dropna().filter(like='zscore')
reg_signals

Unnamed: 0_level_0,momentum_21d_zscore,drawdown_21d_zscore,momentum_42d_zscore,drawdown_42d_zscore,momentum_63d_zscore,drawdown_63d_zscore,momentum_126d_zscore,drawdown_126d_zscore,momentum_252d_zscore,drawdown_252d_zscore
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1996-12-27,-0.646057,1.163140,0.588215,0.751273,0.138208,0.688399,1.565513,0.883739,-0.192319,0.995828
1996-12-30,-0.660231,0.903742,0.060685,0.431100,-0.110603,0.350154,1.213409,0.698161,-0.413831,0.767371
1996-12-31,-1.298849,-0.218207,-0.771952,-0.895762,-0.953448,-1.156255,0.802602,-0.124979,-0.765764,-0.252027
1997-01-02,-1.423046,-0.505028,-1.276743,-1.219028,-1.506107,-1.532454,0.708287,-0.354467,-0.744147,-0.539277
1997-01-03,-0.265907,0.587076,-0.423396,-0.112296,-0.858351,-0.248572,1.747009,0.335995,-0.416698,0.324159
...,...,...,...,...,...,...,...,...,...,...
2024-12-23,-0.982926,-0.963551,-0.807795,-0.968400,-0.873208,-1.298510,-0.846531,-0.262267,-0.183703,-0.480692
2024-12-24,-0.578530,-0.004139,-0.328320,0.101233,-0.394884,-0.077231,-0.522828,0.341703,0.039373,0.208263
2024-12-26,-0.685421,-0.013106,-0.344943,0.059074,-0.566419,-0.107557,-0.556426,0.323455,-0.074626,0.185081
2024-12-27,-1.327090,-0.891718,-1.072390,-0.984458,-0.930216,-1.297679,-0.779085,-0.279994,-0.378340,-0.506939


# Create Polynomial Features

In [7]:
from itertools import combinations_with_replacement

def create_polynomial_features(reg_signals: pd.DataFrame, degree: int = 3) -> pd.DataFrame:
    """
    Create polynomial features up to specified degree from existing z-score signals.
    
    Parameters:
    reg_signals: DataFrame containing momentum and drawdown z-scores
    degree: Maximum polynomial degree (default=3 as per paper)
    
    Returns:
    DataFrame with original and polynomial features
    """
    # Start with original features
    poly_features = reg_signals.copy()
    feature_names = reg_signals.columns.tolist()
    
    # Generate polynomial terms for degrees 2 to specified degree
    for d in range(2, degree + 1):
        # Get all possible combinations of features for current degree
        feature_combinations = combinations_with_replacement(feature_names, d)
        
        # Create polynomial terms
        for combo in feature_combinations:
            # Create feature name
            feature_name = '_x_'.join(feat for feat in combo)
            
            # Calculate polynomial term
            poly_term = np.ones(len(reg_signals))
            for feature in combo:
                poly_term *= reg_signals[feature]
            
            # Add to features DataFrame
            poly_features[feature_name] = poly_term
    
    return poly_features

def normalize_features(features_df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize features to have zero mean and values between -1 and 1.
    
    Parameters:
    features_df: DataFrame containing all features
    
    Returns:
    DataFrame with normalized features
    """
    normalized_df = pd.DataFrame(index=features_df.index)
    
    for col in features_df.columns:
        series = features_df[col]
        # Center around zero
        centered = series - series.mean()
        # Scale to [-1, 1]
        if centered.std() != 0:
            normalized = centered / max(abs(centered.max()), abs(centered.min()))
            normalized_df[col] = normalized
        else:
            normalized_df[col] = centered
            
    return normalized_df


poly_features = create_polynomial_features(reg_signals)
norm_features = normalize_features(poly_features)



  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = poly_term
  poly_features[feature_name] = po

In [8]:
norm_features

Unnamed: 0_level_0,momentum_21d_zscore,drawdown_21d_zscore,momentum_42d_zscore,drawdown_42d_zscore,momentum_63d_zscore,drawdown_63d_zscore,momentum_126d_zscore,drawdown_126d_zscore,momentum_252d_zscore,drawdown_252d_zscore,...,drawdown_126d_zscore_x_drawdown_126d_zscore_x_drawdown_126d_zscore,drawdown_126d_zscore_x_drawdown_126d_zscore_x_momentum_252d_zscore,drawdown_126d_zscore_x_drawdown_126d_zscore_x_drawdown_252d_zscore,drawdown_126d_zscore_x_momentum_252d_zscore_x_momentum_252d_zscore,drawdown_126d_zscore_x_momentum_252d_zscore_x_drawdown_252d_zscore,drawdown_126d_zscore_x_drawdown_252d_zscore_x_drawdown_252d_zscore,momentum_252d_zscore_x_momentum_252d_zscore_x_momentum_252d_zscore,momentum_252d_zscore_x_momentum_252d_zscore_x_drawdown_252d_zscore,momentum_252d_zscore_x_drawdown_252d_zscore_x_drawdown_252d_zscore,drawdown_252d_zscore_x_drawdown_252d_zscore_x_drawdown_252d_zscore
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996-12-27,-0.157180,0.305104,0.148659,0.160659,0.050369,0.121031,0.336678,0.118896,-0.024889,0.126678,...,0.006036,0.005307,0.005374,0.005884,0.006433,0.005338,-0.003583,0.004457,0.006747,0.005523
1996-12-30,-0.161101,0.242192,0.030469,0.099825,-0.003089,0.068619,0.263221,0.095803,-0.063196,0.100330,...,0.005376,0.005032,0.004671,0.006616,0.006115,0.004589,-0.003922,0.005279,0.006415,0.004726
1996-12-31,-0.337769,-0.029913,-0.156078,-0.152283,-0.184175,-0.164800,0.177519,-0.006629,-0.124058,-0.017238,...,0.004731,0.006042,0.004013,0.004990,0.007313,0.003915,-0.005935,0.002852,0.007637,0.004031
1997-01-02,-0.372127,-0.099476,-0.269173,-0.213705,-0.302915,-0.223092,0.157843,-0.035186,-0.120320,-0.050367,...,0.004651,0.005608,0.003902,0.003954,0.006597,0.003762,-0.005738,0.001540,0.006586,0.003821
1997-01-03,-0.052015,0.165391,-0.077986,-0.003422,-0.163743,-0.024154,0.374542,0.050735,-0.063692,0.049214,...,0.004806,0.005856,0.004084,0.006100,0.007184,0.003984,-0.003930,0.004626,0.007668,0.004105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-23,-0.250372,-0.210681,-0.164108,-0.166084,-0.166936,-0.186842,-0.166525,-0.023713,-0.023399,-0.043610,...,0.004701,0.006039,0.003962,0.005534,0.007319,0.003830,-0.003578,0.003996,0.007676,0.003889
2024-12-24,-0.138500,0.022005,-0.056685,0.037150,-0.064167,0.002396,-0.098994,0.051445,0.015179,0.035847,...,0.004810,0.006130,0.004062,0.005613,0.007476,0.003952,-0.003545,0.004139,0.007953,0.004068
2024-12-26,-0.168070,0.019830,-0.060409,0.029139,-0.101021,-0.002303,-0.106003,0.049174,-0.004535,0.033174,...,0.004798,0.006064,0.004054,0.005623,0.007432,0.003946,-0.003547,0.004146,0.007926,0.004064
2024-12-27,-0.345581,-0.193259,-0.223389,-0.169135,-0.179184,-0.186713,-0.152454,-0.025919,-0.057058,-0.046637,...,0.004693,0.005948,0.003951,0.005270,0.007134,0.003812,-0.003833,0.003506,0.007333,0.003861


In [9]:
reg_signals.to_csv('indicators.csv')
poly_features.to_csv('indicators_w_polyterms.csv')

# Construct Y Matrix

In [10]:
sp_data = pd.read_csv('data/s&p500_data.csv')
sp_data.set_index(['Date'], inplace=True)

In [11]:
# Calculate the **daily returns**
sp_data['daily returns'] = sp_data['Close'].pct_change()
# Compute the **rolling standard deviation** of daily returns over 252 trading days
sp_data['std'] = sp_data['daily returns'].rolling(252).std()

# Calculate the **future 3-day returns**:
sp_data['3 days returns'] = sp_data['Close'].pct_change(3).shift(-3)
# Calculate the **future 5-day returns**:
sp_data['5 days returns'] = sp_data['Close'].pct_change(5).shift(-5)
# Calculate the **future 8-day returns**:
sp_data['8 days returns'] = sp_data['Close'].pct_change(8).shift(-8)
# Calculate the **future 10-day returns**:
sp_data['10 days returns'] = sp_data['Close'].pct_change(10).shift(-10)

# Remove any NaN (missing) values from the dataset.
sp_data.dropna(inplace=True)

In [12]:
sp_data.head()

Unnamed: 0_level_0,Close,daily returns,std,3 days returns,5 days returns,8 days returns,10 days returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1996-01-02,620.72998,0.007793,0.004934,-0.006476,-0.018172,-0.03048,-0.019799
1996-01-03,621.320007,0.000951,0.004932,-0.004603,-0.03676,-0.034604,-0.024062
1996-01-04,617.700012,-0.005826,0.004951,-0.013356,-0.0243,-0.014991,-0.015315
1996-01-05,616.710022,-0.001603,0.004954,-0.02956,-0.024161,-0.016766,-0.007913
1996-01-08,618.460022,0.002838,0.004954,-0.025499,-0.030139,-0.016525,-0.008182


In [13]:
# Define long (buy) and short (sell) signals for a 3-day return horizon
# - A long position (1) is taken if the n-day return is greater than or equal to the rolling standard deviation.
# - A short position (1) is taken if the n-day return is less than or equal to the negative rolling standard deviation.
# - Otherwise, values remain 0.

y = pd.DataFrame(index=sp_data.index)
y['long 3 days'] = (sp_data['3 days returns'] >= sp_data['std']).astype(int)
y['short 3 days'] = (sp_data['3 days returns'] <= -sp_data['std']).astype(int)

y['long 5 days'] = (sp_data['5 days returns'] >= sp_data['std']).astype(int)
y['short 5 days'] = (sp_data['5 days returns'] <= -sp_data['std']).astype(int)

y['long 8 days'] = (sp_data['8 days returns'] >= sp_data['std']).astype(int)
y['short 8 days'] = (sp_data['8 days returns'] <= -sp_data['std']).astype(int)

y['long 10 days'] = (sp_data['10 days returns'] >= sp_data['std']).astype(int)
y['short 10 days'] = (sp_data['10 days returns'] <= -sp_data['std']).astype(int)

In [14]:
y.to_csv('Y_Matrix.csv')