In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [13]:
# Load the S&P 500 (SPX) history from a local CSV into `df`.
# `file_path` is reused further down when the frame is written back to disk.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only runs
# as-is on the author's machine; consider a path relative to a configurable data directory.
file_path = "/Users/sambrown04/Documents/570/Market_Regime/sp-500-historical-data/versions/1/SPX.csv"
df = pd.read_csv(file_path)

In [29]:
# Keep only rows with a non-zero Volume, i.e. cut the data to when volume started to be tracked.
# .copy() makes the result an independent frame, so adding feature columns to it in later
# cells does not trigger pandas' SettingWithCopyWarning.
df = df[df['Volume'] != 0].copy()

In [69]:
def add_technical_indicators(prices: pd.DataFrame, price_col: str = "Close") -> pd.DataFrame:
    """Return a copy of ``prices`` with return, trend, momentum and volatility columns added.

    Parameters
    ----------
    prices : pd.DataFrame
        Price history with a numeric ``price_col`` column. All windows run over row
        order, so rows are assumed to be in chronological order (TODO confirm for
        other data sets).
    price_col : str, default "Close"
        Column every indicator is computed from.

    Returns
    -------
    pd.DataFrame
        A new frame (the input is not modified) with these columns added, or
        overwritten if they already exist: ``Daily_Return_%``, ``SMA_20``, ``SMA_50``,
        ``volatility_20``, ``EMA_12``, ``EMA_26``, ``MACD``, ``Signal_Line``,
        ``BB_Mid``, ``BB_Std``, ``BB_Upper``, ``BB_Lower``, ``BB_Width``.
        Rolling columns are NaN until their window has filled.
    """
    # Work on a copy so the caller's frame (possibly a filtered slice) is never mutated.
    out = prices.copy()
    close = out[price_col]

    # Fractional row-over-row return; shared by the return and volatility columns.
    daily_return = close.pct_change()
    # 20-row mean; it is both the SMA_20 and the Bollinger mid line.
    sma_20 = close.rolling(20).mean()

    # Daily return in percent
    out['Daily_Return_%'] = daily_return * 100

    # 20- and 50-row simple moving averages
    out['SMA_20'] = sma_20
    out['SMA_50'] = close.rolling(50).mean()

    # Volatility: standard deviation of daily returns over a 20-row window, in percent
    out['volatility_20'] = daily_return.rolling(20).std() * 100

    # EMA 12 and 26; the weight applied to the price n steps back is alpha * (1 - alpha)^n
    out["EMA_12"] = close.ewm(span=12, adjust=False).mean()
    out["EMA_26"] = close.ewm(span=26, adjust=False).mean()

    # MACD measures "momentum": the short-window EMA minus the long-window EMA.
    # The signal line is a 9-span EMA that smooths the MACD.
    out["MACD"] = out["EMA_12"] - out["EMA_26"]
    out["Signal_Line"] = out["MACD"].ewm(span=9, adjust=False).mean()

    # Bollinger bands: mid line +/- 2 rolling standard deviations
    out["BB_Mid"] = sma_20
    out["BB_Std"] = close.rolling(20).std()
    out["BB_Upper"] = out["BB_Mid"] + 2 * out["BB_Std"]
    out["BB_Lower"] = out["BB_Mid"] - 2 * out["BB_Std"]

    # Band width relative to the mid line
    out["BB_Width"] = (out["BB_Upper"] - out["BB_Lower"]) / out["BB_Mid"]

    return out


df = add_technical_indicators(df)

In [131]:
# Save new dataset to same location (without the index column).
# NOTE(review): `file_path` is the same path the data was read from, so this overwrites
# the source CSV with the filtered, feature-augmented frame; consider writing to a
# separate file so the original download stays intact.
df.to_csv(file_path, index=False)

Now we need to build our target: which "regime" (bull, bear, or sideways) the market is in, judged by its return over the next N days.

In [103]:
def regime_labels(df, N = 21, threshold = 0.05):
    """Label each row by the size and sign of its N-row forward return on ``Close``.

    For row ``i`` the forward return is ``Close[i + N] / Close[i] - 1`` and the label is

    * ``1``  (Bull)     if the forward return is ``>= threshold``
    * ``-1`` (Bear)     if the forward return is ``<= -threshold``
    * ``0``  (Sideways) otherwise (this includes NaN forward returns)

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a numeric ``Close`` column; rows are taken in their existing order.
    N : int, default 21
        Look-ahead horizon, in rows.
    threshold : float, default 0.05
        Fractional return that separates Bull/Bear from Sideways.

    Returns
    -------
    pd.Series
        Integer Series named ``'Regime'`` of length ``max(len(df) - N, 0)``; the last
        ``N`` rows have no forward window and get no label. The index is positional
        (``0 .. len - 1``), not ``df``'s index: label ``k`` belongs to ``df.iloc[k]``.

    Raises
    ------
    ValueError
        If ``N`` is negative.
    """
    if N < 0:
        raise ValueError("N must be non-negative")

    close = df['Close'].to_numpy(dtype=float)
    n_labels = max(len(close) - N, 0)

    # Return over the next N rows, computed for every row at once instead of
    # extracting two rows per iteration and growing a Series one element at a time.
    forward_return = close[N:] / close[:n_labels] - 1

    # The Bull test comes first, matching an if/elif chain; NaN fails both tests -> 0.
    labels = np.where(
        forward_return >= threshold,
        1,                                                # Bull
        np.where(forward_return <= -threshold, -1, 0),    # Bear / Sideways
    )

    return pd.Series(labels, name = 'Regime')

In [123]:
# Label the data with the default settings: 21-row look-ahead, 5% threshold.
y = regime_labels(df)

In [124]:
# Class balance of the labels: 0 = Sideways, 1 = Bull, -1 = Bear.
y.value_counts()

Regime
 0    14209
 1     2148
-1     1449
Name: count, dtype: int64

In [125]:
# Number of labels; the last N rows of df have no forward window, so none is produced for them.
y.shape

(17806,)

In [129]:
# Rows and columns of the feature frame, for comparison with the number of labels.
df.shape

(17827, 20)

In [113]:
# Show the last 500 rows (the rendered table is truncated) to eyeball the newest prices and indicator values.
df.iloc[-500:]

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily_Return_%,SMA_20,SMA_50,volatility_20,EMA_12,EMA_26,MACD,Signal_Line,BB_Mid,BB_Std,BB_Upper,BB_Lower,BB_Width
22823,2018-11-09,2794.100098,2794.100098,2764.239990,2781.010010,2781.010010,4019090000,-0.919901,2740.897498,2829.092798,1.361005,2757.221228,2771.015188,-13.793960,-29.905754,2740.897498,52.538561,2845.974620,2635.820375,0.076674
22824,2018-11-12,2773.929932,2775.989990,2722.000000,2726.219971,2726.219971,3670930000,-1.970149,2739.668994,2825.586797,1.427647,2752.451804,2767.697024,-15.245220,-26.973647,2739.668994,52.582312,2844.833618,2634.504370,0.076772
22825,2018-11-13,2730.050049,2754.600098,2714.979980,2722.179932,2722.179932,4091440000,-0.148192,2735.281995,2822.095996,1.331821,2747.794593,2764.325387,-16.530795,-24.885077,2735.281995,50.009920,2835.301835,2635.262154,0.073133
22826,2018-11-14,2737.899902,2746.800049,2685.750000,2701.580078,2701.580078,4402370000,-0.756741,2729.900500,2818.355596,1.338243,2740.684667,2759.677586,-18.992919,-23.706645,2729.900500,47.356496,2824.613492,2635.187509,0.069389
22827,2018-11-15,2693.520020,2735.379883,2670.750000,2730.199951,2730.199951,4179140000,1.059375,2727.971497,2815.398594,1.331765,2739.071634,2757.494058,-18.422424,-22.649801,2727.971497,46.466834,2820.905165,2635.037828,0.068134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23318,2020-10-29,3277.169922,3341.050049,3259.820068,3310.110107,3310.110107,4903070000,1.194733,3428.031995,3404.929204,1.350729,3393.669387,3402.106604,-8.437216,12.273162,3428.031995,66.974331,3561.980657,3294.083333,0.078149
23319,2020-10-30,3293.590088,3304.929932,3233.939941,3269.959961,3269.959961,4840450000,-1.212955,3424.108997,3402.618203,1.360460,3374.637168,3392.317963,-17.680795,6.282371,3424.108997,73.830013,3571.769024,3276.448970,0.086247
23320,2020-11-02,3296.199951,3330.139893,3279.739990,3310.239990,3310.239990,4310590000,1.231820,3419.190991,3400.879805,1.324136,3364.729910,3386.238114,-21.508204,0.724256,3419.190991,78.071639,3575.334270,3263.047713,0.091333
23321,2020-11-03,3336.250000,3389.489990,3336.250000,3369.159912,3369.159912,4220070000,1.779929,3419.600488,3399.637402,1.355311,3365.411449,3384.973062,-19.561613,-3.332918,3419.600488,77.771089,3575.142667,3264.058310,0.090971
