# Feature Engineering
Creation of new variables to improve S&P 500 forecasting models.

In [1]:
# Imports
import pandas as pd
import numpy as np

In [2]:
# Load data
columns = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
df = pd.read_csv("sp500.csv", skiprows=3, names=columns, header=None, parse_dates=["Date"])
df = df.loc[:, ~df.columns.duplicated()]
df = df.drop(0)  # Remove extra header row
df = df.set_index("Date")
df = df.apply(pd.to_numeric, errors='coerce')

In [3]:
# Feature: Daily return
df['return'] = df['Close'].pct_change()

In [4]:
# Feature: 7-day and 21-day moving averages
df['ma7'] = df['Close'].rolling(window=7).mean()
df['ma21'] = df['Close'].rolling(window=21).mean()

In [5]:
# Feature: 7-day volatility (standard deviation)
df['volatility_7'] = df['return'].rolling(window=7).std()

In [6]:
# Feature: Future price (target for regression)
df['target'] = df['Close'].shift(-1)

In [7]:
# Feature: Upward signal (target for classification)
df['target_up'] = (df['Close'].shift(-1) > df['Close']).astype(int)

In [8]:
# Show the new features
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,return,ma7,ma21,volatility_7,target,target_up
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-12-23,5974.069824,5978.25,5902.569824,5940.25,3593280000,,0.016818,5989.588518,6022.84754,0.01198,5984.629883,1
2024-12-24,6040.040039,6040.100098,5981.439941,5984.629883,1757720000,,0.007471,5977.654227,6024.76516,0.01264,6024.970215,1
2024-12-26,6037.589844,6049.75,6007.370117,6024.970215,2904530000,,0.006741,5972.108538,6026.321847,0.013064,6006.169922,0
2024-12-27,5970.839844,6006.169922,5932.950195,6006.169922,3159610000,,-0.00312,5965.48284,6026.614235,0.01309,5920.669922,0
2024-12-30,5906.939941,5940.790039,5869.160156,5920.669922,3433250000,,-0.014235,5947.342843,6022.164714,0.014004,,0
