In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import warnings 

In [2]:
# Silence every warning for the rest of the session: no category, message or
# module filter is given, so the "ignore" action applies to all of them.
# NOTE(review): this would also hide any pandas PerformanceWarning /
# FutureWarning emitted by later cells — consider narrowing the filter to the
# specific category that was noisy.
warnings.filterwarnings("ignore")

In [3]:
# Project root = parent of the current working directory, so this assumes the
# notebook is run from a first-level subfolder of the project.
root = Path().resolve().parent
# Join path components with pathlib's "/" operator rather than formatting the
# Path into a string with hard-coded separators.
general_data = pd.read_csv(
    root / "data" / "clean_data" / "general_data.csv",
    index_col="Date",  # the "Date" column becomes the row index (not parsed to datetimes)
)


In [4]:
# Preview the first rows of the loaded frame.
general_data.head()

Unnamed: 0_level_0,Adj Close_AAPL,Adj Close_AMZN,Adj Close_CVX,Adj Close_GOOGL,Adj Close_GS,Adj Close_JNJ,Adj Close_JPM,Adj Close_MSFT,Adj Close_PFE,Adj Close_XOM,...,volatility_MSFT,volatility_PFE,volatility_XOM,volatility_XLE,volatility_XLF,volatility_XLK,volatility_XLV,gdp,unemployment,inflation
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-07-06,140.663284,114.330002,124.785049,113.888962,271.415039,162.839661,103.496696,259.624359,45.164009,75.138512,...,0.015445,0.027488,0.03245,0.039841,0.003734,0.013255,0.009918,26272.011,3.5,294.94
2022-07-07,144.039139,116.330002,127.213737,118.074867,275.886932,163.022324,106.114395,261.760132,45.737648,77.538483,...,0.014225,0.02936,0.053175,0.062159,0.01627,0.019469,0.010691,26272.011,3.5,294.94
2022-07-08,144.718216,115.540001,126.548943,118.641968,273.918915,162.821396,105.781403,261.038483,45.523602,77.664787,...,0.016003,0.030549,0.053342,0.062122,0.016693,0.019232,0.010748,26272.011,3.5,294.94
2022-07-09,144.718216,115.540001,126.548943,118.641968,273.918915,162.821396,105.781403,261.038483,45.523602,77.664787,...,0.016003,0.030549,0.053342,0.062122,0.016693,0.019232,0.010748,26272.011,3.5,294.94
2022-07-10,144.718216,115.540001,126.548943,118.641968,273.918915,162.821396,105.781403,261.038483,45.523602,77.664787,...,0.014591,0.02406,0.039976,0.043128,0.015809,0.019952,0.006438,26272.011,3.5,294.94


In [5]:
def createfeatures(df, tickers=None):
    """Append macro-interaction feature columns for each ticker.

    For every ticker ``T`` three columns are produced, in this order:

    * ``infl_adj_return_T``  = ``returns_T``    * ``inflation``
    * ``bullish_momentum_T`` = ``momentum_T``   * ``gdp``
    * ``vol_unemp_risk_T``   = ``volatility_T`` * ``unemployment``

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``returns_T``, ``momentum_T`` and ``volatility_T`` for
        every requested ticker, plus ``inflation``, ``gdp`` and
        ``unemployment``.
    tickers : str or iterable of str, optional
        Tickers to process. When omitted, the ten stock tickers followed by
        the four ETF tickers listed below are used.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns followed by the feature
        columns. ``df`` itself is left untouched. If some feature columns
        already exist in ``df`` (e.g. the function is applied twice) they are
        replaced by the freshly computed ones instead of being duplicated.

    Raises
    ------
    KeyError
        If any required input column is missing; the message lists every
        missing column, and nothing has been computed at that point.
    """
    if tickers is None:
        stock_tickers = ['AAPL', 'AMZN', 'CVX', 'GOOGL', 'GS', 'JNJ', 'JPM', 'MSFT', 'PFE', 'XOM']
        etf_tickers = ["XLK", "XLV", "XLF", "XLE"]
        tickers = stock_tickers + etf_tickers
    elif isinstance(tickers, str):
        # A bare string would otherwise be iterated character by character.
        tickers = [tickers]
    else:
        tickers = list(tickers)

    if not tickers:
        # Nothing to compute; still hand back a separate object.
        return df.copy()

    # (output column prefix, per-ticker input prefix, macro column)
    # NOTE(review): "infl_adj_return" is a plain product with `inflation`; in
    # the previewed data that column holds values like 294.94, which looks
    # like an index level rather than a rate — confirm this is intended.
    interactions = (
        ("infl_adj_return", "returns", "inflation"),
        ("bullish_momentum", "momentum", "gdp"),
        ("vol_unemp_risk", "volatility", "unemployment"),
    )

    # Validate everything up front so a missing column is reported clearly
    # and before any work has been done.
    required = [macro for _, _, macro in interactions]
    required += [
        f"{source}_{ticker}"
        for ticker in tickers
        for _, source, _ in interactions
    ]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"createfeatures: missing required column(s): {missing}")

    # Compute every product first and attach them in one concat. Inserting
    # dozens of columns one at a time into a wide frame fragments it, which
    # is what pandas' "highly fragmented" PerformanceWarning is about.
    new_columns = {
        f"{target}_{ticker}": df[f"{source}_{ticker}"] * df[macro]
        for ticker in tickers
        for target, source, macro in interactions
    }
    features = pd.concat(
        list(new_columns.values()), axis=1, keys=list(new_columns)
    )

    # Drop previously generated feature columns so a second application
    # replaces them rather than creating duplicate column labels.
    stale = [col for col in features.columns if col in df.columns]
    return pd.concat([df.drop(columns=stale), features], axis=1)


In [6]:
# Rebind general_data to the frame returned by createfeatures, then preview
# its first rows.
general_data = createfeatures(general_data)
general_data.head()

Unnamed: 0_level_0,Adj Close_AAPL,Adj Close_AMZN,Adj Close_CVX,Adj Close_GOOGL,Adj Close_GS,Adj Close_JNJ,Adj Close_JPM,Adj Close_MSFT,Adj Close_PFE,Adj Close_XOM,...,vol_unemp_risk_XLK,infl_adj_return_XLV,bullish_momentum_XLV,vol_unemp_risk_XLV,infl_adj_return_XLF,bullish_momentum_XLF,vol_unemp_risk_XLF,infl_adj_return_XLE,bullish_momentum_XLE,vol_unemp_risk_XLE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-07-06,140.663284,114.330002,124.785049,113.888962,271.415039,162.839661,103.496696,259.624359,45.164009,75.138512,...,0.046391,1.983535,24.302824,0.034714,-0.743631,-157.044481,0.013071,-5.079048,-1516.142213,0.139442
2022-07-07,144.039139,116.330002,127.213737,118.074867,275.886932,163.022324,106.114395,261.760132,45.737648,77.538483,...,0.068143,1.2471,135.38921,0.037418,4.250478,221.570192,0.056944,10.44633,-585.627207,0.217556
2022-07-08,144.718216,115.540001,126.548943,118.641968,273.918915,162.821396,105.781403,261.038483,45.523602,77.664787,...,0.067313,0.881159,213.879124,0.037619,-0.642849,164.307941,0.058425,-0.124687,-596.733793,0.217429
2022-07-09,144.718216,115.540001,126.548943,118.641968,273.918915,162.821396,105.781403,261.038483,45.523602,77.664787,...,0.067313,0.0,213.879124,0.037619,0.0,164.307941,0.058425,0.0,-596.733793,0.217429
2022-07-10,144.718216,115.540001,126.548943,118.641968,273.918915,162.821396,105.781403,261.038483,45.523602,77.664787,...,0.069834,0.0,366.261201,0.022534,0.0,255.112925,0.055331,0.0,466.988223,0.150947


In [14]:
# Write the frame to the same clean_data folder the input was read from.
# The path is joined with pathlib's "/" operator instead of being formatted
# into a string with hard-coded separators.
general_data.to_csv(
    root / "data" / "clean_data" / "general_data_with_new_features.csv",
    index=True,  # write the row index as the first CSV column
)