# 07

In [1]:
import pandas as pd
import numpy as np

In [3]:
macro = pd.read_csv('../data/macro/macro_monthly.csv').iloc[:,1:-1]
target = pd.read_csv('../data/macro/macro_monthly.csv').iloc[:,-1]
lda = pd.read_csv('../data/processed/df_with_lda.csv').iloc[:,-6:]
sentiments = pd.read_csv('../data/processed/sentiments.csv').iloc[1:,:].reset_index().drop(columns=['index'])

In [4]:
final_df = pd.concat([macro, lda, sentiments, target], axis=1).reindex(macro.index)

In [6]:
final_df.head(10)

Unnamed: 0,DFEDTARL,DFEDTARU,GDPC1,CPIAUCSL,UNRATE,PAYEMS,RRSFS,GFDEBTN,VIXCLS,prob_topic_1,prob_topic_2,prob_topic_3,prob_topic_4,prob_topic_5,prob_topic_6,sentiments,DFF
0,,,15267.026,199.3,4.7,135429.0,179293.0,8371156.0,12.95,2.532629e-07,0.442418,0.098614,0.1488264,0.021148,0.288993,-0.1,4.47
1,,,15278.919,199.4,4.8,135737.0,177887.0,8387451.0,12.34,5.791823e-07,0.013136,4.2e-05,0.9707158,0.002117,0.013989,0.0,4.52
2,,,15290.812,199.7,4.7,136047.0,178100.0,8403747.0,11.39,2.570856e-07,0.388304,0.011189,0.4659097,0.011551,0.123046,0.0,5.0
3,,,15302.705,200.7,4.7,136205.0,178088.0,8420042.0,11.59,2.403132e-07,0.492659,0.050694,0.1158198,0.125781,0.215045,0.0,4.86
4,,,15310.592667,201.3,4.6,136244.0,177200.0,8449019.0,16.44,1.027025e-06,0.005665,1e-06,0.9923208,0.000148,0.001864,0.6,5.05
5,,,15318.480333,201.8,4.6,136325.0,177343.0,8477997.0,13.08,3.974326e-07,0.073057,0.349738,3.974326e-07,0.087838,0.489367,0.0,5.05
6,,,15326.368,202.9,4.7,136520.0,176937.0,8506974.0,14.95,3.29258e-07,0.602091,0.044666,0.04766647,0.049294,0.256283,0.0,5.31
7,,,15369.888,203.8,4.7,136694.0,176983.0,8564724.0,12.31,2.579949e-07,0.07628,0.121305,0.01086829,0.048339,0.743207,0.1,5.31
8,,,15413.408,202.8,4.5,136843.0,177002.0,8622474.0,11.98,1.209156e-06,0.057376,0.018356,0.8510057,0.011598,0.061663,-0.3,5.34
9,,,15456.928,201.9,4.4,136852.0,177544.0,8680224.0,11.1,4.024654e-07,0.127895,0.173201,1.495148e-05,0.348323,0.350566,-0.1,5.31


## Feature Engineering Macro Data

In [7]:
final_df.columns

Index(['DFEDTARL', 'DFEDTARU', 'GDPC1', 'CPIAUCSL', 'UNRATE', 'PAYEMS',
       'RRSFS', 'GFDEBTN', 'VIXCLS', 'prob_topic_1', 'prob_topic_2',
       'prob_topic_3', 'prob_topic_4', 'prob_topic_5', 'prob_topic_6',
       'sentiments', 'DFF'],
      dtype='object')

In [8]:
macro_columns = ['GDPC1',
                'CPIAUCSL',
                'UNRATE',
                'PAYEMS',
                'RRSFS',
                'GFDEBTN',
                'VIXCLS']

In [9]:
for col in macro_columns:
    # percentage difference from the previous idx
    diff = (final_df[col].diff() / final_df[col].shift(1)) * 100
    diff_idx = final_df.columns.get_loc(col) + 1
    
    final_df.insert(loc=diff_idx, column=f'{col}_diff', value=diff)
    
    # simple moving averages (SMA 3, 5)
    sma_3 = final_df[col].rolling(window=3).mean()
    sma_5 = final_df[col].rolling(window=5).mean()
    sma_3_idx = diff_idx + 1
    sma_5_idx = sma_3_idx + 1
    
    final_df.insert(loc=sma_3_idx, column=f'{col}_sma_3', value=sma_3)
    final_df.insert(loc=sma_5_idx, column=f'{col}_sma_5', value=sma_5)
    
    # exponential weighted moving average
    ewm = final_df['GDPC1'].ewm(span=40, adjust=False).mean()
    ewm_idx = sma_5_idx + 1
    
    final_df.insert(loc=ewm_idx, column=f'{col}_ewm', value=ewm)
    
final_df = final_df.dropna().reset_index(drop=True)

In [10]:
final_df.head(10)

Unnamed: 0,DFEDTARL,DFEDTARU,GDPC1,GDPC1_diff,GDPC1_sma_3,GDPC1_sma_5,GDPC1_ewm,CPIAUCSL,CPIAUCSL_diff,CPIAUCSL_sma_3,...,VIXCLS_sma_5,VIXCLS_ewm,prob_topic_1,prob_topic_2,prob_topic_3,prob_topic_4,prob_topic_5,prob_topic_6,sentiments,DFF
0,0.0,0.25,15213.302333,-0.375637,15270.664667,15361.4106,15496.550343,211.398,-0.823352,213.848667,...,43.042,15496.550343,1.042734e-06,0.0665958,1.042734e-06,0.932056,1.042734e-06,0.001344982,0.0,0.14
1,0.0,0.25,15155.94,-0.377054,15213.302333,15281.792533,15479.935204,211.933,0.253077,212.161333,...,47.88,15479.935204,1.800641e-07,0.5754637,0.004369047,0.294135,0.0005470247,0.1254853,0.1,0.23
2,0.0,0.25,15148.665667,-0.047997,15172.636,15223.319933,15463.775714,212.705,0.364266,212.012,...,49.272,15463.775714,2.267387e-07,0.8976781,0.00907095,0.081272,0.0008447213,0.01113444,-0.1,0.22
3,0.0,0.25,15141.391333,-0.04802,15148.665667,15185.9928,15448.049647,212.495,-0.098728,212.377667,...,46.122,15448.049647,1.306076e-06,0.0368376,1.306076e-06,0.956843,2.508194e-05,0.006291489,0.1,0.16
4,0.0,0.25,15134.117,-0.048043,15141.391333,15158.683267,15432.735859,212.709,0.100708,212.636333,...,42.366,15432.735859,2.123809e-07,0.6179583,0.09674113,0.136572,0.006222367,0.1425059,0.2,0.2
5,0.0,0.25,15152.485333,0.12137,15142.664556,15146.519867,15419.065102,213.022,0.147149,212.742,...,40.15,15419.065102,6.926605e-07,6.926605e-07,6.926605e-07,0.999997,6.926605e-07,6.926605e-07,0.1,0.19
6,0.0,0.25,15170.853667,0.121223,15152.485333,15149.5026,15406.957227,214.79,0.829961,213.507,...,36.452,15406.957227,3.027721e-07,0.7972925,0.05104788,0.102578,0.00643746,0.04264385,-0.1,0.22
7,0.0,0.25,15189.222,0.121076,15170.853667,15157.613867,15396.335997,214.726,-0.029797,214.179333,...,32.366,15396.335997,2.676054e-07,0.4800797,0.1964681,0.013209,0.07138788,0.2388548,0.0,0.18
8,0.0,0.25,15244.834,0.366128,15201.636556,15178.3024,15388.945655,215.445,0.334845,214.987,...,28.74,15388.945655,5.357356e-07,0.001963263,5.357356e-07,0.998035,5.357356e-07,5.357356e-07,0.3,0.15
9,0.0,0.25,15300.446,0.364792,15244.834,15211.5682,15384.628599,215.861,0.193089,215.344,...,26.562,15384.628599,2.542776e-07,0.6448919,0.1403251,0.10246,0.0157881,0.09653518,-0.1,0.07
