In [4]:
import pandas as pd
import numpy as np

# Read the raw combined table from disk.
data = pd.read_csv('final_combined_sp500.csv')

# Columns that must end up as floats before any arithmetic is done on them.
numerical_columns = [
    'Price', 'Open', 'High', 'Low',
    'FederalFundsRate', 'InflationRate', 'GDP', 'Debt', 'UnemploymentRate',
]

# With regex=True the replacement is a substring substitution, so every comma
# inside a string cell is removed before the cast to float.
columns_without_commas = data[numerical_columns].replace(',', '', regex=True)
data[numerical_columns] = columns_without_commas.astype(float)

def add_variation_to_data(group_of_values, std_set, rng=None):
    """Add zero-mean Gaussian noise to one group of values.

    The noise standard deviation is ``std_set`` times the magnitude of the
    group's mean, so the jitter scales with the level of the series.

    Parameters
    ----------
    group_of_values : pandas.Series
        Values of a single group, as handed over by ``groupby(...).transform``
        (e.g. one month or one quarter of a column).
    std_set : float
        Noise standard deviation expressed as a fraction of the group mean.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global legacy state is
        used, so ``np.random.seed`` still controls reproducibility.

    Returns
    -------
    pandas.Series
        ``group_of_values`` plus one independent noise draw per element.

    Raises
    ------
    ValueError
        If ``std_set`` is negative (the resulting scale is rejected by NumPy).
    """
    mean_val_of_month_or_quarter = group_of_values.mean()
    # The normal sampler rejects a negative scale; taking the magnitude of the
    # mean keeps groups whose mean is below zero from raising ValueError.
    noise_scale = std_set * abs(mean_val_of_month_or_quarter)
    sampler = np.random if rng is None else rng
    return group_of_values + sampler.normal(scale=noise_scale, size=len(group_of_values))
    

# Seed NumPy's global generator so the synthetic jitter -- and therefore the
# preview printed below and anything saved from `data` -- is the same on every
# fresh run of the notebook.
NOISE_SEED = 42
np.random.seed(NOISE_SEED)

# Group keys, built once and reused: the 'YYYY-MM' prefix of Date for monthly
# groups and a quarterly PeriodIndex for quarterly groups.
month_key = data['Date'].str[:7]
quarter_key = pd.PeriodIndex(data['Date'], freq='Q')

# column -> (group key, noise std as a fraction of that group's mean)
noise_plan = {
    'FederalFundsRate': (month_key, 0.005),
    'InflationRate': (month_key, 0.005),
    'UnemploymentRate': (quarter_key, 0.005),
    'GDP': (quarter_key, 0.01),
    'Debt': (quarter_key, 0.01),
}

# Columns are processed in the order listed above; each one is overwritten in
# place with its jittered version.
for column, (group_key, std_fraction) in noise_plan.items():
    # `std=std_fraction` binds the current value at definition time instead of
    # relying on the loop variable being looked up later.
    data[column] = data.groupby(group_key)[column].transform(
        lambda x, std=std_fraction: add_variation_to_data(x, std)
    )


print(data.head())

         Date    Price     Open     High      Low  FederalFundsRate  \
0  2019-05-15  2850.96  2820.38  2858.68  2815.08          2.378406   
1  2019-05-14  2834.41  2820.12  2852.54  2820.12          2.389895   
2  2019-05-13  2811.87  2840.19  2840.19  2801.43          2.393938   
3  2019-05-10  2881.40  2863.10  2891.31  2825.39          2.400918   
4  2019-05-09  2870.72  2859.84  2875.97  2836.40          2.401057   

   InflationRate           GDP          Debt  UnemploymentRate  
0     254.028156  21635.520175  2.192903e+07          3.314616  
1     255.170190  21387.097968  2.194828e+07          3.284601  
2     255.023199  21816.185662  2.219149e+07          3.310763  
3     252.898622  21409.192287  2.215984e+07          3.300068  
4     253.973934  21459.716577  2.202540e+07          3.306936  


In [6]:
# Write the current contents of `data` to disk; index=False omits the row
# index column from the file.
data.to_csv(path_or_buf='processed_data.csv', index=False)