In [1]:
import pandas as pd


In [10]:
df = pd.read_csv('../data/01_raw/combined_stock_pulls/combined_raw_stock_data.csv')


In [11]:
df.head()

Unnamed: 0,date,high,low,open,close,volume,adj_close,ticker
0,2019-01-02,58.869999,56.150002,56.439999,58.48,24892600.0,48.071724,XLE
1,2019-01-03,58.860001,57.240002,58.650002,57.900002,18024100.0,47.594959,XLE
2,2019-01-04,60.049999,58.560001,58.900002,59.869999,21351500.0,49.214329,XLE
3,2019-01-07,61.200001,59.52,60.32,60.759998,18056700.0,49.945934,XLE
4,2019-01-08,61.75,60.900002,61.610001,61.23,18692300.0,50.332279,XLE


In [33]:
def calculate_rolling_means(dataframe: pd.DataFrame, 
                            stock_field: str, 
                            date_field: str,
                            calculation_field: str, 
                            day_ranges: list, 
                            exponential = False) -> pd.DataFrame: 
    
    '''return a dataframe that includes all of the rolling means either straight or exponential appended to the overall dataset
    
    Args:
        dataframe: main dataset (from raw to be fed in or specified in the modeling catalog
        field: field to use on which to calculate the rolling standard deviations
        day_ranges: list of the days over which the rolling mean is to be calculates (e.g., 6, 7, 15)
        exponential: If True will calculate exponential moving averages instead of simple moving averages
    
    '''
    
    # first sort the dataframe:
    dataframe = dataframe.sort_values(by =[stock_field, date_field] )

    for days in day_ranges: # loop through each day range and append the new column after running for each security

        moving_averages = []

        if exponential == True:
            for equity in dataframe[stock_field].unique():
                temp = dataframe[dataframe[stock_field] == equity]
                stock_moving_average = temp[calculation_field].ewm(span = days, min_periods = days).mean()
                moving_averages.extend(stock_moving_average)
                del temp
            
            dataframe[str(days) +'_' + calculation_field + '_' + 'ema'] = moving_averages

        else:
            for equity in dataframe[stock_field].unique():
                temp = dataframe[dataframe[stock_field] == equity]
                stock_moving_average = temp[calculation_field].rolling(days).mean()
                moving_averages.extend(stock_moving_average)
                del temp
           
            dataframe[str(days) +'_' + calculation_field + '_' + 'ema'] = moving_averages

    return dataframe



    # for equity in dataframe['ticker'].unique():

    
    #     if exponential == True:
        
    #         for days in day_ranges:
    #             moving_averages = []
    #             stock_moving_average = dataframe[calculation_field].ewm(span = days, min_periods = days).mean()
    #             moving_averages = moving_averages.extend(stock_moving_average)


    #             dataframe[str(days) +'_' + calculation_field + '_' + 'ema'] = moving_averages

    #     else:
    #         for days in day_ranges:
    #             dataframe[str(days) +'_' + calculation_field + '_' + 'sma'] = dataframe[calculation_field].rolling(days).mean()
        
    # return dataframe

In [39]:
# run on dataframe:

df = calculate_rolling_means(dataframe = df, 
                            stock_field = 'ticker', 
                            date_field= 'date',
                            calculation_field= 'close',
                            day_ranges = [7, 14, 21],
                            exponential= False)

In [40]:
df.shape

(2772, 11)

In [43]:
df[df['ticker'] == 'XLE'].head(15)

Unnamed: 0,date,high,low,open,close,volume,adj_close,ticker,7_close_ema,14_close_ema,21_close_ema
0,2019-01-02,58.869999,56.150002,56.439999,58.48,24892600.0,48.071724,XLE,,,
1,2019-01-03,58.860001,57.240002,58.650002,57.900002,18024100.0,47.594959,XLE,,,
2,2019-01-04,60.049999,58.560001,58.900002,59.869999,21351500.0,49.214329,XLE,,,
3,2019-01-07,61.200001,59.52,60.32,60.759998,18056700.0,49.945934,XLE,,,
4,2019-01-08,61.75,60.900002,61.610001,61.23,18692300.0,50.332279,XLE,,,
5,2019-01-09,62.41,61.41,62.049999,62.200001,18756800.0,51.129635,XLE,,,
6,2019-01-10,62.459999,61.380001,61.73,62.380001,19167600.0,51.277599,XLE,61.221126,,
7,2019-01-11,62.41,61.459999,62.060001,62.009998,14105900.0,50.973446,XLE,61.440285,,
8,2019-01-14,62.110001,61.330002,61.419998,61.860001,13191700.0,50.850151,XLE,61.553732,,
9,2019-01-15,62.669998,61.77,62.200001,62.080002,9821200.0,51.030998,XLE,61.69315,,


In [41]:
# make ready for kedro

Unnamed: 0,date,high,low,open,close,volume,adj_close,ticker,7_close_ema,14_close_ema,21_close_ema
1843,2022-08-25,34.790001,34.209999,34.299999,34.759998,26402600.0,34.759998,XLF,34.642517,34.547761,34.300762
1844,2022-08-26,34.919998,33.689999,34.91,33.720001,38230600.0,33.720001,XLF,34.411888,34.437393,34.247965
1845,2022-08-29,33.720001,33.279999,33.470001,33.48,34872100.0,33.48,XLF,34.178916,34.309741,34.17815
1846,2022-08-30,33.700001,33.099998,33.59,33.299999,41870700.0,33.299999,XLF,33.959187,34.175109,34.098318
1847,2022-08-31,33.59,33.0,33.470001,33.049999,39768200.0,33.049999,XLF,33.73189,34.025094,34.003016
