In [12]:
import pandas as pd
from itertools import product


In [2]:
df = pd.read_csv('../data/01_raw/combined_stock_pulls/combined_raw_stock_data.csv')


In [3]:
df.head()

Unnamed: 0,date,high,low,open,close,volume,adj_close,ticker
0,2019-01-02,58.869999,56.150002,56.439999,58.48,24892600.0,48.071712,XLE
1,2019-01-03,58.860001,57.240002,58.650002,57.900002,18024100.0,47.594952,XLE
2,2019-01-04,60.049999,58.560001,58.900002,59.869999,21351500.0,49.214333,XLE
3,2019-01-07,61.200001,59.52,60.32,60.759998,18056700.0,49.945919,XLE
4,2019-01-08,61.75,60.900002,61.610001,61.23,18692300.0,50.332279,XLE


In [4]:
def calculate_rolling_means(dataframe: pd.DataFrame, 
                            stock_field: str, 
                            date_field: str,
                            calculation_field: str, 
                            day_ranges: list, 
                            exponential = False) -> pd.DataFrame: 
    
    '''return a dataframe that includes all of the rolling means either straight or exponential appended to the overall dataset
    
    Args:
        dataframe: main dataset (from raw to be fed in or specified in the modeling catalog
        field: field to use on which to calculate the rolling standard deviations
        day_ranges: list of the days over which the rolling mean is to be calculates (e.g., 6, 7, 15)
        exponential: If True will calculate exponential moving averages instead of simple moving averages
    
    '''
    
    # first sort the dataframe:
    dataframe = dataframe.sort_values(by =[stock_field, date_field] )

    for days in day_ranges: # loop through each day range and append the new column after running for each security

        moving_averages = []

        if exponential == True:
            for equity in dataframe[stock_field].unique():
                temp = dataframe[dataframe[stock_field] == equity]
                stock_moving_average = temp[calculation_field].ewm(span = days, min_periods = days).mean()
                moving_averages.extend(stock_moving_average)
                del temp
            
            dataframe[str(days) +'_' + calculation_field + '_' + 'ema'] = moving_averages

        else:
            for equity in dataframe[stock_field].unique():
                temp = dataframe[dataframe[stock_field] == equity]
                stock_moving_average = temp[calculation_field].rolling(days).mean()
                moving_averages.extend(stock_moving_average)
                del temp
           
            dataframe[str(days) +'_' + calculation_field + '_' + 'ema'] = moving_averages

    return dataframe



    # for equity in dataframe['ticker'].unique():

    
    #     if exponential == True:
        
    #         for days in day_ranges:
    #             moving_averages = []
    #             stock_moving_average = dataframe[calculation_field].ewm(span = days, min_periods = days).mean()
    #             moving_averages = moving_averages.extend(stock_moving_average)


    #             dataframe[str(days) +'_' + calculation_field + '_' + 'ema'] = moving_averages

    #     else:
    #         for days in day_ranges:
    #             dataframe[str(days) +'_' + calculation_field + '_' + 'sma'] = dataframe[calculation_field].rolling(days).mean()
        
    # return dataframe

In [5]:
# run on dataframe:

df = calculate_rolling_means(dataframe = df, 
                            stock_field = 'ticker', 
                            date_field= 'date',
                            calculation_field= 'close',
                            day_ranges = [7, 14, 21],
                            exponential= False)

In [6]:
df.head()


Unnamed: 0,date,high,low,open,close,volume,adj_close,ticker,7_close_ema,14_close_ema,21_close_ema
1848,2019-01-02,39.712502,38.557499,38.7225,39.48,148158800.0,38.168354,AAPL,,,
1849,2019-01-03,36.43,35.5,35.994999,35.547501,365248800.0,34.366493,AAPL,,,
1850,2019-01-04,37.137501,35.950001,36.1325,37.064999,234428400.0,35.833588,AAPL,,,
1851,2019-01-07,37.2075,36.474998,37.174999,36.982498,219111200.0,35.753819,AAPL,,,
1852,2019-01-08,37.955002,37.130001,37.389999,37.6875,164101200.0,36.435398,AAPL,,,


In [13]:
for i in product(df['date'].unique(), df['ticker'].unique()):

    list = i
    

In [15]:
list

Unnamed: 0,date,high,low,open,close,volume,adj_close,ticker,7_close_ema,14_close_ema,21_close_ema
1848,2019-01-02,39.712502,38.557499,38.7225,39.48,148158800.0,38.168354,AAPL,,,
1849,2019-01-03,36.43,35.5,35.994999,35.547501,365248800.0,34.366493,AAPL,,,
1850,2019-01-04,37.137501,35.950001,36.1325,37.064999,234428400.0,35.833588,AAPL,,,
1851,2019-01-07,37.2075,36.474998,37.174999,36.982498,219111200.0,35.753819,AAPL,,,
1852,2019-01-08,37.955002,37.130001,37.389999,37.6875,164101200.0,36.435398,AAPL,,,


In [25]:
df.columns[df.columns.str.contains('|'.join(['ema', 'sma']))]

Index(['7_close_ema', '14_close_ema', '21_close_ema'], dtype='object')

AttributeError: 'list' object has no attribute 'lower'