In [237]:
import pandas as pd

In [238]:
# display floats with 2 decimal places
pd.options.display.float_format = '{:,.5f}'.format
 
# expand display limits
pd.options.display.max_rows = 200
pd.options.display.max_columns = 100

In [239]:
df = pd.read_csv('all_stocks_2017-01-01_to_2018-01-01.csv',
                names=['Date', 'Open', 'High', 'Low', 'Close', 'Volume','Name']).reset_index().drop(0)
df.head()

Unnamed: 0,index,Date,Open,High,Low,Close,Volume,Name
1,1.0,2017-01-03,178.83,180.0,177.22,178.05,2510055.0,MMM
2,2.0,2017-01-04,178.03,178.9,177.61,178.32,1541985.0,MMM
3,3.0,2017-01-05,178.26,179.14,176.89,177.71,1447848.0,MMM
4,4.0,2017-01-06,177.29,178.6,175.8,178.23,1625049.0,MMM
5,5.0,2017-01-09,178.37,178.38,177.2,177.27,1622625.0,MMM


In [240]:
for i in ['Open', 'High', 'Low', 'Close', 'Volume']:
    df.loc[:,i] = df.loc[:,i].astype('float')

In [241]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7781 entries, 1 to 7781
Data columns (total 8 columns):
index     7781 non-null float64
Date      7781 non-null object
Open      7757 non-null float64
High      7772 non-null float64
Low       7762 non-null float64
Close     7781 non-null float64
Volume    7781 non-null float64
Name      7781 non-null object
dtypes: float64(6), object(2)
memory usage: 547.1+ KB


In [242]:

print( df.Name.unique() )

['MMM' 'AXP' 'AAPL' 'BA' 'CAT' 'CVX' 'CSCO' 'KO' 'DIS' 'XOM' 'GE' 'GS'
 'HD' 'IBM' 'INTC' 'JNJ' 'JPM' 'MCD' 'MRK' 'MSFT' 'NKE' 'PFE' 'PG' 'TRV'
 'UTX' 'UNH' 'VZ' 'WMT' 'GOOGL' 'AMZN' 'AABA']


In [243]:
df_AAPL = df[df.Name.isin(['AAPL','GOOGL','MSFT','AMZN'])]
 

In [244]:
# pivot dataset

# calculate returns over the prior 7, 14, 21 and 28 days for the first day of each month
# pivot the dataset while keeping only one price column

pivoted_df = df_AAPL.pivot(index='Date', columns='Name', values='Close')

# display examples from pivoted dataset
pivoted_df.tail()

Name,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-22,175.01,1168.36,1068.86,85.51
2017-12-26,170.57,1176.76,1065.85,85.4
2017-12-27,170.6,1182.26,1060.2,85.71
2017-12-28,171.08,1186.1,1055.95,85.72
2017-12-29,169.23,1169.47,1053.4,85.54


In [245]:
# calculate returns over the prior 7, 14, 21 and 28 days using shift method

# shift the index of the dataframe by some number of periods
pivoted_df.tail(3)

Name,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-27,170.6,1182.26,1060.2,85.71
2017-12-28,171.08,1186.1,1055.95,85.72
2017-12-29,169.23,1169.47,1053.4,85.54


In [246]:
pivoted_df.tail(3).shift(1)

Name,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-27,,,,
2017-12-28,170.6,1182.26,1060.2,85.71
2017-12-29,171.08,1186.1,1055.95,85.72


In [247]:
# to calcuate returns over 7 days prior we can use prices_today/prices_7_days_ago -1.0

#calculate returns over 7 days prior
delta_7 = pivoted_df / pivoted_df.shift(7) - 1.0

#display examples
delta_7.tail()

Name,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-22,0.01591,0.00363,0.01662,0.00187
2017-12-26,-0.00958,0.00213,0.00792,0.00838
2017-12-27,-0.01937,0.00265,-0.01101,-0.01313
2017-12-28,-0.03027,-0.00376,-0.02685,-0.00764
2017-12-29,-0.03042,-0.01508,-0.02443,-0.00338


In [248]:
# calculate returns over each window and store them in dictionary using loop

delta_dict = {}
for offset in [7, 14, 21, 28]:
    delta_dict['delta_{}'.format(offset)] = pivoted_df / pivoted_df.shift(offset) - 1.0

In [249]:
# melt data to create analytical base table (ABT)
# melt delta_7 returns

melted_7 = delta_7.reset_index().melt(id_vars='Date', value_name= 'delta_7')

#melted dataframe examples
melted_7.tail()

Unnamed: 0,Date,Name,delta_7
999,2017-12-22,MSFT,0.00187
1000,2017-12-26,MSFT,0.00838
1001,2017-12-27,MSFT,-0.01313
1002,2017-12-28,MSFT,-0.00764
1003,2017-12-29,MSFT,-0.00338


In [250]:
# to do so for all the returns dataframes, loop through delta_dict

#melt all the delta dataframes and store in list
melted_dfs = []
for key, delta_df in delta_dict.items():
    melted_dfs.append( delta_df.reset_index().melt(id_vars=['Date'], value_name=key))

In [251]:
# calculate 7-day returns after the date
return_df = pivoted_df.shift(-7) / pivoted_df - 1.0

# melt the return dataset and append to list
melted_dfs.append( return_df.reset_index().melt(id_vars=['Date'], value_name='return_7') )

In [252]:
# we now have 5 melted dataframes stred in the melted_dfs list
# now to join melted dataframes into a single ABT 

#merge two dataframes
pd.merge(melted_dfs[0], melted_dfs[1], on=['Date']).tail()

Unnamed: 0,Date,Name_x,delta_7,Name_y,delta_14
4011,2017-12-29,GOOGL,-0.02443,MSFT,0.0164
4012,2017-12-29,MSFT,-0.00338,AAPL,-0.00083
4013,2017-12-29,MSFT,-0.00338,AMZN,0.00643
4014,2017-12-29,MSFT,-0.00338,GOOGL,0.00383
4015,2017-12-29,MSFT,-0.00338,MSFT,0.0164


In [253]:
from functools import reduce

In [254]:
# create feature_dfs list that contains base features from original dataset plus melted dataset

# grab features from original dataset
base_df = df[['Date','Name', 'Volume', 'Close']]

# create a list with all the features dataframes
feature_dfs = [base_df] + melted_dfs

In [255]:
# now we can use reduce
# reduce applies to two arguments cummulatively objects and sequence 

# reduce-merge features into analytical bae table
abt = reduce(lambda left,right: pd.merge(left,right,on=['Date','Name']), feature_dfs)

# display examples from the ABT 
abt.tail(10)

Unnamed: 0,Date,Name,Volume,Close,delta_7,delta_14,delta_21,delta_28,return_7
994,2017-12-15,AMZN,4778621.0,1179.14,0.02325,-0.01396,0.04655,0.05218,0.00265
995,2017-12-18,AMZN,2947625.0,1190.58,0.02655,-0.00253,0.04686,0.06002,-0.00376
996,2017-12-19,AMZN,2587792.0,1187.38,0.02184,0.02248,0.05089,0.04811,-0.01508
997,2017-12-20,AMZN,2371166.0,1177.62,0.00744,0.00074,0.04556,0.04294,
998,2017-12-21,AMZN,2123117.0,1174.76,0.00831,0.01068,0.03095,0.04391,
999,2017-12-22,AMZN,1585054.0,1168.36,0.00363,0.03035,0.01055,0.03471,
1000,2017-12-26,AMZN,2005187.0,1176.76,0.00213,0.03083,-0.00779,0.03511,
1001,2017-12-27,AMZN,1867208.0,1182.26,0.00265,0.02596,-0.01135,0.04932,
1002,2017-12-28,AMZN,1841676.0,1186.1,-0.00376,0.02269,-0.00628,0.04292,
1003,2017-12-29,AMZN,2688391.0,1169.47,-0.01508,0.00643,0.00706,0.03504,
