forked from StevenDowney86/Public_Research_and_Backtests
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Replicating Meb Faber Momentum and Trend Strategy (Public).py
322 lines (254 loc) · 10.8 KB
/
Replicating Meb Faber Momentum and Trend Strategy (Public).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 9 17:16:11 2019
@author: downey
"""
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import time
from itertools import product
import seaborn as sns
results = pd.DataFrame()
pd.set_option('display.max_columns', 50)
#parse the dates in monthly fama french
mydateparser = lambda x: pd.datetime.strptime(x, "%Y%m")
raw = pd.read_csv('Coding/Python/Data Files/10_Industry_Portfolios copy.csv', index_col = 0, parse_dates = True, date_parser=mydateparser)
#slice the data to be start - end of 2008 so I can replicate Meb Faber's work
#which ended 2008
raw2 = raw.iloc[0:990,]
raw2.head()
raw2.tail()
raw = raw2
raw.tail()
raw.head(10)
#convert to decimal, which gets to percentage terms
raw = raw/100
raw.describe()
asset_number = raw.shape[1]
asset_number
##########create rolling returns and index values#########
index = (1+raw).cumprod()
index.head()
index.tail()
log_returns = np.log(index/index.shift(1))
log_returns.head(10)
log_returns.tail()
#create new column names
nameslist = []
for col in index.columns:
nameslist.append(col + str(' log returns'))
log_returns.columns = nameslist
#join dataframes
index = index.join(log_returns, on = 'Date')
column_number_end_returns = index.shape[1]
#choose the number of months look back for momentum
month_lookback = 12
rolling_sum = index.iloc[:,asset_number:].rolling(month_lookback).sum()
rolling_sum.tail(15)
nameslist2 = []
for col in index.iloc[:,0:asset_number].columns:
nameslist2.append(col + str(' log ret. roll'))
rolling_sum.columns = nameslist2
index = index.join(rolling_sum, on = 'Date')
index.tail()
######TRYING TO IMPLEMENT VIA FOR LOOP##########
#range_values = range(1,1,1)
#top_assets = range(1,11,1)
#names = list(raw.columns.values)
#momentum_df = pd.DataFrame()
#for rangevalues, topassets in product(range_values, top_assets):
#print('Testing average of length: '+ str(rangevalues))
#rolling_sum = log_returns.rolling(rangevalues).sum()
#sortedvalues = rolling_sum.apply(pd.Series.rank, axis = 1)
#df3 = rolling_sum
#df4 = df3.drop(['NoDur','Durbl','Manuf','Enrgy','HiTec','Telcm','Shops','Utils','Hlth ','Other'], axis = 1)
#Rank_Number = 11 - topassets
#for string in names:
#first_column = pd.DataFrame(np.where(sortedvalues[string] >= Rank_Number, 1, 0))
#df4 = pd.concat([df4, first_column], axis = 1, ignore_index = False)
#df4.head(15)
####################################################
#sort values at each rolling period
sortedvalues = index.iloc[:,column_number_end_returns:].apply(pd.Series.rank, axis = 1)
sortedvalues.head(15)
sortedvalues.tail(15)
#create columns
nameslist3 = []
for col in index.iloc[:,:asset_number].columns:
nameslist3.append(col + str(' rank'))
sortedvalues.columns = nameslist3
index = index.join(sortedvalues, on = 'Date')
index.tail()
#the rank is 1 - 10 with 10 being the best, thus 8,9,10 is top 3
#choose the top number of assets out of 10 to include
Top_Assets = 3
#Have to use (asset_number + 1) minus the Top_Assets number in order for the rank to work
Rank_Number = (asset_number+1) - Top_Assets
df3 = rolling_sum
#couldn't figure out how to create DataFrame with same date index so did it
#this way which isn't pretty
df4 = df3.drop([df3.columns[0],
df3.columns[1],
df3.columns[2],
df3.columns[3],
df3.columns[4],
df3.columns[5],
df3.columns[6],
df3.columns[7],
df3.columns[8],
df3.columns[9]],axis='columns')
df4.head()
df4['NonDur_allocation'] = np.where(sortedvalues.iloc[:,0] >= Rank_Number, 1, 0)
df4['Durbl_allocation'] = np.where(sortedvalues.iloc[:,1] >= Rank_Number, 1, 0)
df4['Manuf_allocation'] = np.where(sortedvalues.iloc[:,2] >= Rank_Number, 1, 0)
df4['Enrgy_allocation'] = np.where(sortedvalues.iloc[:,3] >= Rank_Number, 1, 0)
df4['HiTec_allocation'] = np.where(sortedvalues.iloc[:,4] >= Rank_Number, 1, 0)
df4['Telcm_allocation'] = np.where(sortedvalues.iloc[:,5] >= Rank_Number, 1, 0)
df4['Shops_allocation'] = np.where(sortedvalues.iloc[:,6] >= Rank_Number, 1, 0)
df4['Utils_allocation'] = np.where(sortedvalues.iloc[:,7] >= Rank_Number, 1, 0)
df4['Hlth_allocation'] = np.where(sortedvalues.iloc[:,8] >= Rank_Number, 1, 0)
df4['Other_allocation'] = np.where(sortedvalues.iloc[:,9] >= Rank_Number, 1, 0)
df4.head(15)
df4.tail()
index = index.join(df4, on = 'Date')
index.tail()
#Create SMA dataframe so that if the chosen assets are in uptrend allocate
#otherwise put in cash
rolling_SMA = df3.drop([df3.columns[0],
df3.columns[1],
df3.columns[2],
df3.columns[3],
df3.columns[4],
df3.columns[5],
df3.columns[6],
df3.columns[7],
df3.columns[8],
df3.columns[9]],axis='columns')
#easier way to create SMA df
SMA = month_lookback
df_sma = index.iloc[:,0:asset_number].rolling(SMA).mean()
df_sma.tail(15)
#create columns
nameslist4 = []
for col in index.iloc[:,:asset_number].columns:
nameslist4.append(col + str(' SMA'))
df_sma.columns = nameslist4
index = index.join(df_sma, on = 'Date')
index.tail()
#create new data frame that gives proper asset weight
df5 = df4/Top_Assets
df5.tail()
#create columns
nameslist5 = []
for col in index.iloc[:,:asset_number].columns:
nameslist5.append(col + str(' weighted allocation'))
df5.columns = nameslist5
index = index.join(df5, on = 'Date')
index.tail()
#invest in assets that are topped rank only if above SMA trend
df7 = df3.drop([df3.columns[0],
df3.columns[1],
df3.columns[2],
df3.columns[3],
df3.columns[4],
df3.columns[5],
df3.columns[6],
df3.columns[7],
df3.columns[8],
df3.columns[9]],axis='columns')
index.columns
df7['NonDur'] = np.where(index.iloc[:,0] >= index.iloc[:,-asset_number*2], index.iloc[:,-asset_number], 0)
df7['Durbl'] = np.where(index.iloc[:,1] >= index.iloc[:,-asset_number*2+1], index.iloc[:,-asset_number+1], 0)
df7['Manuf'] = np.where(index.iloc[:,2] >= index.iloc[:,-asset_number*2+2], index.iloc[:,-asset_number+2], 0)
df7['Enrgy'] = np.where(index.iloc[:,3] >= index.iloc[:,-asset_number*2+3], index.iloc[:,-asset_number+3], 0)
df7['HiTec'] = np.where(index.iloc[:,4] >= index.iloc[:,-asset_number*2+4], index.iloc[:,-asset_number+4], 0)
df7['Telcm'] = np.where(index.iloc[:,5] >= index.iloc[:,-asset_number*2+5], index.iloc[:,-asset_number+5], 0)
df7['Shops'] = np.where(index.iloc[:,6] >= index.iloc[:,-asset_number*2+6], index.iloc[:,-asset_number+6], 0)
df7['Utils'] = np.where(index.iloc[:,7] >= index.iloc[:,-asset_number*2+7], index.iloc[:,-asset_number+7], 0)
df7['Hlth'] = np.where(index.iloc[:,8] >= index.iloc[:,-asset_number*2+8], index.iloc[:,-asset_number+8], 0)
df7['Other'] = np.where(index.iloc[:,9] >= index.iloc[:,-asset_number*2+9], index.iloc[:,-asset_number+9], 0)
#create columns
nameslist6 = []
for col in index.iloc[:,:asset_number].columns:
nameslist6.append(col + str(' SMA Filtered Allocation'))
df7.columns = nameslist6
index = index.join(df7, on = 'Date')
#multiply the weights in index by the returns in raw returns and shift so no look
#ahead bias. the top ranked this month, those weights apply to next month
#returns
df6 = pd.DataFrame()
df6['NonDur_Port_Return'] = index.iloc[:,-asset_number].shift(1)*raw.iloc[:,0]
df6['Durbl_Port_Return'] = index.iloc[:,-asset_number+1].shift(1)*raw.iloc[:,1]
df6['Manuf_Port_Return'] = index.iloc[:,-asset_number+2].shift(1)*raw.iloc[:,2]
df6['Enrgy_Port_Return'] = index.iloc[:,-asset_number+3].shift(1)*raw.iloc[:,3]
df6['HiTec_Port_Return'] = index.iloc[:,-asset_number+4].shift(1)*raw.iloc[:,4]
df6['Telcm_Port_Return'] = index.iloc[:,-asset_number+5].shift(1)*raw.iloc[:,5]
df6['Shops_Port_Return'] = index.iloc[:,-asset_number+6].shift(1)*raw.iloc[:,6]
df6['Hlth_Port_Return'] = index.iloc[:,-asset_number+7].shift(1)*raw.iloc[:,7]
df6['Util_Port_Return'] = index.iloc[:,-asset_number+8].shift(1)*raw.iloc[:,8]
df6['Other_Port_Return'] = index.iloc[:,-asset_number+9].shift(1)*raw.iloc[:,9]
df6.tail()
#create columns
nameslist7 = []
for col in index.iloc[:,:asset_number].columns:
nameslist7.append(col + str(' Str Return'))
df6.columns = nameslist7
index = index.join(df6, on = 'Date')
#sum the returns of the portfolio assets
df8 = pd.DataFrame()
df8['Strategy Returns'] = index.iloc[:,-asset_number:].sum(axis = 1, skipna = True)
index = index.join(df8, on = 'Date')
#index of the cumulative geometric returns
performance = (1+index.iloc[:,-1]).cumprod()
perf = performance.iloc[-1,]
print(perf)
#find the number of months in the dataframe
column_length = index.iloc[:,-1].shape[0]
years = column_length/12
#get annualized returns for strategy - getting wrong answer here
annualized_return = (perf ** (1/years)) - 1
print(annualized_return)
#annualized standard deviation of portfolio
std = index.iloc[:,-1].std() * (12**.5)
print(std)
#repeat the results manually changing assets and lookback window
results = results.append(pd.DataFrame(
{'Window': month_lookback, "Top Assets": Top_Assets,
'Std Dev': std,
'STRATEGY': annualized_return},
index = [0]), ignore_index = True)
results
########################################################################
heatmapresults = results
heatmapresults = heatmapresults.pivot("Window","Top Assets","STRATEGY")
heatmapresults.head()
ax = sns.heatmap(heatmapresults, annot = True, cmap="YlGnBu")
######calculate max drawdown#########
# We are going to use a trailing 12 month trading day window
window = 12
# Calculate the max drawdown in the past window days for each day in the series.
# Use min_periods=1 if you want to let the first 252 days data have an expanding window
Roll_Max = pd.DataFrame()
Roll_Max = performance.rolling(window).max()
Monthly_Drawdown = performance/Roll_Max - 1.0
# Next we calculate the minimum (negative) daily drawdown in that window.
# Again, use min_periods=1 if you want to allow the expanding window
Max_Monthly_Drawdown = Monthly_Drawdown.rolling(100).min()
# Plot the results
Monthly_Drawdown.plot()
Max_Monthly_Drawdown.plot()
##########################
raw2 = pd.read_csv('/Users/downey/Coding/Python/Python for Finance/F-F_Research_Data_Factors_daily.csv', index_col = 0, parse_dates = True)
raw2.head()
raw2['Market+RF'] = raw2['Mkt-RF'] + raw2['RF']
raw2 = raw2/100
raw2['index'] = (1+raw2['Market+RF']).cumprod()
column_length2 = raw2.shape[0]
last_row = raw2.iloc[-1,-1]
last_row
annualized_return2 = (last_row ** (1/column_length2)) ** (252) - 1
#Annualized Return for the market over similar time frame
print(annualized_return2)