This repository has been archived by the owner on Jul 14, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
backtesting.py
417 lines (320 loc) · 15 KB
/
backtesting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import timedelta as td
#get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams['figure.figsize'] = [24, 16]
#%%
# To know the memory usage, just in case out of memory
import os, psutil
def memory_usage():
    """Return the memory used by the current process, in MiB.

    Reads the ``rss`` field reported by ``psutil`` for this process id and
    divides it by 2**20.
    """
    process = psutil.Process(os.getpid())
    # `rss` is the first field of memory_info(); access it by name, not by position.
    return process.memory_info().rss / float(2 ** 20)


memory_usage() # initial memory usage
#%% [markdown]
# # The workflow: Indicator, Signal and strategy
#%% [markdown]
# ## Column-name conventions shared between the different stages and staff
# These names are the contract that lets the strategy stage interpret the columns of the data source.
#%%
def ma_col_name(nperiods):
    """Return the MA column name for `nperiods`, e.g. ``ma_col_name(5) == "MA5"``."""
    return "MA" + str(nperiods)
def pre_ma_col_name(nperiods):
    """Return the MA column name for `nperiods` prefixed with ``"Pre_"``, e.g. ``"Pre_MA5"``.

    NOTE(review): "Pre_" presumably marks the previous bar's MA value - confirm
    against the code that writes signals.csv.
    """
    return "Pre_" + ma_col_name(nperiods)
def crossup_col_name(ma_short, ma_long):
    """Return the cross-up signal column name, e.g. ``"5_CU_25"`` for (5, 25)."""
    return str(ma_short) + "_CU_" + str(ma_long)
def crossdown_col_name(ma_short, ma_long):
    """Return the cross-down signal column name, e.g. ``"5_CD_25"`` for (5, 25)."""
    return str(ma_short) + "_CD_" + str(ma_long)
#%% [markdown]
# ## The MA parameters we will use
#%%
# Short MA periods 5..10 and long MA periods 25..35 (both ends inclusive).
ma_range_short = [*range(5, 11)]
ma_range_long = [*range(25, 36)]
# Every MA period that is needed, short ones first.
ma_range = [*ma_range_short, *ma_range_long]
ma_range
#%% [markdown]
# ## The Stoploss level we will use
#%%
# Stop-loss rates to scan: 0.5 %, then 1 %..5 % in 1 % steps, and finally 1,
# which strategy_test treats as "no stop-loss".
stoploss_range = [0.005] + [pct / 100.0 for pct in range(1, 6)] + [1]
stoploss_range
#%% [markdown]
# # Backtesting
#%% [markdown]
# # Load market data, indicators and signals
# Loading them from file keeps steps 1 and 2 independent of backtesting.
#%%
# Read the pre-computed signals, indexed by the parsed "datetime" column,
# and put the rows in ascending index order.
df = pd.read_csv("signals.csv", index_col="datetime", parse_dates=["datetime"])
df = df.sort_index(ascending=True)
# Test df.iloc[0]
#%% [markdown]
# ## Backtesting
#%%
def strategy_test(df, ma_short=5, ma_long=25, stoploss_rate=0.05):
    """Backtest for a set of (MA short periods, MA long periods, stop loss rate).

    Walks `df` row by row. On each row the stop-loss is checked first, then the
    cross-up / cross-down signal for this MA pair is applied: a cross-up buys
    one unit, a cross-down sells one unit.

    Args:
        df: DataFrame with a "close" column plus the signal columns named by
            crossup_col_name(ma_short, ma_long) and
            crossdown_col_name(ma_short, ma_long).
        ma_short: short MA period, used only to select the signal columns.
        ma_long: long MA period, used only to select the signal columns.
        stoploss_rate: adverse move, as a fraction of the last entry price,
            that closes the whole position. A value >= 1 disables the stop-loss.

    Returns:
        DataFrame indexed like `df` with columns "close", "holding",
        "cash_flows", "trades", "stoploss", "trades_pl", "cum_cash_flow",
        "cash", "strategy" and "strategy_pl_cum".
    """
    ticker_holdings = []  # position held after each row
    ticker_trades = []  # +1 buy / -1 sell / 0 none, from signals only
    ticker_stoploss = []  # units traded by the stop-loss on each row
    trades_pl = []  # P/L rate recorded on each row
    cash_flows = []  # cash flow (in when sell and out when buy)
    current_holding = 0
    previous_cost_Buys = 0
    previous_cost_Sells = 0
    initial_cash = 0  # the first traded price, used as the initial investment
    crossdown = crossdown_col_name(ma_short, ma_long)
    crossup = crossup_col_name(ma_short, ma_long)
    stoploss_enabled = stoploss_rate < 1  # when stoploss >= 1 there is no stop-loss

    for _, row in df.iterrows():
        close = row["close"]
        net_cash_flow = 0
        trades = 0  # non-cover
        stoploss_trades = 0  # cover
        trade_pl = 0

        if stoploss_enabled:
            # Stop-loss first; the position is closed at the stop level, not at the close.
            long_stop = (1 - stoploss_rate) * previous_cost_Buys
            short_stop = (1 + stoploss_rate) * previous_cost_Sells
            if current_holding > 0 and close < long_stop:  # stop loss for long
                stoploss_trades = -current_holding
                current_holding = 0
                net_cash_flow = long_stop * abs(stoploss_trades)
                trade_pl = -stoploss_rate  # the loss
            elif current_holding < 0 and close > short_stop:  # stop loss for short
                stoploss_trades = -current_holding
                current_holding = 0
                trade_pl = -stoploss_rate  # the loss
                net_cash_flow = -(short_stop * abs(stoploss_trades))

        bool_crossdown = row[crossdown]
        bool_crossup = row[crossup]

        # Then we deal with the buy and sell signals.
        # NOTE(review): the nesting was rebuilt from a listing that had lost its
        # indentation. Only the P/L line is assumed to sit under the "cover"
        # check; the trade itself is assumed to happen on every signal. Confirm.
        if bool_crossup:
            if current_holding < 0:  # cover short
                trade_pl = -(close / previous_cost_Sells - 1)
            current_holding = current_holding + 1
            net_cash_flow = net_cash_flow - close
            previous_cost_Buys = close
            trades = 1
            if initial_cash == 0:
                initial_cash = close
        elif bool_crossdown:
            if current_holding > 0:  # cover long
                trade_pl = close / previous_cost_Buys - 1
            current_holding = current_holding - 1
            net_cash_flow = net_cash_flow + close
            previous_cost_Sells = close
            trades = -1
            if initial_cash == 0:
                initial_cash = close

        ticker_holdings.append(current_holding)
        cash_flows.append(net_cash_flow)
        ticker_trades.append(trades)
        ticker_stoploss.append(stoploss_trades)
        trades_pl.append(trade_pl)

    df_strategy = pd.DataFrame(
        {
            "close": df["close"],
            "holding": ticker_holdings,
            "cash_flows": cash_flows,
            "trades": ticker_trades,
            "stoploss": ticker_stoploss,
            "trades_pl": trades_pl,
        },
        index=df.index,
    )
    df_strategy["cum_cash_flow"] = df_strategy["cash_flows"].cumsum()
    df_strategy["cash"] = initial_cash + df_strategy["cum_cash_flow"]
    # Portfolio value = marked-to-close position + cash.
    df_strategy["strategy"] = df_strategy.holding * df_strategy.close + df_strategy["cash"]
    # initial_cash is still 0 if no signal ever fired; this is then a division by zero.
    df_strategy["strategy_pl_cum"] = df_strategy["strategy"] / initial_cash - 1
    return df_strategy
#%% [markdown]
# ## We define the metrics
#%%
# global parameters we share between
# Constants shared by the metric functions.
target_daily_return = 0  # We use 0 as the target daily return
risk_free = 0.01  # for 2018 the low interest rate
annual_trading_days = 252
# NOTE(review): the risk-free rate is de-annualised over 252*12*24 periods,
# while the annualisation factor below is sqrt(252) - confirm which bar
# frequency the metrics are meant for.
annualized_parameter = annual_trading_days ** 0.5
risk_free_5m = (1 + risk_free) ** (1 / (annual_trading_days * 12 * 24)) - 1
#%%
risk_free_5m
#%% [markdown]
# ### Sharpe Ratio
#%%
def sharpe_ratio(values):
    """To calculate Sharpe Ratio by portfolio values.

    Args:
        values: pandas Series of portfolio values, one entry per row of the backtest.

    Returns:
        Tuple ``(ratio, annualized_ratio)`` where ``ratio`` is the mean
        period-over-period return minus ``risk_free_5m``, divided by the
        standard deviation of those returns, and ``annualized_ratio`` is
        ``ratio`` scaled by ``annualized_parameter``.
    """
    period_return = values.pct_change()
    # Locals are named so that they do not shadow this function's own name.
    ratio = (period_return.mean() - risk_free_5m) / period_return.std()
    ratio_annual = annualized_parameter * ratio
    return ratio, ratio_annual
#%% [markdown]
# ### Sortino Ratio
#%%
def sortino_ratio(values, target_return=0):
    """To calculate Sortino Ratio by portfolio values.

    Args:
        values: pandas Series of portfolio values, one entry per row of the backtest.
        target_return: returns below this level count as downside.

    Returns:
        Tuple ``(ratio, annualized_ratio)`` where ``ratio`` is the mean
        period-over-period return minus ``risk_free_5m``, divided by the
        downside deviation, and ``annualized_ratio`` is ``ratio`` scaled by
        ``annualized_parameter``.
    """
    period_return = values.pct_change()
    expected_return = period_return.mean()
    # Shortfall below the target; rows at/above the target (and the leading NaN
    # from pct_change, which compares False) contribute 0 but still count in
    # the mean, as in the previous applymap-based version.
    shortfall = np.where(period_return < target_return, period_return - target_return, 0.0)
    down_stdev = np.sqrt(np.mean(shortfall ** 2))
    ratio = (expected_return - risk_free_5m) / down_stdev
    ratio_annual = annualized_parameter * ratio
    return ratio, ratio_annual
#%% [markdown]
# ### Maximum Drawdown
#%%
def max_drawdown(values):
    """To calculate maximum drawdown.

    Args:
        values: pandas Series of portfolio values.

    Returns:
        The largest fall from a running peak as a positive fraction
        (0.25 means a 25 % fall); NaN for an empty series.
    """
    ratio_to_peak = values / values.cummax()
    # Series.min() skips NaN; the builtin min() gave order-dependent results
    # when NaN was present and raised on an empty series.
    return 1 - float(ratio_to_peak.min())  # We use positive value
#%%
def strategy_metrics(value_serie):
    """To calculate all required performance indicators.

    Args:
        value_serie: pandas Series of portfolio values.

    Returns:
        Tuple ``(sharpe, sortino, max_dd)``; the Sharpe and Sortino figures
        are the non-annualized ones.
    """
    sharpe, _ = sharpe_ratio(value_serie)
    sortino, _ = sortino_ratio(value_serie)
    max_dd = max_drawdown(value_serie)
    return sharpe, sortino, max_dd
#%%
# clear global variables
dfs = dict()  # per-strategy result frames, keyed by strategy name
series = list()  # one metrics pd.Series per strategy
df_metrics = pd.DataFrame()  # metrics table built from `series`
#%%
def all_strategy_test(ma_range_short, ma_range_long, stoploss_range):
    """Backtest parameter combinations of MA short range, MA long range and stop loss range.

    Runs strategy_test on the module-level `df` for every combination and
    stores the results in the module globals:
      * dfs        - result frame per strategy name "<short>_<long>_<stoploss>"
      * series     - one pd.Series of metrics per strategy
      * df_metrics - metrics table indexed by strategy name

    NOTE(review): a second definition with the same name appears later in this
    file and replaces this one once the module has been executed.
    """
    global df_metrics, series, dfs
    df_metrics = pd.DataFrame()
    series = []
    dfs = {}
    for ma_short in ma_range_short:
        for ma_long in ma_range_long:
            for stoploss in stoploss_range:
                strategy_name = str(ma_short) + "_" + str(ma_long) + "_" + str(stoploss)
                df_strategy = strategy_test(df, ma_short, ma_long, stoploss)
                dfs[strategy_name] = df_strategy
                sharpe, sortino, max_dd = strategy_metrics(df_strategy.strategy)
                # Signal trades and stop-loss trades both count as trades.
                trades_total = (abs(df_strategy.trades) + abs(df_strategy.stoploss)).sum()
                trades_mean = df_strategy.trades_pl.mean()
                final_pl = df_strategy.strategy_pl_cum.iloc[-1]
                series.append(pd.Series({
                    "strategy_name": strategy_name,
                    "ma_short": ma_short,
                    "ma_long": ma_long,
                    "stoploss": stoploss,
                    "sharpe_ratio": sharpe,
                    "sortino_ratio": sortino,
                    "max_drawdown": max_dd,
                    "Final_Return": final_pl,
                    "Total_Trades": trades_total,
                    "Average_Trades_PL": trades_mean,
                }))
    # One column per strategy, transposed into one row per strategy.
    df_metrics = pd.concat(series, ignore_index=True, axis=1).T
    df_metrics.set_index('strategy_name', inplace=True)
    # The transposed frame holds generic objects; cast every metric once.
    df_metrics = df_metrics.astype({
        "ma_short": "int64",
        "ma_long": "int64",
        "stoploss": "float64",
        "sharpe_ratio": "float64",
        "sortino_ratio": "float64",
        "max_drawdown": "float64",
        "Final_Return": "float64",
        "Total_Trades": "int64",
        "Average_Trades_PL": "float64",
    })
#%% [markdown]
# ## To Back Test All Strategy Combinations (about 25 minutes)
#%%
if __name__ == '__main__':
    # clear global variables
    df_metrics = pd.DataFrame()
    series = []
    dfs = {}
#%%
def adjust_datatype(df_metrics):
    """Index the metrics table by strategy name and cast its columns, in place.

    Args:
        df_metrics: DataFrame with a 'strategy_name' column and the metric
            columns listed below. It is modified in place.

    Returns:
        None.
    """
    df_metrics.set_index('strategy_name', inplace=True)
    column_dtypes = {
        "ma_short": "int64",
        "ma_long": "int64",
        "stoploss": "float64",
        "sharpe_ratio": "float64",
        "sortino_ratio": "float64",
        "max_drawdown": "float64",
        "Final_Return": "float64",
        "Total_Trades": "int64",
        "Average_Trades_PL": "float64",
    }
    for column, dtype in column_dtypes.items():
        # Item assignment always replaces the column; attribute assignment
        # would silently create a plain attribute if the column were missing.
        df_metrics[column] = df_metrics[column].astype(dtype)
def all_strategy_test(ma_range_short, ma_range_long, stoploss_range):
    """Backtest parameter combinations of MA short range, MA long range and stop loss range.

    Runs strategy_test on the module-level `df` for every combination and
    stores the results in the module globals:
      * dfs        - result frame per strategy name "<short>_<long>_<stoploss>"
      * series     - one pd.Series of metrics per strategy
      * df_metrics - metrics table, indexed and typed by adjust_datatype
    """
    global df_metrics, series, dfs
    df_metrics = pd.DataFrame()
    series = []
    dfs = {}
    for ma_short in ma_range_short:
        for ma_long in ma_range_long:
            for stoploss in stoploss_range:
                strategy_name = str(ma_short) + "_" + str(ma_long) + "_" + str(stoploss)
                df_strategy = strategy_test(df, ma_short, ma_long, stoploss)
                dfs[strategy_name] = df_strategy
                # `sharpe` rather than `sharpe_ratio`, so the module-level function is not shadowed.
                sharpe, sortino, max_dd = strategy_metrics(df_strategy.strategy)
                # Signal trades and stop-loss trades both count as trades.
                trades_total = (abs(df_strategy.trades) + abs(df_strategy.stoploss)).sum()
                trades_mean = df_strategy.trades_pl.mean()
                final_pl = df_strategy.strategy_pl_cum.iloc[-1]
                series.append(pd.Series({
                    "strategy_name": strategy_name,
                    "ma_short": ma_short,
                    "ma_long": ma_long,
                    "stoploss": stoploss,
                    "sharpe_ratio": sharpe,
                    "sortino_ratio": sortino,
                    "max_drawdown": max_dd,
                    "Final_Return": final_pl,
                    "Total_Trades": trades_total,
                    "Average_Trades_PL": trades_mean,
                }))
    # One column per strategy, transposed into one row per strategy.
    df_metrics = pd.concat(series, ignore_index=True, axis=1).T
    adjust_datatype(df_metrics)
#%% [markdown]
# ## MultiProcessing (about 3? minutes) Version--To Back Test All Strategy Combinations
#%%
def strategy_test_process(df_dict, series, global_df, ma_short=5, ma_long=25, stoploss_rate=0.05):
    """Run one backtest and record its outcome in the shared containers.

    Args:
        df_dict: mapping that receives the result frame under the strategy
            name "<short>_<long>_<stoploss>" (the __main__ section passes a
            Manager dict).
        series: list-like that receives one pd.Series of metrics (the
            __main__ section passes a Manager list).
        global_df: signals DataFrame handed to strategy_test.
        ma_short: short MA period.
        ma_long: long MA period.
        stoploss_rate: stop-loss rate handed to strategy_test.

    Returns:
        None; results are delivered through `df_dict` and `series`.
    """
    strategy_name = str(ma_short) + "_" + str(ma_long) + "_" + str(stoploss_rate)
    df_strategy = strategy_test(global_df, ma_short, ma_long, stoploss_rate)
    df_dict[strategy_name] = df_strategy
    # `sharpe` rather than `sharpe_ratio`, so the module-level function is not shadowed.
    sharpe, sortino, max_dd = strategy_metrics(df_strategy.strategy)
    # Signal trades and stop-loss trades both count as trades.
    trades_total = (abs(df_strategy.trades) + abs(df_strategy.stoploss)).sum()
    trades_mean = df_strategy.trades_pl.mean()
    final_pl = df_strategy.strategy_pl_cum.iloc[-1]
    series.append(pd.Series({
        "strategy_name": strategy_name,
        "ma_short": ma_short,
        "ma_long": ma_long,
        "stoploss": stoploss_rate,
        "sharpe_ratio": sharpe,
        "sortino_ratio": sortino,
        "max_drawdown": max_dd,
        "Final_Return": final_pl,
        "Total_Trades": trades_total,
        "Average_Trades_PL": trades_mean,
    }))
#print(strategy_name+" Average_Trades_PL:"+str(trades_mean))
# Step 1: Redefine, to accept `i`, the iteration number
def howmany_within_range2(i, row, minimum, maximum):
    """Returns how many numbers lie within `maximum` and `minimum` in a given `row`.

    Args:
        i: iteration number, passed through so results can be matched to inputs.
        row: iterable of numbers.
        minimum: lower bound, inclusive.
        maximum: upper bound, inclusive.

    Returns:
        Tuple ``(i, count)``.
    """
    count = sum(1 for n in row if minimum <= n <= maximum)
    return (i, count)
import multiprocessing as mp
from multiprocessing import Manager
#print(mp.cpu_count())
#print([(ma_short, ma_long, stoploss_rate) for ma_short in list([5,6]) for ma_long in list([25,26]) for stoploss_rate in list([0.01,0.02])])
if __name__ == '__main__':
    mp.freeze_support()
    #mp.set_start_method("spawn")
    processingtest = False  # True runs only the small Pool check below instead of the backtests
    pool = mp.Pool(mp.cpu_count())
    try:
        if processingtest:
            # Parallelizing with Pool.starmap_async()
            # Prepare data
            #np.random.RandomState(100)
            arr = np.random.randint(0, 10, size=[2000, 5])
            data = arr.tolist()
            results = pool.starmap_async(
                howmany_within_range2,
                [(i, row, 4, 8) for i, row in enumerate(data)],
            ).get()
            print(results[:10])
        else:
            with Manager() as manager:
                t_begin = dt.datetime.now()
                # Manager-backed containers, so the workers' writes reach this process.
                df_dict = manager.dict()
                series = manager.list()
                # One task per (MA short, MA long, stop-loss) combination.
                rows = [
                    (df_dict, series, df, ma_short, ma_long, stoploss_rate)
                    for ma_short in ma_range_short
                    for ma_long in ma_range_long
                    for stoploss_rate in stoploss_range
                ]
                results = pool.starmap_async(strategy_test_process, rows).get()
                t_end = dt.datetime.now()
                print(t_end - t_begin)
                # Copy the metrics out of the proxy while the manager is still alive.
                df_metrics = pd.concat(list(series), ignore_index=True, axis=1).T
            adjust_datatype(df_metrics)
            df_metrics.to_csv("metrics-processing.csv")
    finally:
        # Always release the worker processes, also when a task raised.
        pool.close()
        pool.join()