In [1]:
# import packages
import pandas as pd
from bt_classes import *
# Load the sample data (first CSV column becomes the parsed datetime index)
# and run the backtest on its first 500 rows.
orig_df = pd.read_csv(
    '../res/test_data.csv',
    index_col=0,
    parse_dates=True,
)
my_backtest(orig_df.iloc[:500])

Starting Portfolio Value: 100000.00
Final Portfolio Value: 269972.00
Sharpe: 5.45
Max drawdown: 0.92%
Annual rate: 64.96%


In [8]:
# import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
# Default figure size (width, height) for plots drawn in this notebook.
rcParams['figure.figsize'] = (20, 10)

# Model input table; later cells read its 'log_r' and 'label' columns.
df = pd.read_csv('../res/input0130.csv')

# Frame handed to the backtests, indexed by its parsed 'date' column.
orig_df = (
    pd.read_csv('../xau_1d_20y.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

In [4]:
#importing required libraries
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
# Following the literature, we do not use min-max scaling here; instead we
# standardise with the mean and std estimated on the training data only.
from sklearn.preprocessing import StandardScaler
from itertools import chain

# and we define our model here
def lstm_model(para_a=42, para_b=17, timesteps=240):
    """Build and compile the stacked two-layer LSTM binary classifier.

    Args:
        para_a: Number of units in the first LSTM layer (returns sequences).
        para_b: Number of units in the second LSTM layer.
        timesteps: Length of each input sequence; the model expects inputs of
            shape ``(timesteps, 1)``. Defaults to 240, the look-back window
            used throughout this notebook.

    Returns:
        A compiled ``Sequential`` model with a 2-unit softmax output, trained
        with categorical cross-entropy and the Adam optimizer, reporting
        accuracy.
    """
    model = Sequential()
    # Author's experiment notes: (25,15)-57, (42,17)-58
    # (presumably (para_a, para_b) -> accuracy in percent -- TODO confirm).
    model.add(LSTM(units=para_a, dropout=0.1, return_sequences=True,
                   input_shape=(timesteps, 1), activation='tanh'))
    model.add(LSTM(units=para_b, dropout=0.08, activation='tanh'))
    # model.add(Dropout(0.08))  # Adding this gave roughly the same results as before; it should (to some extent) help prevent overfitting.
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

Using TensorFlow backend.


## Method 1: train only once (on a long window of 2000 days) and test several times (250 days per test).

In [10]:
# Rows [train_begin, train_end) are the prediction targets; the 240 rows
# before train_begin are loaded only to provide look-back history.
train_begin = 240
train_end = train_begin + 2000

train_set = df[['log_r', 'label']][train_begin - 240:train_end].reset_index()

# Fit the scaler on the training rows and flatten the (n, 1) output to a list.
scaler = StandardScaler()
scaled_log_r = scaler.fit_transform(train_set['log_r'].values.reshape(-1, 1))
x_train_set = list(chain.from_iterable(scaled_log_r))

# One sample per target row: the 240 scaled values that precede it, paired
# with that row's label.
sample_rows = range(240, len(x_train_set))
x_train = np.array([x_train_set[row - 240:row] for row in sample_rows])
y_train = np.array([train_set['label'][row] for row in sample_rows])

# One-hot labels for the 2-way softmax; inputs as (samples, timesteps, 1).
y_train = to_categorical(y_train, 2)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

model = lstm_model()
model.fit(
    x_train,
    y_train,
    epochs=30,
    batch_size=100,
    callbacks=[EarlyStopping(monitor='loss', patience=10)],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x1c5315c7a08>

In [11]:
# Backtest on the training window using the model's own in-sample predictions.
# .copy() makes train_df an independent frame, so adding the 'label' column
# neither triggers SettingWithCopyWarning nor risks writing into orig_df.
train_df = orig_df.iloc[train_begin:train_end].copy()
# argmax over the softmax output is the predicted class; this is what
# Sequential.predict_classes computed, and it also works on Keras versions
# where predict_classes has been removed.
train_df['label'] = np.argmax(model.predict(x_train), axis=-1)
my_backtest(train_df)

Starting Portfolio Value: 100000.00
Final Portfolio Value: 197341.00
Sharpe: 1.03
Max drawdown: 10.73%
Annual rate: 8.94%


In [12]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the trained model.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 240, 42)           7392      
_________________________________________________________________
lstm_6 (LSTM)                (None, 17)                4080      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 36        
Total params: 11,508
Trainable params: 11,508
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Save the trained model to 'newest_lstm.h5' (path is relative to the working directory).
model.save('newest_lstm.h5')

In [23]:
def my_backtest(mydata, log=False, drawpic=False, iplot=False):
    """Run one backtrader backtest on ``mydata`` and report summary metrics.

    Prints the starting and final portfolio value, the annualised Sharpe
    ratio, the maximum drawdown and the normalised annual return.

    Args:
        mydata: Data passed as ``dataname`` to ``MyPandasData``.
        log: If True run ``GoldStrategy``; otherwise ``GoldStrategy_nolog``.
        drawpic: If True, plot the run with ``cerebro.plot`` (volume hidden).
        iplot: Forwarded to ``cerebro.plot`` as its ``iplot`` argument.

    Returns:
        Tuple ``(sharpe, dd, ar)``: the 'sharperatio' value of the
        SharpeRatio_A analyzer (may be None when backtrader cannot compute
        it), the max drawdown reported by the DrawDown analyzer, and the
        'rnorm100' value of the Returns analyzer.

    Side effects:
        Sets ``plt.rcParams['figure.figsize']`` to 12,8 and prints to stdout.
    """
    plt.rcParams['figure.figsize'] = 12,8
    cerebro = bt.Cerebro()
    data = MyPandasData(dataname=mydata)
    cerebro.adddata(data)
    if log:
        cerebro.addstrategy(GoldStrategy)
    else:
        cerebro.addstrategy(GoldStrategy_nolog)
    cerebro.addsizer(OptInvest)
    init_value = 100000.0
    cerebro.broker.setcash(init_value)
    # NOTE(review): margin + mult presumably model a futures-style contract
    # with a fixed commission per trade -- confirm against the broker setup.
    cerebro.broker.setcommission(commission=50,margin=1000,mult=100)
    cerebro.addanalyzer(bt.analyzers.SharpeRatio_A, _name='sharpe')
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trade')
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='dd')
    cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
    cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='ar')
    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
    thestrats = cerebro.run()
    final_value = cerebro.broker.getvalue()
    print('Final Portfolio Value: %.2f' % final_value)
    if drawpic:
        cerebro.plot(iplot=iplot,volume=False)
    analyzers = thestrats[0].analyzers
    sharpe = analyzers.sharpe.get_analysis()['sharperatio']
    dd = analyzers.dd.get_analysis()['max']['drawdown']
    ar = analyzers.returns.get_analysis()['rnorm100']
    # The Sharpe analyzer reports None when the ratio cannot be computed;
    # formatting None with '{:.2f}' would raise TypeError after the backtest
    # has already finished, so print a placeholder instead.
    if sharpe is None:
        print('Sharpe: n/a')
    else:
        print('Sharpe: {:.2f}'.format(sharpe))
    print('Max drawdown: {:.2f}%'.format(dd))
    print('Annual rate: {:.2f}%'.format(ar))
    return sharpe,dd,ar

In [14]:
from tqdm import tqdm
from sa import *
from utils import *

# Please select the last activation layer.
layer_names = ['lstm_6']

# Default surprise-adequacy settings. Args reads these so that each value is
# defined in exactly one place.
default_upper_bound = 2000
default_n_bucket = 1000
default_n_classes = 2


class Args():
    """Plain settings holder passed as ``args`` to fetch_dsa / fetch_lsa.

    NOTE(review): the attribute names presumably mirror the options the
    helpers in the ``sa`` module expect -- confirm against that module
    before renaming or removing any of them.
    """

    def __init__(self):
        """Populate every setting with its default value."""
        self.is_classification = True
        self.save_path = './tmp/'
        self.d = 'lstm_r'
        self.num_classes = default_n_classes
        self.lsa = True
        self.dsa = True
        self.target = 'none'
        self.batch_size = 128
        self.var_threshold = 1e-5
        self.upper_bound = default_upper_bound
        self.n_bucket = default_n_bucket


args = Args()

In [41]:
# Walk forward over consecutive 250-row test windows. The first window starts
# at row 2240, i.e. where the training window (240 + 2000) ends.
starter = range(2240,len(df)-250,250)
all_results = []

for test_begin in starter:
    test_end = test_begin + 250

    # Load the 240 rows before the window as well: they are the look-back
    # history for the first test samples.
    test_set = df[['log_r','label']][test_begin-240:test_end].reset_index()
    # Reuse the scaler fitted on the training data (transform only, no refit).
    x_test_set = list(chain.from_iterable(scaler.transform(test_set['log_r'].values.reshape(-1,1))))
    x_test, y_test = [], []
    for i in range(240,len(x_test_set)):
        x_test.append(x_test_set[i-240:i])
        y_test.append(test_set['label'][i])
    x_test, y_test = np.array(x_test), np.array(y_test)
    x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1))
    y_test = to_categorical(y_test,2)

    # Surprise-adequacy scores of the test window relative to the training
    # set. NOTE(review): the numeric get_sc arguments are presumably
    # (lower bound, upper bound, number of buckets) -- confirm in sa/utils.
    test_dsa = fetch_dsa(model, x_train, x_test, 'dsa from{}'.format(test_begin), layer_names, args)
    dsa_cov = get_sc(np.amin(test_dsa), 5, 20, test_dsa)
    test_lsa = fetch_lsa(model, x_train, x_test, 'lsa from{}'.format(test_begin), layer_names, args)
    lsa_cov = get_sc(np.amin(test_lsa), 1500, 20, test_lsa)

    eva = model.evaluate(x_test, y_test)
    loss, accuracy = eva[0], eva[1]
    # Compute each summary statistic once and reuse it for both the printout
    # and the result row.
    dsa_stats = [np.mean(test_dsa), np.std(test_dsa), np.max(test_dsa), np.min(test_dsa)]
    lsa_stats = [np.mean(test_lsa), np.std(test_lsa), np.max(test_lsa), np.min(test_lsa)]
    print('Loss: {:.2f}, Accuracy: {:.2f}'.format(loss, accuracy))
    # The second value is a standard deviation (np.std), so it is labelled
    # "Std" to match the dsa_std / lsa_std result columns.
    print('Dsa Mean: {:.2f}, Std: {:.2f}, Max: {:.2f}, Min: {:.2f}'.format(*dsa_stats))
    print('Lsa Mean: {:.2f}, Std: {:.2f}, Max: {:.2f}, Min: {:.2f}'.format(*lsa_stats))

    # .copy() so that adding 'label' does not write into a view of orig_df
    # (avoids SettingWithCopyWarning).
    test_df = orig_df.iloc[test_begin:test_end].copy()
    # argmax over the softmax output is the predicted class; same result as
    # Sequential.predict_classes, which newer Keras versions no longer provide.
    test_df['label'] = np.argmax(model.predict(x_test), axis=-1)
    sharpe,dd,ar = my_backtest(test_df)

    this_result = [test_begin, loss, accuracy, dsa_cov] + dsa_stats + [lsa_cov] + lsa_stats + [sharpe, dd, ar]
    all_results.append(this_result)

100%|██████████| 250/250 [00:00<00:00, 2071.09it/s]
kde: 100%|██████████| 2/2 [00:00<00:00, 1017.91it/s]
  0%|          | 0/250 [00:00<?, ?it/s][92mFound saved train ATs, skip serving[0m
[92mFound saved dsa from2240 ATs, skip serving[0m
[94m[dsa from2240] [0mFetching DSA
[92mFound saved train ATs, skip serving[0m
[92mFound saved lsa from2240 ATs, skip serving[0m
[92mThe number of removed columns: 0[0m
[94m[lsa from2240] [0mFetching LSA
100%|██████████| 250/250 [00:00<00:00, 3094.71it/s]
Loss: 0.70, Accuracy: 0.51
Dsa Mean: 0.55, Var: 0.56, Max: 3.11, Min: 0.05
Lsa Mean: 33.56, Var: 191.21, Max: 1248.25, Min: -69.53
 85%|████████▍ | 212/250 [00:00<00:00, 2104.59it/s]Starting Portfolio Value: 100000.00
Final Portfolio Value: 117198.00
Sharpe: 6.48
Max drawdown: 18.06%
Annual rate: 17.35%
[92mFound saved train ATs, skip serving[0m
[92mFound saved dsa from2490 ATs, skip serving[0m
[94m[dsa from2490] [0mFetching DSA
100%|██████████| 250/250 [00:00<00:00, 2089.43it/s]
kde

In [42]:
# Column names, in the same order as the values stored per test window in
# all_results.
cols = [
    'test_begin',
    'loss', 'accu',
    'dsa_cov', 'dsa_mean', 'dsa_std', 'dsa_max', 'dsa_min',
    'lsa_cov', 'lsa_mean', 'lsa_std', 'lsa_max', 'lsa_min',
    'sharpe', 'drawdown', 'annual_return',
]
result_df = pd.DataFrame(data=all_results, columns=cols)

In [43]:
# Display the per-window results table (one row per test window).
result_df

Unnamed: 0,test_begin,loss,accu,dsa_cov,dsa_mean,dsa_std,dsa_max,dsa_min,lsa_cov,lsa_mean,lsa_std,lsa_max,lsa_min,sharpe,drawdown,annual_return
0,2240,0.698456,0.508,55.0,0.546526,0.561222,3.114424,0.054373,70.0,33.559475,191.209889,1248.247018,-69.529467,6.480774,18.061717,17.346884
1,2490,0.686643,0.56,40.0,0.317835,0.340786,2.542331,0.023308,5.0,-65.261773,6.426795,-10.327409,-71.499709,1.854561,11.677169,25.690888
2,2740,0.686288,0.592,30.0,0.21234,0.210874,1.519107,0.026104,5.0,-68.219279,2.929719,-56.150657,-71.213576,1.907296,8.084511,52.014435
3,2990,0.692316,0.516,60.0,0.386541,0.567986,3.778342,0.034636,10.0,-53.712047,24.489958,75.9373,-71.245531,-0.725497,45.788927,-23.855881
4,3240,0.695842,0.496,20.0,0.159306,0.151618,1.108419,0.04106,40.0,-47.579933,94.988364,599.480046,-71.51606,-0.742162,37.7904,-26.183068
5,3490,0.695469,0.512,30.0,0.203545,0.206412,1.79044,0.041658,20.0,-56.834929,44.355334,527.081173,-70.800888,-0.631019,25.7556,-17.794944
6,3740,0.705449,0.46,20.0,0.15061,0.151649,1.019424,0.027984,5.0,-66.20358,6.490542,-28.38547,-71.68329,-0.321868,19.098936,-5.113536
7,3990,0.703024,0.476,40.0,0.198916,0.273698,2.456577,0.02101,5.0,-66.028288,5.759454,-30.715562,-71.66229,0.115954,16.995238,3.129217
8,4240,0.698567,0.52,30.0,0.177066,0.187026,1.695585,0.026664,5.0,-65.692088,6.39356,-19.154091,-71.137105,0.113021,20.682968,3.259281
9,4490,0.686407,0.548,10.0,0.116627,0.065245,0.348076,0.019122,5.0,-70.037435,1.063769,-66.059516,-71.731027,1.487392,9.705368,7.8305
