In [1]:
# import packages
import os
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, log_loss
from bt_classes import my_backtest, test_indicator
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
#importing required libraries
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM, CuDNNLSTM
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from tqdm import tqdm
from sa import *
from utils import *
# follow the literature
# we don't use min-max scaling here, use partial mean-std scaling instead
from sklearn.preprocessing import StandardScaler
from itertools import chain
rcParams['figure.figsize'] = 20,10
# df = pd.read_csv('../res/input0130.csv')

orig_df = pd.read_csv('../xau_1d_20y.csv')
orig_df['datetime'] = pd.to_datetime(orig_df['date'])
orig_df = orig_df.set_index('datetime')

df = orig_df.copy()
df['log_r'] = np.log(df['close']) - np.log(df['open'])
df['label'] = np.sign(df['log_r'].shift(-1))
df['label'][df['label']==-1] = 0
df['label'].fillna(0,inplace=True)

Using TensorFlow backend.


In [2]:
# and we define our model here
def lstm_model(sample_len=240,para_a=42, para_b=17,drop1=0.05,drop2=0.02):
    model = Sequential()
    # model.add(LSTM(units=para_a, dropout=0.1, return_sequences=True, input_shape=(sample_len,1),activation='tanh'))# (25,15)-57, (42,17)-58
    # model.add(LSTM(units=para_b, dropout=0.08, activation='tanh'))
    model.add(CuDNNLSTM(units=para_a, return_sequences=True, input_shape=(sample_len,1)))# (25,15)-57, (42,17)-58
    model.add(Dropout(drop1))
    model.add(Activation('tanh'))
    model.add(CuDNNLSTM(units=para_b))
    model.add(Dropout(drop2))
    model.add(Activation('tanh'))
    # model.add(Dropout(0.08))# 加了之后同原先效果差不多，（应该一定程度上）可以防止过拟合
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [3]:
# grid search here:
grid_result = []
for sample_len in [60,120]:
    for p1 in [48,96,192]:
        for p2 in [48,96,192]:
            for epoch in [30,60]:
                for batch_size in [100,200]:
                    result = []
                    for train_begin in range(sample_len,len(df)-2000,1500):
                        train_begin = sample_len
                        train_end = train_begin + 1500
                        scaler = StandardScaler()
                        train_set = df[['log_r','label']][train_begin-sample_len:train_end].reset_index()
                        x_train, y_train = [], []
                        x_train_set = list(chain.from_iterable(scaler.fit_transform(train_set['log_r'].values.reshape(-1,1))))
                        for i in range(sample_len,len(x_train_set)):
                            x_train.append(x_train_set[i-sample_len:i])
                            y_train.append(train_set['label'][i-1])
                        x_train, y_train = np.array(x_train), np.array(y_train)
                        y_train = to_categorical(y_train,2)
                        x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1)) 

                        test_len = 500
                        test_begin = train_end + sample_len
                        test_end = test_begin + test_len

                        x_test, y_test = [], []
                        test_set = df[['log_r','label']][test_begin-sample_len:test_end].reset_index()
                        test_df = df[test_begin:test_end]
                        x_test_set = list(chain.from_iterable(scaler.transform(test_set['log_r'].values.reshape(-1,1))))
                        for i in range(sample_len,len(x_test_set)):
                            x_test.append(x_test_set[i-sample_len:i])
                            y_test.append(test_set['label'][i-1])
                        test_df['y_true'] = y_test
                        x_test, y_test = np.array(x_test), np.array(y_test)
                        x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1)) 
                        y_test = to_categorical(y_test,2)
                        model = lstm_model(sample_len=sample_len,para_a=p1,para_b=p2)
                        model.fit(x_train,y_train,epochs=epoch, batch_size=batch_size, callbacks=[EarlyStopping(monitor='loss',patience=10)],verbose=0)
                        train_loss,train_acc = model.evaluate(x_train, y_train,verbose=0)
                        test_loss,test_acc = model.evaluate(x_test, y_test,verbose=0)
                        y_pred = model.predict_classes(x_test)
                        test_df['y_pred'] = y_pred
                        test_df['log_profit'] = 2*(test_df['y_pred']-0.5)*test_df['log_r']
                        win_profit = test_df['log_profit'].loc[test_df['y_true']==test_df['y_pred']].mean()
                        lose_profit = test_df['log_profit'].loc[test_df['y_true']!=test_df['y_pred']].mean()
                        wtl = abs(win_profit / lose_profit)
                        this_result = [sample_len,p1,p2,epoch,batch_size,train_loss,train_acc,test_loss,test_acc,wtl]
                        result.append(this_result)
                        del train_set,x_train,y_train,x_train_set,test_set,test_df,x_test_set,model,
                    print(result)
                    grid_result.append(np.mean(result,axis=0).tolist())
                    res = pd.DataFrame(grid_result,columns=['sample_len','p1','p2','epoch','batch_size','train_loss','train_acc','test_loss','test_acc','wtl'])
                    res.to_csv(f'grid_result_{sample_len}.csv')


[[60, 48, 48, 30, 100, 0.6807578845024109, 0.5460000038146973, 0.7212267875671386, 0.5400000214576721, 1.0592667669367344], [60, 48, 48, 30, 100, 0.6866096304257711, 0.5446666479110718, 0.7019487013816833, 0.4959999918937683, 1.0460477487509325], [60, 48, 48, 30, 100, 0.6849220202763875, 0.5586666464805603, 0.7118674879074096, 0.49799999594688416, 0.9477947624620856]]
[[60, 48, 48, 30, 200, 0.6885439569155375, 0.5453333258628845, 0.6990001997947692, 0.5099999904632568, 0.9715730608091963], [60, 48, 48, 30, 200, 0.6876777048110962, 0.5373333096504211, 0.7025680689811706, 0.5080000162124634, 0.9665146837515738], [60, 48, 48, 30, 200, 0.6885121041933695, 0.5493333339691162, 0.6975827612876893, 0.5080000162124634, 0.9825331382580282]]
[[60, 48, 48, 60, 100, 0.647651965936025, 0.6193333268165588, 0.7583066511154175, 0.5299999713897705, 1.049532175230887], [60, 48, 48, 60, 100, 0.6338046058019002, 0.6259999871253967, 0.7746945958137512, 0.5400000214576721, 1.1059004357659195], [60, 48, 48, 

[[60, 96, 192, 60, 100, 0.58097212044398, 0.6786666512489319, 0.870072624206543, 0.5220000147819519, 1.007388410419845], [60, 96, 192, 60, 100, 0.5284601003328959, 0.7080000042915344, 1.166929744720459, 0.49399998784065247, 1.0630638337111828], [60, 96, 192, 60, 100, 0.5445181152025859, 0.7080000042915344, 1.0624764070510864, 0.492000013589859, 1.0363240683150565]]
[[60, 96, 192, 60, 200, 0.6544574491182963, 0.6013333201408386, 0.7999255304336548, 0.5120000243186951, 1.080029907529723], [60, 96, 192, 60, 200, 0.614715764204661, 0.6446666717529297, 0.8712918510437012, 0.48399999737739563, 1.058622399562624], [60, 96, 192, 60, 200, 0.6411691045761109, 0.6073333621025085, 0.8075433430671692, 0.5120000243186951, 1.0857739448443091]]
[[60, 192, 48, 30, 100, 0.6865624300638835, 0.5473333597183228, 0.7110129289627075, 0.5139999985694885, 1.0365497351653332], [60, 192, 48, 30, 100, 0.6818092157046001, 0.5526666641235352, 0.7224830479621888, 0.5139999985694885, 0.9774725659021862], [60, 192, 48

[[120, 48, 192, 30, 200, 0.685417492389679, 0.5486666560173035, 0.6966014676094056, 0.5180000066757202, 1.0443591199540418], [120, 48, 192, 30, 200, 0.6862351009051005, 0.5446666479110718, 0.6950741167068482, 0.5299999713897705, 1.0149254192638575], [120, 48, 192, 30, 200, 0.684317459265391, 0.550000011920929, 0.7023462481498718, 0.5220000147819519, 1.0014304971088182]]
[[120, 48, 192, 60, 100, 0.6153585015932719, 0.6386666893959045, 0.8793460073471069, 0.515999972820282, 1.0104972536877979], [120, 48, 192, 60, 100, 0.6068195137182871, 0.640666663646698, 0.8720148043632507, 0.5099999904632568, 0.9808289745781896], [120, 48, 192, 60, 100, 0.6216777888933818, 0.6393333077430725, 0.8311578793525696, 0.48399999737739563, 1.0659734071582199]]
[[120, 48, 192, 60, 200, 0.6672737822532654, 0.5793333053588867, 0.7369736604690552, 0.5080000162124634, 1.0888742823370399], [120, 48, 192, 60, 200, 0.6678167868455251, 0.5740000009536743, 0.7702910165786743, 0.5339999794960022, 1.0396652903299326], [

[[120, 192, 192, 30, 100, 0.6816466488838195, 0.5633333325386047, 0.6979605231285095, 0.4959999918937683, 1.0566484250679946], [120, 192, 192, 30, 100, 0.6786628034909566, 0.5580000281333923, 0.7080155081748962, 0.4959999918937683, 1.0833948065054126], [120, 192, 192, 30, 100, 0.6839175753593445, 0.5299999713897705, 0.7015544939041137, 0.5419999957084656, 1.1068065563962917]]
[[120, 192, 192, 30, 200, 0.6872832403182983, 0.5326666831970215, 0.6978034548759461, 0.5099999904632568, 1.058412993290249], [120, 192, 192, 30, 200, 0.7023577151298523, 0.5199999809265137, 0.6977292671203613, 0.5559999942779541, 0.9545398310277329], [120, 192, 192, 30, 200, 0.6845793523788453, 0.5406666398048401, 0.7090926442146301, 0.5239999890327454, 1.0126328782395584]]
[[120, 192, 192, 60, 100, 0.32796500237782794, 0.8539999723434448, 1.3832539081573487, 0.49000000953674316, 0.9587613353801478], [120, 192, 192, 60, 100, 0.4931579371293386, 0.734000027179718, 0.9647010469436645, 0.5379999876022339, 1.04660476