In [1]:
# import packages
import os
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, log_loss
from bt_classes import my_backtest, test_indicator
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
#importing required libraries
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM, CuDNNLSTM
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from tqdm import tqdm
from sa import *
from utils import *
# follow the literature
# we don't use min-max scaling here, use partial mean-std scaling instead
from sklearn.preprocessing import StandardScaler
from itertools import chain
rcParams['figure.figsize'] = 20,10
# df = pd.read_csv('../res/input0130.csv')

orig_df = pd.read_csv('../xau_1d_20y.csv')
orig_df['datetime'] = pd.to_datetime(orig_df['date'])
orig_df = orig_df.set_index('datetime')

df = orig_df.copy()
df['log_r'] = np.log(df['close']) - np.log(df['open'])
df['label'] = np.sign(df['log_r'].shift(-1))
df['label'][df['label']==-1] = 0
df['label'].fillna(0,inplace=True)

Using TensorFlow backend.


In [2]:
# and we define our model here
def lstm_model(sample_len=240,para_a=42, para_b=17,drop1=0.05,drop2=0.02):
    model = Sequential()
    # model.add(LSTM(units=para_a, dropout=0.1, return_sequences=True, input_shape=(sample_len,1),activation='tanh'))# (25,15)-57, (42,17)-58
    # model.add(LSTM(units=para_b, dropout=0.08, activation='tanh'))
    model.add(CuDNNLSTM(units=para_a, return_sequences=True, input_shape=(sample_len,1)))# (25,15)-57, (42,17)-58
    model.add(Dropout(drop1))
    model.add(Activation('tanh'))
    model.add(CuDNNLSTM(units=para_b))
    model.add(Dropout(drop2))
    model.add(Activation('tanh'))
    # model.add(Dropout(0.08))# 加了之后同原先效果差不多，（应该一定程度上）可以防止过拟合
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [None]:
# grid search here:
grid_result = []
for sample_len in [15,30]:
    for p1 in [48,96,192]:
        for p2 in [48,96,192]:
            for epoch in [30,60]:
                for batch_size in [100,200]:
                    result = []
                    for train_begin in range(sample_len,len(df)-2000,1500):
                        train_begin = sample_len
                        train_end = train_begin + 1500
                        scaler = StandardScaler()
                        train_set = df[['log_r','label']][train_begin-sample_len:train_end].reset_index()
                        x_train, y_train = [], []
                        x_train_set = list(chain.from_iterable(scaler.fit_transform(train_set['log_r'].values.reshape(-1,1))))
                        for i in range(sample_len,len(x_train_set)):
                            x_train.append(x_train_set[i-sample_len:i])
                            y_train.append(train_set['label'][i-1])
                        x_train, y_train = np.array(x_train), np.array(y_train)
                        y_train = to_categorical(y_train,2)
                        x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1)) 

                        test_len = 500
                        test_begin = train_end + sample_len
                        test_end = test_begin + test_len

                        x_test, y_test = [], []
                        test_set = df[['log_r','label']][test_begin-sample_len:test_end].reset_index()
                        test_df = df[test_begin:test_end]
                        x_test_set = list(chain.from_iterable(scaler.transform(test_set['log_r'].values.reshape(-1,1))))
                        for i in range(sample_len,len(x_test_set)):
                            x_test.append(x_test_set[i-sample_len:i])
                            y_test.append(test_set['label'][i-1])
                        test_df['y_true'] = y_test
                        x_test, y_test = np.array(x_test), np.array(y_test)
                        x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1)) 
                        y_test = to_categorical(y_test,2)
                        model = lstm_model(sample_len=sample_len,para_a=p1,para_b=p2)
                        model.fit(x_train,y_train,epochs=epoch, batch_size=batch_size, callbacks=[EarlyStopping(monitor='loss',patience=10)],verbose=0)
                        train_loss,train_acc = model.evaluate(x_train, y_train,verbose=0)
                        test_loss,test_acc = model.evaluate(x_test, y_test,verbose=0)
                        y_pred = model.predict_classes(x_test)
                        test_df['y_pred'] = y_pred
                        test_df['log_profit'] = 2*(test_df['y_pred']-0.5)*test_df['log_r']
                        win_profit = test_df['log_profit'].loc[test_df['y_true']==test_df['y_pred']].mean()
                        lose_profit = test_df['log_profit'].loc[test_df['y_true']!=test_df['y_pred']].mean()
                        wtl = abs(win_profit / lose_profit)
                        this_result = [sample_len,p1,p2,epoch,batch_size,train_loss,train_acc,test_loss,test_acc,wtl]
                        result.append(this_result)
                    print(result)
                    grid_result.append(np.mean(result,axis=0).tolist())
                    res = pd.DataFrame(grid_result,columns=['sample_len','p1','p2','epoch','batch_size','train_loss','train_acc','test_loss','test_acc','wtl'])
                    res.to_csv(f'grid_result_{sample_len}.csv')




[[15, 48, 48, 30, 100, 0.6835815704663595, 0.5526666641235352, 0.7459641981124878, 0.4699999988079071, 1.0294309197009317], [15, 48, 48, 30, 100, 0.6860315124193828, 0.5573333501815796, 0.7084885816574097, 0.5040000081062317, 0.9984016297680356], [15, 48, 48, 30, 100, 0.6851288811365763, 0.5453333258628845, 0.7274173169136048, 0.48399999737739563, 1.0675803571786175]]
[[15, 48, 48, 30, 200, 0.6874014088312785, 0.5433333516120911, 0.7208447275161743, 0.49799999594688416, 0.989349374596915], [15, 48, 48, 30, 200, 0.6872293017705282, 0.5419999957084656, 0.7163364753723145, 0.49399998784065247, 0.9555670427404062], [15, 48, 48, 30, 200, 0.6881373391151429, 0.5333333611488342, 0.7216811270713807, 0.4740000069141388, 0.9911550357652333]]
[[15, 48, 48, 60, 100, 0.6558367225329081, 0.5839999914169312, 0.8177469816207886, 0.5260000228881836, 1.0372842313332367], [15, 48, 48, 60, 100, 0.6603273526827494, 0.5839999914169312, 0.747037931919098, 0.515999972820282, 1.0031427186349582], [15, 48, 48,