In [1]:
from tuneta.tune_ta import TuneTA
import pandas as pd
import numpy as np

import talib

PATH_READ_TRAIN = '123181_train_raw.csv'
PATH_READ_TEST = '123181_test_raw.csv'
PATH_STOCK_TRAIN = '301085_train_raw.csv'
PATH_STOCK_TEST = '301085_test_raw.csv'

# Columns present in the raw files that are dropped before any modelling.
UNUSED_COLUMNS = ['InstrumentID', 'TradingDay', 'PreClosePrice']


def load_bars(path):
    """Read one raw CSV and apply the same preparation to every split.

    The file is read with 'Timestamp' as the index, the columns listed in
    UNUSED_COLUMNS are dropped, the index is parsed to datetimes and an
    'Avg' column is added with talib.AVGPRICE(Open, High, Low, Close).

    Previously only the train frames had the unused columns dropped and the
    index parsed; the test frames kept the extra columns (so a later
    dropna() could discard rows because of NaNs in columns that are never
    used) and kept a string index. Using one loader keeps train and test
    prepared identically.

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The prepared frame, indexed by parsed 'Timestamp'.

    Raises
    ------
    KeyError
        If any of UNUSED_COLUMNS is missing from the file (same behaviour
        as the original drop on the train frames).
    """
    bars = pd.read_csv(path, index_col='Timestamp')
    bars = bars.drop(UNUSED_COLUMNS, axis=1)
    bars.index = pd.to_datetime(bars.index)
    bars['Avg'] = talib.AVGPRICE(bars['Open'], bars['High'], bars['Low'], bars['Close'])
    return bars


# Frames read from the 123181 files (train / test).
df = load_bars(PATH_READ_TRAIN)
df_test = load_bars(PATH_READ_TEST)

# Frames read from the 301085 files (train / test).
stock = load_bars(PATH_STOCK_TRAIN)
stock_test = load_bars(PATH_STOCK_TEST)

print(df.columns)

Index(['Open', 'High', 'Low', 'Volume', 'Turnover', 'Close', 'Avg'], dtype='object')


In [2]:
def target(df, column, period):
    """Relative change from the current value to a future value of a column.

    For ``period == 1`` the future value is the next row's value. For
    ``period > 1`` it is the mean of the next ``period`` rows (a rolling
    mean of width ``period`` shifted back by ``period`` rows). The result is
    ``(future - current) / current``; the trailing rows that have no
    complete future window are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the column to compute the change on.
    period : int
        Number of future rows to look at; must be at least 1.

    Returns
    -------
    pandas.Series
        Relative change, aligned to ``df``'s index.

    Raises
    ------
    ValueError
        If ``period`` is smaller than 1. The previous version fell through
        both branches and silently returned None in that case.
    """
    if period < 1:
        raise ValueError('period must be >= 1, got %r' % (period,))

    current = df[column]
    if period == 1:
        # Kept separate from the rolling form so the result is the plain
        # shifted value, with no rolling-mean arithmetic involved.
        future = current.shift(-1)
    else:
        future = current.rolling(window=period).mean().shift(-1 * period)
    return (future - current) / current

In [3]:
def class_target(value):
    """Map a numeric value to a class label using a +/-0.001 band.

    Returns 1 when ``value`` is strictly above 0.001, -1 when it is strictly
    below -0.001, and 0 otherwise (including the band edges and any value
    for which both comparisons are false).
    """
    threshold = 0.001
    if value > threshold:
        return 1
    if value < -threshold:
        return -1
    return 0

In [4]:
# Add the one-step relative change of 'Close' and 'Avg' to both frames.
for frame in (df, df_test):
    frame['Target_Close_1min'] = target(frame, 'Close', 1)
    frame['Target_Avg_1min'] = target(frame, 'Avg', 1)

# Keep only rows whose Close target is not exactly zero, then drop every row
# that still contains a NaN in any column.
df = df.loc[df['Target_Close_1min'].ne(0)].dropna()
df_test = df_test.loc[df_test['Target_Close_1min'].ne(0)].dropna()

In [5]:
# Target series for the two splits.
train_y = df['Target_Close_1min']
test_y = df_test['Target_Close_1min']

# Input columns handed to the tuner; identical for train and test.
feature_columns = ['Open', 'High', 'Low', 'Volume', 'Turnover', 'Close']

train_X = df[feature_columns]
test_X = df_test[feature_columns]

In [6]:
tt = TuneTA(verbose=True, n_jobs=8)

tt.fit(
    train_X,
    train_y,
    # Indicator group to optimize ('tta'; the report lists tta_* features).
    indicators=['tta'],
    # A single parameter range (4, 40) is searched.
    # NOTE(review): presumably the indicator look-back bounds - confirm
    # against the TuneTA documentation.
    ranges=[(4, 40)],
    # Trial budget of 300 for the parameter search.
    trials=300,
    # Early-stop setting of 50.
    # NOTE(review): presumably "stop after 50 trials without improvement"
    # - confirm against the TuneTA documentation.
    early_stop=50,
)

In [7]:
# Prune the fitted indicators with an inter-correlation limit of 0.7.
tt.prune(max_inter_correlation=0.7)

# Print the report with both correlation sections enabled.
tt.report(features_corr=True, target_corr=True)


Indicator Correlation to Target:

                                                            Correlation
--------------------------------------------------------  -------------
tta_STDDEV_timeperiod_9                                        0.289113
tta_NATR_timeperiod_28                                         0.249891
tta_ADOSC_fastperiod_5_slowperiod_20                           0.200034
tta_MINUS_DM_timeperiod_7                                      0.199308
tta_ADX_timeperiod_11                                          0.159031
tta_ULTOSC_timeperiod1_37_timeperiod2_38_timeperiod3_25        0.154338
tta_HT_PHASOR                                                  0.153405
tta_MFI_timeperiod_35                                          0.113695
tta_MINUS_DI_timeperiod_24                                     0.097981
tta_HT_TRENDMODE                                               0.095771
tta_OBV                                                        0.089182
tta_BOP                      

In [8]:
# Apply the fitted tuner to the train inputs first, then to the test inputs.
features_train, features_test = tt.transform(train_X), tt.transform(test_X)

Now training stock

In [9]:
# Right join on the index: the result keeps the rows of the target series
# (taken from df / df_test); stock columns are NaN where stock has no
# matching timestamp.
index_right_join = dict(how='right', left_index=True, right_index=True)

stock = pd.merge(stock, df['Target_Close_1min'], **index_right_join)
stock_test = pd.merge(stock_test, df_test['Target_Close_1min'], **index_right_join)

In [10]:
# Drop every row that contains a NaN in any column, for both stock frames.
stock, stock_test = stock.dropna(), stock_test.dropna()

In [11]:
# Target series for the stock frames (the 'Target_Close_1min' column that was
# merged in from df / df_test).
STOCK_train_y = stock['Target_Close_1min']
STOCK_test_y = stock_test['Target_Close_1min']

# Input columns handed to the tuner; identical for train and test.
stock_feature_columns = ['Open', 'High', 'Low', 'Volume', 'Turnover', 'Close']

STOCK_train_X = stock[stock_feature_columns]
STOCK_test_X = stock_test[stock_feature_columns]

# NOTE: this rebinds `tt`. After this line the name no longer refers to the
# tuner that was fitted on train_X earlier in the notebook.
tt = TuneTA(verbose=True, n_jobs=8)

tt.fit(
    STOCK_train_X,
    STOCK_train_y,
    # Indicator group to optimize ('tta'; the report lists tta_* features).
    indicators=['tta'],
    # A single parameter range (4, 40) is searched.
    # NOTE(review): presumably the indicator look-back bounds - confirm
    # against the TuneTA documentation.
    ranges=[(4, 40)],
    # Trial budget of 300 for the parameter search.
    trials=300,
    # Early-stop setting of 50.
    # NOTE(review): presumably "stop after 50 trials without improvement"
    # - confirm against the TuneTA documentation.
    early_stop=50,
)

# Prune the fitted indicators with an inter-correlation limit of 0.7.
tt.prune(max_inter_correlation=0.7)

# Print the report with both correlation sections enabled.
tt.report(features_corr=True, target_corr=True)


Indicator Correlation to Target:

                                                              Correlation
----------------------------------------------------------  -------------
tta_STDDEV_timeperiod_9                                          0.241148
tta_TRANGE                                                       0.20839
tta_MACD_fastperiod_26_slowperiod_5_signalperiod_11              0.175596
tta_ADOSC_fastperiod_16_slowperiod_5                             0.157537
tta_OBV                                                          0.151049
tta_PLUS_DI_timeperiod_36                                        0.133652
tta_HT_PHASOR                                                    0.12963
tta_BBANDS_timeperiod_14                                         0.122872
tta_ULTOSC_timeperiod1_38_timeperiod2_25_timeperiod3_38          0.108458
tta_DX_timeperiod_24                                             0.101268
tta_ADX_timeperiod_9                                             0.099418
tta_M

In [12]:
# Apply the tuner currently bound to `tt` to the stock train inputs first,
# then to the stock test inputs.
STOCK_features_train, STOCK_features_test = (
    tt.transform(STOCK_train_X),
    tt.transform(STOCK_test_X),
)

Renaming STOCK features

In [13]:
# Prefix every stock feature column so the names stay distinct from the
# columns of features_train / features_test when the frames are merged.
# Re-running this cell prefixes the columns again.
stock_prefix = 'STOCK_'
STOCK_features_train = STOCK_features_train.add_prefix(stock_prefix)
STOCK_features_test = STOCK_features_test.add_prefix(stock_prefix)

In [14]:
# Show (rows, columns) of the stock features, then of the other feature frame.
print(STOCK_features_train.shape, features_train.shape, sep='\n')

(5476, 75)
(5478, 68)


In [15]:
def _drop_rows_with_nan(frame):
    """Print the row count, drop rows containing NaN, print the new count.

    Returns the cleaned frame; the input frame is not modified.
    """
    print('Number of rows before cleaning: %d' % len(frame))
    cleaned = frame.dropna()
    print('Number of rows after cleaning: %d' % len(cleaned))
    return cleaned


features_train = _drop_rows_with_nan(features_train)
features_test = _drop_rows_with_nan(features_test)

Number of rows before cleaning: 5478
Number of rows after cleaning: 5415
Number of rows before cleaning: 2382
Number of rows after cleaning: 2319


In [16]:
# Left join on the index: keep every row of features_train / features_test
# and attach the STOCK_* columns where the index matches (NaN otherwise).
merged_train = pd.merge(
    features_train,
    STOCK_features_train,
    how='left',
    left_index=True,
    right_index=True,
)
merged_test = pd.merge(
    features_test,
    STOCK_features_test,
    how='left',
    left_index=True,
    right_index=True,
)

Clean after merging

In [17]:
# Report the row count before and after dropping rows that contain any NaN.
before_message = 'Number of rows before cleaning: %d'
after_message = 'Number of rows after cleaning: %d'

print(before_message % len(merged_train))
merged_train = merged_train.dropna()
print(after_message % len(merged_train))

print(before_message % len(merged_test))
merged_test = merged_test.dropna()
print(after_message % len(merged_test))

Number of rows before cleaning: 5415
Number of rows after cleaning: 5409
Number of rows before cleaning: 2319
Number of rows after cleaning: 2315


In [18]:
# List the column labels of the merged train frame (tta_* and STOCK_tta_*).
print(merged_train.columns)

Index(['tta_STDDEV_timeperiod_9', 'tta_NATR_timeperiod_28',
       'tta_ADOSC_fastperiod_5_slowperiod_20', 'tta_MINUS_DM_timeperiod_7',
       'tta_ADX_timeperiod_11',
       'tta_ULTOSC_timeperiod1_37_timeperiod2_38_timeperiod3_25',
       'tta_HT_PHASOR_0', 'tta_HT_PHASOR_1', 'tta_MFI_timeperiod_35',
       'tta_MINUS_DI_timeperiod_24',
       ...
       'STOCK_tta_CDLINVERTEDHAMMER', 'STOCK_tta_CDLTHRUSTING',
       'STOCK_tta_CDLMATCHINGLOW', 'STOCK_tta_CDLHANGINGMAN',
       'STOCK_tta_CDL3OUTSIDE', 'STOCK_tta_CDLSTALLEDPATTERN',
       'STOCK_tta_CDLHARAMICROSS', 'STOCK_tta_CDL3INSIDE',
       'STOCK_tta_CDLSEPARATINGLINES', 'STOCK_tta_CDLHOMINGPIGEON'],
      dtype='object', length=143)


In [19]:
# Attach the target series to the merged features with a left join on the
# index, so every row of merged_train / merged_test is kept.
train_result = merged_train.merge(train_y, how='left', left_index=True, right_index=True)
test_result = merged_test.merge(test_y, how='left', left_index=True, right_index=True)

In [20]:
# Write both result frames to CSV in the working directory (train first).
for csv_name, result_frame in (
    ('train_withF_Close_1min.csv', train_result),
    ('test_withF_Close_1min.csv', test_result),
):
    result_frame.to_csv(csv_name)