In [187]:
import pandas as pd
import numpy as np
import talib as ta

PATH_READ = '123181_train_raw.csv'
PATH_READ_STOCK = '301085_train_raw.csv'

PATH_WRITE = '123181_train_withF.csv'
# df = pd.read_csv(PATH_READ, index_col='Timestamp')
df = pd.read_csv(PATH_READ)
df_stock = pd.read_csv(PATH_READ_STOCK)

df = df.drop(['InstrumentID','TradingDay','PreClosePrice',], axis=1)

print(df.columns)

Index(['Timestamp', 'Open', 'High', 'Low', 'Volume', 'Turnover', 'Close'], dtype='object')


In [188]:
def log_Return (df,type,period):
    return np.log(df[type] / df[type].shift(period))

In [189]:
def BBands (df, type, period):
    close_prices = np.array(df[type])

    upper_band, middle_band, lower_band = ta.BBANDS(close_prices, timeperiod=period)

    return upper_band, middle_band, lower_band

In [190]:
def MACD(df, column, fastPeriod, slowPeriod, signalPeriod):
    close = np.array(df[column])
    macd, signal, hist = ta.MACD(close, fastperiod=fastPeriod, slowperiod=slowPeriod, signalperiod=signalPeriod)
    return macd

In [191]:
def RSI (df, type, period):
    close = np.array(df[type])
    return ta.RSI(close, timeperiod = period)

In [192]:
def SAR (df, acc, max):
    high = np.array(df['High'])
    low = np.array(df['Low'])
    
    return ta.SAR(high, low, acceleration = acc, maximum = max)

In [193]:
def target(df, column, period):
    if period == 1:
        return (df[column].shift(-1) - df[column])/df[column]
    if period > 1:
        return (df[column].rolling(window=period).mean().shift(-1 * period) - df[column]) / df[column]

In [194]:
df['Avg'] = df['Turnover']/df['Volume']

nan_rows_Avg = df[df['Avg'].isnull()]

# 如遇涨/跌停，直接用高开低收的价格，因为都一样
for index, row in nan_rows_Avg.iterrows():
    if row['High'] == row['Low'] == row['Open'] == row['Close']:
        row['Avg'] = row['High']

df.update(nan_rows_Avg)

Adding features

In [195]:
df['log_ret_close_1min'] = log_Return(df, 'Close', 1)
df['BBANDS_close_16min_up'], df['BBANDS_close_16min_mid'], df['BBANDS_close_16min_low'] = BBands(df, 'Close', 16)
df['MACD_close_F12S26S9'] = MACD(df, 'Close', fastPeriod=12, slowPeriod=26, signalPeriod=9)
df['RSI_close_12min'] = RSI(df, 'Close', period=12)
df['SAR'] = SAR(df, 0.02, 0.2)


print(df.columns)

Index(['Timestamp', 'Open', 'High', 'Low', 'Volume', 'Turnover', 'Close',
       'Avg', 'log_ret_close_1min', 'BBANDS_close_16min_up',
       'BBANDS_close_16min_mid', 'BBANDS_close_16min_low',
       'MACD_close_F12S26S9', 'RSI_close_12min', 'SAR'],
      dtype='object')


Add Targets

In [196]:
df['Target_Close_1min'] = target(df,'Close', 1)
df['Target_Avg_1min'] = target(df, 'Avg', 1)

In [197]:
df_stock['STOCK_log_ret_close_1min'] = log_Return(df_stock, 'Close', 1)
df_stock['STOCK_BBANDS_close_16min_up'], df_stock['STOCK_BBANDS_close_16min_mid'], df_stock['STOCK_BBANDS_close_16min_low'] = BBands(df_stock, 'Close', 16)
df_stock['STOCK_MACD_close_F12S26S9'] = MACD(df_stock, 'Close', fastPeriod=12, slowPeriod=26, signalPeriod=9)
df_stock['STOCK_RSI_close_12min'] = RSI(df_stock, 'Close', period=12)
df_stock['STOCK_SAR'] = SAR(df_stock, 0.02, 0.2)


Merging

In [198]:
df_stock = df_stock[['STOCK_log_ret_close_1min', 'STOCK_BBANDS_close_16min_up', 'STOCK_BBANDS_close_16min_mid',
       'STOCK_BBANDS_close_16min_low', 'STOCK_MACD_close_F12S26S9',
       'STOCK_RSI_close_12min', 'STOCK_SAR']]

result = pd.merge(df, df_stock, how='left', left_index= True, right_index=True)

Cleaning and saving

In [199]:
print('Number of rows before cleaning: %d'%result.shape[0])

result = result.dropna()

result.index = result['Timestamp']
result = result.drop(['Timestamp'], axis=1)

print('Number of rows after cleaning: %d'%result.shape[0])

result.to_csv(PATH_WRITE)

Number of rows before cleaning: 5840
Number of rows after cleaning: 5638
