# CNN for Trading - Part 1: Feature Engineering

## Creating technical indicators at different intervals

## Imports & Settings

In [1]:
# Silence every warning category for the remainder of the session.
# NOTE(review): this also hides deprecation/performance warnings raised by
# later cells — confirm a blanket filter is really wanted.
import warnings
warnings.filterwarnings('ignore')

In [2]:
from talib import (RSI, BBANDS, MACD,
                   NATR, WILLR, WMA,
                   EMA, SMA, CCI, CMO,
                   MACD, PPO, ROC,
                   ADOSC, ADX, MOM, MA, STOCHF)
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

## Loading Data

In [3]:
# Read the tab-separated, header-less bar file and name its columns on load.
column_names = ["date", "time", "open", "high", "low", "close", "volume"]
prices = pd.read_csv(
    "data/kospi200 futures(30)_20211230.txt",
    sep='\t',
    header=None,
    names=column_names,
)

In [4]:
# Show the loaded bars (bare last expression -> rich DataFrame display).
prices

Unnamed: 0,date,time,open,high,low,close,volume
0,20070102,1030,187.85,187.90,186.55,187.15,26616
1,20070102,1100,187.10,187.20,186.45,186.75,13571
2,20070102,1130,186.75,187.50,186.70,187.35,17088
3,20070102,1200,187.40,187.55,187.25,187.45,5399
4,20070102,1230,187.45,187.60,187.30,187.35,6043
...,...,...,...,...,...,...,...
49442,20211230,1400,394.60,394.90,394.30,394.75,9365
49443,20211230,1430,394.80,395.30,394.20,394.30,14181
49444,20211230,1500,394.25,394.45,393.65,393.75,16139
49445,20211230,1530,393.75,394.50,393.65,394.45,14634


## Generate Technical Indicator Factors

In [5]:
# Candidate look-back periods.
# NOTE: T is reassigned further down before any indicator loop runs, so this
# list is not the one the features are built from.
T = [
    5, 10, 15, 20,
    30, 40, 50, 60,
    80, 100, 120, 140,
    160, 180, 200, 240,
]

In [6]:
# Echo the current list of look-back periods.
T

[5, 10, 15, 20, 30, 40, 50, 60, 80, 100, 120, 140, 160, 180, 200, 240]

In [7]:
# Look-back periods used by the indicator loops: 7, 10, 13, ..., 52 (16 values).
# Replaces whatever T held before.
T = list(range(7, 53, 3))

### Relative Strength Index (RSI)

In [8]:
# One RSI column per look-back period, named '<period>_RSI'.
for period in T:
    rsi_values = RSI(prices.close.copy(), timeperiod=period)
    prices[f'{period:02}_RSI'] = rsi_values

### Williams %R

In [9]:
# Williams %R for every look-back period, stored as '<period>_WILLR'.
for period in T:
    column = f'{period:02}_WILLR'
    prices[column] = WILLR(
        prices.high.copy(),
        prices.low.copy(),
        prices.close.copy(),
        timeperiod=period,
    )

### Compute Bollinger Bands

In [10]:
def compute_bb(close, timeperiod):
    """Return the high and low Bollinger Bands of `close` as a two-column frame.

    Parameters
    ----------
    close : series handed to BBANDS; its index becomes the frame's index.
    timeperiod : window forwarded to BBANDS; also used, zero-padded to two
        digits, as the column-name prefix.

    Returns
    -------
    pd.DataFrame with columns '<timeperiod>_BBH' and '<timeperiod>_BBL'.
    The middle band returned by BBANDS is discarded.
    """
    upper, _middle, lower = BBANDS(close, timeperiod=timeperiod)
    prefix = f'{timeperiod:02}'
    bands = {f'{prefix}_BBH': upper, f'{prefix}_BBL': lower}
    return pd.DataFrame(bands, index=close.index)

In [11]:
# Store each band as log1p of its distance from the close, relative to the close.
for period in T:
    upper_col = f'{period:02}_BBH'
    lower_col = f'{period:02}_BBL'
    bands = compute_bb(prices.close.copy(), timeperiod=period)
    gap_above = (bands[upper_col] - prices.close) / prices.close
    gap_below = (prices.close - bands[lower_col]) / prices.close
    prices[upper_col] = gap_above.apply(np.log1p)
    prices[lower_col] = gap_below.apply(np.log1p)

### Moving Average

In [12]:
# Bar-over-bar percentage change of the moving average of the close.
for period in T:
    moving_avg = MA(prices.close.copy(), timeperiod=period)
    prices[f'{period:02}_MA'] = moving_avg.pct_change()

### Percentage Price Oscillator

In [13]:
# Percentage Price Oscillator with the fast window swept over T (matype=1).
# NOTE(review): slowperiod is left at the library default while fastperiod
# goes up to 52; for the larger values the "fast" window is presumably longer
# than the "slow" one — confirm this is intended.
for period in T:
    ppo_values = PPO(prices.close.copy(), fastperiod=period, matype=1)
    prices[f'{period:02}_PPO'] = ppo_values

### Moving Average Convergence/Divergence

In [14]:
def compute_macd(close, signalperiod):
    """Return the first MACD output for `close`, standardized to a z-score.

    The series is centred on its NumPy mean and divided by its NumPy standard
    deviation, both computed over the whole series.

    NOTE(review): only output [0] of MACD is kept; `signalperiod` presumably
    parameterizes the signal line rather than this output, so results for
    different `signalperiod` values may be near-duplicates — confirm against
    the TA-Lib documentation.
    """
    macd_line = MACD(close, signalperiod=signalperiod)[0]
    centered = macd_line - np.mean(macd_line)
    return centered / np.std(macd_line)

In [15]:
# Standardized MACD column for each value in T, passed as the signal period.
for period in T:
    column = f'{period:02}_MACD'
    prices[column] = compute_macd(prices.close.copy(), signalperiod=period)

### Momentum

In [16]:
# Momentum of the close over each look-back period.
for period in T:
    momentum = MOM(prices.close.copy(), timeperiod=period)
    prices[f'{period:02}_MOM'] = momentum

### Weighted Moving Average

In [17]:
# Bar-over-bar percentage change of the weighted moving average.
for period in T:
    weighted_avg = WMA(prices.close.copy(), timeperiod=period)
    prices[f'{period:02}_WMA'] = weighted_avg.pct_change()

### Exponential Moving Average

In [18]:
# Bar-over-bar percentage change of the exponential moving average.
for period in T:
    exp_avg = EMA(prices.close.copy(), timeperiod=period)
    prices[f'{period:02}_EMA'] = exp_avg.pct_change()

### Commodity Channel Index

In [19]:
# Commodity Channel Index from high/low/close for each look-back period.
for period in T:
    column = f'{period:02}_CCI'
    prices[column] = CCI(
        prices.high,
        prices.low,
        prices.close,
        timeperiod=period,
    )

### Chande Momentum Oscillator

In [20]:
# Chande Momentum Oscillator of the close for each look-back period.
for period in T:
    cmo_values = CMO(prices.close, timeperiod=period)
    prices[f'{period:02}_CMO'] = cmo_values

### Rate of Change

Rate of change is a technical indicator that illustrates the speed of price change over a period of time.

In [21]:
# Rate of change of the close over each look-back period.
for period in T:
    roc_values = ROC(prices.close, timeperiod=period)
    prices[f'{period:02}_ROC'] = roc_values

### Chaikin A/D Oscillator

In [22]:
# Chaikin A/D oscillator; the fast and slow windows are the base period
# shifted by -3 and +4 respectively.
for period in T:
    fast_window, slow_window = period - 3, period + 4
    prices[f'{period:02}_ADOSC'] = ADOSC(
        prices.high,
        prices.low,
        prices.close,
        prices.volume,
        fastperiod=fast_window,
        slowperiod=slow_window,
    )

### Stochastic Fast (%D)

In [23]:
# Fast stochastic: only the second output (fastd) is stored, as '<period>_FASTD'.
for period in T:
    stoch_outputs = STOCHF(
        prices.high,
        prices.low,
        prices.close,
        fastk_period=period,
        fastd_period=3,
        fastd_matype=0,
    )
    prices[f'{period:02}_FASTD'] = stoch_outputs[1]

### Return

In [24]:
# Trailing percentage return of the close over each look-back period.
for period in T:
    trailing_return = prices.close.pct_change(period)
    prices[f'{period:02}_RETURN'] = trailing_return

## Labeling

In [25]:
def updown(close, lags=(1, 5, 21)):
    """Build forward-looking direction labels for each horizon in `lags`.

    For every lag, the label at row i describes the move from close[i] to
    close[i + lag]: 0 when that return is >= 0, 1 when it is negative.
    Rows whose forward return is unknown (the last `lag` rows, or rows where
    the return itself is NaN) stay NaN instead of being labeled 1.

    Parameters
    ----------
    close : pd.Series
        Price series the returns are computed from.
    lags : iterable of int, default (1, 5, 21)
        Forward horizons, in rows. An immutable default avoids sharing a
        mutable list between calls.

    Returns
    -------
    pd.DataFrame
        One float column per lag, named '<lag>_updown', indexed like `close`.
    """
    labels = {}
    for lag in lags:
        # Return realized over the NEXT `lag` rows, aligned to the current row.
        forward_return = close.pct_change(lag).shift(-lag)
        is_down = forward_return.lt(0).astype(float)
        # NaN compares False against everything, so mask unknown returns
        # explicitly rather than letting them fall into one of the classes.
        labels[f'{lag}_updown'] = is_down.where(forward_return.notna())
    return pd.DataFrame(labels)

In [26]:
# Attach one direction-label column per horizon to the feature frame.
lags = [1, 5, 21]
label_columns = [f'{lag}_updown' for lag in lags]
prices[label_columns] = updown(prices.close, lags)

## Save Data

In [27]:
# Drop the raw bar fields that are not exported; everything else in `prices`
# (date, time, close, engineered features, labels) is kept.
raw_bar_fields = ["open", "high", "low", "volume"]
data = prices.drop(columns=raw_bar_fields)

In [28]:
# Write the feature/label table to CSV without the positional index.
output_path = 'data/1.kospi200futures_TI.csv'
data.to_csv(output_path, index=False)

In [29]:
# Show the exported table (bare last expression -> rich DataFrame display).
data

Unnamed: 0,date,time,close,07_RSI,10_RSI,13_RSI,16_RSI,19_RSI,22_RSI,25_RSI,...,34_RETURN,37_RETURN,40_RETURN,43_RETURN,46_RETURN,49_RETURN,52_RETURN,1_updown,5_updown,21_updown
0,20070102,1030,187.15,,,,,,,,...,,,,,,,,1.0,0.0,1.0
1,20070102,1100,186.75,,,,,,,,...,,,,,,,,0.0,0.0,1.0
2,20070102,1130,187.35,,,,,,,,...,,,,,,,,0.0,1.0,1.0
3,20070102,1200,187.45,,,,,,,,...,,,,,,,,1.0,1.0,1.0
4,20070102,1230,187.35,,,,,,,,...,,,,,,,,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49442,20211230,1400,394.75,36.806190,38.738755,40.768487,42.467639,43.865237,45.023160,45.987350,...,0.001395,0.002540,0.000253,-0.000759,-0.002779,-0.001518,-0.005668,1.0,,
49443,20211230,1430,394.30,31.838080,35.216103,37.958982,40.101412,41.809076,43.200465,44.349486,...,0.000634,-0.002025,-0.000507,-0.001266,-0.003790,-0.003160,-0.005549,1.0,,
49444,20211230,1500,393.75,26.699229,31.345276,34.784966,37.385707,39.424899,41.071484,42.425739,...,-0.001521,-0.001015,-0.001395,-0.003165,-0.003921,-0.005305,-0.002154,0.0,,
49445,20211230,1530,394.45,40.870413,40.581179,41.526394,42.657559,43.735325,44.704850,45.556633,...,-0.000253,0.000634,0.001778,-0.000507,-0.001519,-0.003537,-0.002276,1.0,,
