In [1]:
import pandas as pd
import plotly.graph_objects as go # 可以畫 candle 圖的套件
import utils

In [3]:
pair = "USD_JPY"
granularity = "H4"

df_raw = pd.read_pickle(utils.get_his_data_filename(pair, granularity))

In [5]:
df_raw.head()

Unnamed: 0,time,volume,mid_o,mid_h,mid_l,mid_c,bid_o,bid_h,bid_l,bid_c,ask_o,ask_h,ask_l,ask_c
0,2018-01-01T22:00:00.000000000Z,2968,112.666,112.793,112.582,112.723,112.626,112.786,112.559,112.716,112.706,112.8,112.601,112.73
1,2018-01-02T02:00:00.000000000Z,1009,112.72,112.751,112.666,112.688,112.712,112.744,112.657,112.683,112.728,112.758,112.674,112.694
2,2018-01-02T06:00:00.000000000Z,2226,112.691,112.7,112.249,112.252,112.684,112.694,112.242,112.245,112.698,112.705,112.256,112.258
3,2018-01-02T10:00:00.000000000Z,2056,112.254,112.282,112.086,112.124,112.247,112.275,112.079,112.119,112.26,112.288,112.092,112.13
4,2018-01-02T14:00:00.000000000Z,3781,112.122,112.304,112.058,112.285,112.114,112.297,112.051,112.278,112.129,112.311,112.065,112.292


In [8]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4685 entries, 0 to 407
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   time    4685 non-null   object
 1   volume  4685 non-null   int64 
 2   mid_o   4685 non-null   object
 3   mid_h   4685 non-null   object
 4   mid_l   4685 non-null   object
 5   mid_c   4685 non-null   object
 6   bid_o   4685 non-null   object
 7   bid_h   4685 non-null   object
 8   bid_l   4685 non-null   object
 9   bid_c   4685 non-null   object
 10  ask_o   4685 non-null   object
 11  ask_h   4685 non-null   object
 12  ask_l   4685 non-null   object
 13  ask_c   4685 non-null   object
dtypes: int64(1), object(13)
memory usage: 549.0+ KB


In [10]:
# recall: 在 candle_plot.ipynb 中，我們提到要把不是數值型態的 column 都改為數值型態
non_cols = ['volumn', 'time'] # 雖然 time 是 object，但我們不想改變它的型態
mod_cols = [x for x in df_raw.columns if x not in non_cols]

df_raw[mod_cols] = df_raw[mod_cols].apply(pd.to_numeric)

In [11]:
df_raw.columns

Index(['time', 'volume', 'mid_o', 'mid_h', 'mid_l', 'mid_c', 'bid_o', 'bid_h',
       'bid_l', 'bid_c', 'ask_o', 'ask_h', 'ask_l', 'ask_c'],
      dtype='object')

In [12]:
# 把不需要用到的 column 去除
df = df_raw[ ['time', 'mid_o', 'mid_h', 'mid_l', 'mid_c', 'bid_c', 'ask_c'] ].copy()

In [13]:
df.head(3)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,bid_c,ask_c
0,2018-01-01T22:00:00.000000000Z,112.666,112.793,112.582,112.723,112.716,112.73
1,2018-01-02T02:00:00.000000000Z,112.72,112.751,112.666,112.688,112.683,112.694
2,2018-01-02T06:00:00.000000000Z,112.691,112.7,112.249,112.252,112.245,112.258


--- 

## 開始建構策略邏輯

In [14]:
# 若當日收盤價大於開盤價，當天 imply 的交易方向即為 1 (上漲趨勢)
def direction(row):
    if row['mid_c'] > row['mid_o']:
        return 1
    return -1

In [23]:
df['RANGE'] = df['mid_h'] - df['mid_l'] # 計算前一根 candle 的 range
df['RANGE_PREV'] = df.RANGE.shift(1) # 需要將今天的 range 與昨日的 range 比較


df['mid_h_prev'] = df.mid_h.shift(1)
df['mid_l_prev'] = df.mid_o.shift(1)


df['DIRECTION'] = df.apply(direction, axis=1) # 沿 row 方向 apply function
df['DIRECTION_prev'] = df.DIRECTION.shift(1) # 這可以幫助我們判斷是否要出場

In [24]:
df.head(3)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,bid_c,ask_c,RANGE,RANGE_PREV,mid_h_prev,mid_o_prev,DIRECTION,DIRECTION_prev,mid_l_prev
1,2018-01-02T02:00:00.000000000Z,112.72,112.751,112.666,112.688,112.683,112.694,0.085,,,112.666,-1,,
2,2018-01-02T06:00:00.000000000Z,112.691,112.7,112.249,112.252,112.245,112.258,0.451,0.085,112.751,112.72,-1,-1.0,112.72
3,2018-01-02T10:00:00.000000000Z,112.254,112.282,112.086,112.124,112.119,112.13,0.196,0.451,112.7,112.691,-1,-1.0,112.691


In [25]:
# 發現 DIRECTION_prev 的資料型態被 pandas 改為 float 了
df['DIRECTION_prev'] = df.DIRECTION.shift(1).fillna(0).astype(int)
df.dropna(inplace=True)

In [26]:
df.head(3)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,bid_c,ask_c,RANGE,RANGE_PREV,mid_h_prev,mid_o_prev,DIRECTION,DIRECTION_prev,mid_l_prev
2,2018-01-02T06:00:00.000000000Z,112.691,112.7,112.249,112.252,112.245,112.258,0.451,0.085,112.751,112.72,-1,-1,112.72
3,2018-01-02T10:00:00.000000000Z,112.254,112.282,112.086,112.124,112.119,112.13,0.196,0.451,112.7,112.691,-1,-1,112.691
4,2018-01-02T14:00:00.000000000Z,112.122,112.304,112.058,112.285,112.278,112.292,0.246,0.196,112.282,112.254,1,-1,112.254


## signal of the trade

In [27]:
def get_signal(row):
    if row.mid_h_prev > row.mid_h and row.mid_l_prev < row.mid_l: # 意思就是本日的 candle 完全包覆在上一日的 candle 裡面
        return row.DIRECTION_prev
    return 0

In [28]:
df['SIGNAL'] = df.apply(get_signal, axis=1)
df.reset_index(drop=True, inplace=True)

In [29]:
df.head(3)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,bid_c,ask_c,RANGE,RANGE_PREV,mid_h_prev,mid_o_prev,DIRECTION,DIRECTION_prev,mid_l_prev,SIGNAL
0,2018-01-02T06:00:00.000000000Z,112.691,112.7,112.249,112.252,112.245,112.258,0.451,0.085,112.751,112.72,-1,-1,112.72,0
1,2018-01-02T10:00:00.000000000Z,112.254,112.282,112.086,112.124,112.119,112.13,0.196,0.451,112.7,112.691,-1,-1,112.691,0
2,2018-01-02T14:00:00.000000000Z,112.122,112.304,112.058,112.285,112.278,112.292,0.246,0.196,112.282,112.254,1,-1,112.254,0


In [30]:
df[df.SIGNAL != 0].shape # 243 trading opportunity

(243, 15)

In [31]:
df.shape

(4683, 15)

## put everything together:
```python
def direction(row):
    if row['mid_c'] > row['mid_o']:
        return 1
    return -1

def get_signal(row):
    if row.mid_h_prev > row.mid_h and row.mid_l_prev < row.mid_l:
        return row.DIRECTION_prev
    return 0

df = df_raw[ ['time', 'mid_o', 'mid_h', 'mid_l', 'mid_c', 'bid_c', 'ask_c'] ].copy()
df['RANGE'] = df['mid_h'] - df['mid_l'] # 計算前一根 candle 的 range
df['RANGE_PREV'] = df.RANGE.shift(1) # 需要將今天的 range 與昨日的 range 比較
df['mid_h_prev'] = df.mid_h.shift(1)
df['mid_l_prev'] = df.mid_o.shift(1)
df['DIRECTION'] = df.apply(direction, axis=1) # 沿 row 方向 apply function
df['DIRECTION_prev'] = df.DIRECTION.shift(1).fillna(0).astype(int)
df.dropna(inplace=True)
df['SIGNAL'] = df.apply(get_signal, axis=1)
df.reset_index(drop=True, inplace=True)

```