# 初期設定

In [1]:
from typing import List
import os
from glob import glob
from datetime import datetime

import numpy as np
import pandas as pd
from nptyping import NDArray
import numba
import talib

INPUT_DIR = os.path.join('..', 'orderbook')
OUTPUT_DIR = os.path.join('..', 'features')

os.makedirs(OUTPUT_DIR, exist_ok=True)

# データ読み込み

In [2]:
def sort_by_datetime(pathlist: List[str]) -> List[str]:
    """ファイルパスのリストを日付によってソート"""

    def get_datetime(filename: str) -> str:
        """ファイル名の日付部分をdatetimeで返却"""
        date_str : str = os.path.basename(filename).split('_')[0]
        return datetime.strptime(date_str, '%Y%m%d')

    return  sorted(pathlist, key=lambda x: get_datetime(x))

In [3]:
bybit_datas_pathlist = glob(os.path.join(INPUT_DIR, '*_bybit.pkl.bz2'))
bybit_datas_pathlist = sort_by_datetime(bybit_datas_pathlist)

df_bybit_list = [pd.read_pickle(path) for path in bybit_datas_pathlist]
df_bybit = pd.concat(df_bybit_list)

df_bybit

Unnamed: 0,timestamp,close,buy_size_1,buy_size_2,buy_price_1,buy_price_2,sell_size_1,sell_size_2,sell_price_1,sell_price_2
0,2021-09-05 23:50:00.000008,50373.5,4867231,347737,50373.00,50372.50,3817624,2900,50373.50,50374.00
1,2021-09-05 23:50:05.000874,50373.0,4271988,346124,50373.00,50372.50,3998619,12026,50373.50,50374.00
2,2021-09-05 23:50:10.000845,50373.0,624357,2168,50373.00,50372.50,5505129,12026,50373.50,50374.00
3,2021-09-05 23:50:15.000619,50371.5,697185,279440,50371.50,50371.00,6489551,248445,50372.00,50372.50
4,2021-09-05 23:50:20.000957,50371.5,643874,205555,50371.50,50371.00,6593173,248445,50372.00,50372.50
...,...,...,...,...,...,...,...,...,...,...
15119,2021-09-20 21:00:02.000816,43731.5,16858,1832,43730.50,43730.00,3370061,452500,43731.00,43731.50
15120,2021-09-20 21:00:07.000853,43691.5,2254475,20002,43691.50,43691.00,634857,19301,43692.00,43694.50
15121,2021-09-20 21:00:12.000787,43687.0,345398,20002,43661.50,43661.00,2708842,2599,43662.00,43664.50
15122,2021-09-20 21:00:17.001051,43660.5,1032628,339970,43673.00,43672.50,1030147,95001,43673.50,43674.00


# 特徴量作成

In [24]:
def create_features(df):
    """talibを用いて特徴量作成"""
    df = df.copy()
    close = df['close']

    df['BBANDS_upperband'], df['BBANDS_middleband'], df['BBANDS_lowerband'] = talib.BBANDS(close, timeperiod=10, nbdevup=2, nbdevdn=2, matype=0)
    df['SMA'] = talib.SMA(close, timeperiod=30)
    df['EMA'] = talib.EMA(close, timeperiod=30)
    df['DEMA'] = talib.DEMA(close, timeperiod=30)  # 2重指数移動平均
    df['MACD_macd'], df['MACD_macdsignal'], df['MACD_macdhist'] = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    df['TEMA10'] = talib.TEMA(close, timeperiod=10)
    df['TEMA30'] = talib.TEMA(close, timeperiod=30)

    return df.dropna()


df = create_features(df=df_bybit)
df

Unnamed: 0,timestamp,close,buy_size_1,buy_size_2,buy_price_1,buy_price_2,sell_size_1,sell_size_2,sell_price_1,sell_price_2,...,BBANDS_middleband,BBANDS_lowerband,SMA,EMA,DEMA,MACD_macd,MACD_macdsignal,MACD_macdhist,TEMA10,TEMA30
87,2021-09-05 23:57:15.001627,50250.0,5002370,261469,50249.50,50249.00,2479452,500,50250.00,50251.00,...,50233.70,50216.103409,50279.566667,50268.624093,50233.387927,-19.876795,-23.340356,3.463561,50244.371152,50224.005137
88,2021-09-05 23:57:20.001557,50250.0,4648618,204301,50249.50,50249.00,752913,1,50250.00,50250.50,...,50236.20,50217.209476,50276.916667,50267.422539,50233.335639,-17.896456,-22.251576,4.355120,50248.733752,50225.633310
89,2021-09-05 23:57:25.001585,50250.0,4227320,254582,50249.50,50249.00,2521223,1,50250.00,50250.50,...,50238.70,50219.683165,50274.283333,50266.298504,50233.359243,-16.140960,-21.029453,4.888492,50251.510507,50227.227435
90,2021-09-05 23:57:30.001533,50250.0,2655342,254558,50249.50,50249.00,4262308,255364,50250.00,50250.50,...,50241.20,50223.518371,50271.633333,50265.246987,50233.449164,-14.581629,-19.739888,5.158259,50253.167602,50228.780752
91,2021-09-05 23:57:35.001609,50249.5,2233474,9131,50249.50,50249.00,4956121,243707,50250.00,50250.50,...,50243.15,50226.572606,50268.616667,50264.231053,50233.534311,-13.233645,-18.438639,5.204994,50253.821845,50230.197132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15119,2021-09-20 21:00:02.000816,43731.5,16858,1832,43730.50,43730.00,3370061,452500,43731.00,43731.50,...,43733.45,43715.755651,43713.300000,43708.725180,43743.388409,14.686828,15.280547,-0.593719,43737.899929,43740.298128
15120,2021-09-20 21:00:07.000853,43691.5,2254475,20002,43691.50,43691.00,634857,19301,43692.00,43694.50,...,43730.85,43701.047148,43713.700000,43707.613878,43739.001165,10.677770,14.359992,-3.682222,43717.756757,43733.045666
15121,2021-09-20 21:00:12.000787,43687.0,345398,20002,43661.50,43661.00,2708842,2599,43662.00,43664.50,...,43727.80,43688.444695,43713.950000,43706.283950,43734.402125,7.056112,12.899216,-5.843104,43702.222330,43725.772650
15122,2021-09-20 21:00:17.001051,43660.5,1032628,339970,43673.00,43672.50,1030147,95001,43673.50,43674.00,...,43720.10,43664.550383,43712.900000,43703.330147,43726.871011,2.024259,10.724224,-8.699965,43680.131922,43714.516275


In [25]:
@numba.njit
def create_trade_signal_macd(
        macd: NDArray[float], 
        signal: NDArray[float],
        tilt_threshold: float) -> List[int]:
    
    num_data : int = len(macd)
    trade_signal_macd : List[int] = [0] * num_data
    tilt : List[float] = [0.0] * num_data

    for i in range(1, num_data):
        if (signal[i-1] > macd[i-1]) and (signal[i] < macd[i]):
            if signal[i-1] < 0 and macd[i-1] < 0:
                tilt[i] = macd[i] - macd[i-1]
                if tilt[i] > tilt_threshold:
                    trade_signal_macd[i] = 1
        elif (signal[i-1] < macd[i-1]) and (signal[i] > macd[i]):
            if signal[i-1] > 0 and macd[i-1] > 0:
                tilt[i] = macd[i] - macd[i-1]
                if abs(tilt[i]) > tilt_threshold:
                    trade_signal_macd[i] = -1
                
    return trade_signal_macd


df['trade_signal_macd'] = create_trade_signal_macd(
    macd=df['MACD_macd'].values,
    signal=df['MACD_macdsignal'].values,
    tilt_threshold=0.0)

In [29]:
df_bybit.to_pickle(os.path.join(OUTPUT_DIR, 'bybit.pkl.bz2'), compression='bz2')

# プロット

In [27]:
df_plot = df.iloc[-500:]

df_macd_buy = df_plot[df_plot['trade_signal_macd'] == 1][['timestamp', 'close', 'MACD_macd', 'trade_signal_macd']]
df_macd_sell = df_plot[df_plot['trade_signal_macd'] == -1][['timestamp', 'close', 'MACD_macd', 'trade_signal_macd']]

In [28]:
buy_marker = dict(
    color='rgba(242, 192, 26, .9)',
    size=13,
    line=dict(color='rgba(184, 210, 0, .4)', width=2.5))

sell_marker = dict(
    color='rgba(242, 192, 26, .9)',
    size=13,
    line=dict(color='rgba(255, 160, 122, .4)', width=2.5))



import plotly.graph_objects as go
from plotly.subplots import make_subplots

# figを定義
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05, row_width=[0.4, 0.6], x_title="Date")

# close 
fig.add_trace(
    go.Scatter(x=df_plot["timestamp"], y=df_plot["close"], name="OHLC", mode="lines"),
    row=1, col=1)
fig.add_trace(
    go.Scatter(x=df_macd_buy['timestamp'], y=df_macd_buy['close'], name='signal_MACD', mode='markers', marker=buy_marker, marker_symbol='star'),
    row=1, col=1)
fig.add_trace(
    go.Scatter(x=df_macd_sell['timestamp'], y=df_macd_sell['close'], name='signal_MACD', mode='markers', marker=sell_marker, marker_symbol='star'),
    row=1, col=1)
# BB
fig.add_trace(
    go.Scatter(x=df_plot["timestamp"], y=df_plot["BBANDS_upperband"], name="BB_up", mode="lines"),
    row=1, col=1)
fig.add_trace(
    go.Scatter(x=df_plot["timestamp"], y=df_plot["BBANDS_lowerband"], name="BB_down", mode="lines"),
    row=1, col=1)


# MACD
fig.add_trace(
    go.Scatter(x=df_plot["timestamp"], y=df_plot["MACD_macd"], name="MACD", mode="lines"),
    row=2, col=1)
fig.add_trace(
    go.Scatter(x=df_plot["timestamp"], y=df_plot["MACD_macdsignal"], name="Signal", mode="lines"),
    row=2, col=1)
fig.add_trace(
    go.Scatter(x=df_macd_buy['timestamp'], y=df_macd_buy['MACD_macd'], name='signal_MACD', mode='markers', marker=buy_marker, marker_symbol='star'),
    row=2, col=1)
fig.add_trace(
    go.Scatter(x=df_macd_sell['timestamp'], y=df_macd_sell['MACD_macd'], name='signal_MACD', mode='markers', marker=sell_marker, marker_symbol='star'),
    row=2, col=1)


# y軸名を定義
fig.update_yaxes(title_text="USD", row=1, col=1)
fig.update_yaxes(title_text="MACD", row=2, col=1)


fig.update(layout_xaxis_rangeslider_visible=False)
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
fig.show()