Read CSV

In [2]:
import pandas as pd

ohlc = pd.read_csv("BTCUSDT.csv.gz")
ohlc.head()

Unnamed: 0,Date,Open,Close,Low,High,BaseVolume,QuoteVolume
0,1514764800000,13715.65,13694.92,13666.11,13715.65,4.957404,38931.004413
1,1514764860000,13707.91,13680.0,13666.11,13707.91,3.459549,28910.982501
2,1514764920000,13682.0,13645.99,13601.0,13694.94,12.935828,157962.177508
3,1514764980000,13679.98,13600.0,13576.28,13679.98,27.313983,213978.120703
4,1514765040000,13645.98,13568.0,13554.44,13646.0,22.119789,213978.120703


Calculate Indicators

In [3]:
from ta.volatility import BollingerBands
import signals
from ta import momentum, volume, volatility;
import importlib
importlib.reload(signals)

minimum_deviation = 0.01

kline_count = len(ohlc['Date'])
zigzag = signals.zigzag(ohlc, 'High', 'Low', minimum_deviation)
effective_price = ohlc['QuoteVolume'] / ohlc['BaseVolume']
rsi7 = momentum.rsi(ohlc['Close'], 7)
rsi14 = momentum.rsi(ohlc['Close'], 14)
rsi21 = momentum.rsi(ohlc['Close'], 21)
mfi7 = volume.money_flow_index(
    high=ohlc['High'], low=ohlc['Low'], close=ohlc['Close'], volume=ohlc['QuoteVolume'], window=7)
mfi14 = volume.money_flow_index(
    high=ohlc['High'], low=ohlc['Low'], close=ohlc['Close'], volume=ohlc['QuoteVolume'], window=14)
mfi21 = volume.money_flow_index(
    high=ohlc['High'], low=ohlc['Low'], close=ohlc['Close'], volume=ohlc['QuoteVolume'], window=21)

Prepare datasets

In [11]:
import numpy as np
import numpy.ma as ma

sigs = signals.split_signals(zigzag)

long_signal_count = len(sigs["long"])
short_signal_count = len(sigs["short"])

signal_count = long_signal_count + short_signal_count
signal_period = min(sigs["long"][0], sigs["short"][1], round(kline_count / signal_count))

columns = ["Date", "Long", "Neutral", "Short"] \
    + [f"rsi7_{i}" for i in range(signal_period)]  \
    + [f"rsi14_{i}" for i in range(signal_period)]  \
    + [f"rsi21_{i}" for i in range(signal_period)]  \
    + [f"mfi7_{i}" for i in range(signal_period)]  \
    + [f"mfi14_{i}" for i in range(signal_period)]  \
    + [f"mfi21_{i}" for i in range(signal_period)] \

#+ [f"effective_price_{i}" for i in range(signal_period)] \
#+ [f"quote_volume_{i}" for i in range(signal_period)] \

signal_options = [
    [0, 1, 0], #Neutral
    [1, 0, 0], #Long
    [0, 0, 1], #Short
]

df = pd.DataFrame(
    [ma.concatenate([
        [ohlc['Date'][sigs["short"][i]]],
        signal_options[-1],
        #effective_price[sigs["short"][i] -
        #                signal_period:sigs["short"][i]].values,
        #ohlc['QuoteVolume'][sigs["short"][i] -
        #                signal_period:sigs["short"][i]].values,
        rsi7[sigs["short"][i] - signal_period:sigs["short"][i]].values,
        rsi14[sigs["short"][i] - signal_period:sigs["short"][i]].values,
        rsi21[sigs["short"][i] - signal_period:sigs["short"][i]].values,
        mfi7[sigs["short"][i] - signal_period:sigs["short"][i]].values,
        mfi14[sigs["short"][i] - signal_period:sigs["short"][i]].values,
        mfi21[sigs["short"][i] - signal_period:sigs["short"][i]].values,
    ])
        for i in range(short_signal_count)] +
    [ma.concatenate([
        [ohlc['Date'][sigs["long"][i]]],
        signal_options[1],
        #effective_price[sigs["long"][i] -
        #                signal_period:sigs["long"][i]].values,
        #ohlc['QuoteVolume'][sigs["long"][i] -
        #                    signal_period:sigs["long"][i]].values,
        rsi7[sigs["long"][i] - signal_period:sigs["long"][i]].values,
        rsi14[sigs["long"][i] - signal_period:sigs["long"][i]].values,
        rsi21[sigs["long"][i] - signal_period:sigs["long"][i]].values,
        mfi7[sigs["long"][i] - signal_period:sigs["long"][i]].values,
        mfi14[sigs["long"][i] - signal_period:sigs["long"][i]].values,
        mfi21[sigs["long"][i] - signal_period:sigs["long"][i]].values,
    ])
        for i in range(long_signal_count)], columns=columns).dropna()

print(
    f"generated data for {df.shape[0]} signals with signal period of {signal_period}. {signal_count - df.shape[0]} rows was empty")

signal_count = df.shape[0]

shuffled_df = df.sample(frac=1).reset_index(drop=True)
 

generated data for 10669 signals with signal period of 200. 53 rows was empty


Split data for training, validation and test

In [12]:
training_percentage = 0.9
validation_percentage = 0.05

training_data_count = round(signal_count * training_percentage)
validation_data_count = round(signal_count * validation_percentage)
test_data_count = signal_count - validation_data_count - training_data_count

training_df = shuffled_df.iloc[0:training_data_count, :]
training_df_x = training_df.drop(['Date', 'Long', 'Short', 'Neutral'], axis=1)
training_df_y = training_df[['Long', 'Neutral', 'Short']]

validation_df = shuffled_df.iloc[training_data_count:
                        validation_data_count + training_data_count, :]
validation_df_x = validation_df.drop(
    ['Date', 'Long', 'Short', 'Neutral'], axis=1)
validation_df_y = validation_df[['Long', 'Neutral', 'Short']]

test_df = shuffled_df.iloc[validation_data_count + training_data_count:, :]
test_df_x = test_df.drop(
    ['Date', 'Long', 'Short', 'Neutral'], axis=1)
test_df_y = test_df[['Long', 'Neutral', 'Short']]


Create a model and train

In [13]:
import os
from tensorflow import keras

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

input_layer_size = training_df_x.shape[1]
output_layer_size = training_df_y.shape[1]

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(input_layer_size, 1)),
    keras.layers.Dense(output_layer_size, activation="softmax"),
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(training_df_x, training_df_y, epochs=100)

results = model.evaluate(validation_df_x, validation_df_y)
print("test loss, test acc:", results)

results = model.evaluate(test_df_x, test_df_y)
print("test loss, test acc:", results)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Predict today

In [25]:
X = df.drop(['Date', 'Long', 'Short', 'Neutral'], axis=1)[5, :]
Y = df[['Date', 'Long', 'Short', 'Neutral']]
print(X[5, :])

InvalidIndexError: (5, slice(None, None, None))