Read CSV

In [1]:
import pandas as pd

# Load the raw klines and derive a proper timestamp column from the
# millisecond epoch values stored in 'Date'.
raw_klines_path = "data/BTCUSDT.csv.gz"
unaggregated_ohlc = pd.read_csv(raw_klines_path)
unaggregated_ohlc['datetime'] = pd.to_datetime(unaggregated_ohlc['Date'], unit='ms')

# How each column is collapsed when several raw rows fall into one 30-minute bin.
candle_aggregations = {
    'Date': 'first',
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'BaseVolume': 'sum',
    'QuoteVolume': 'sum',
}
half_hour_bins = unaggregated_ohlc.resample('30min', on="datetime")
ohlc = half_hour_bins.agg(candle_aggregations)

ohlc.head()

Unnamed: 0_level_0,Date,Open,High,Low,Close,BaseVolume,QuoteVolume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-01 00:00:00,1514764800000,13715.65,13715.65,13400.01,13500.0,443.036395,4073998.0
2018-01-01 00:30:00,1514766600000,13500.0,13690.87,13450.0,13543.87,442.367474,4123942.0
2018-01-01 01:00:00,1514768400000,13528.99,13595.89,13402.28,13557.99,319.945801,2850844.0
2018-01-01 01:30:00,1514770200000,13559.99,13559.99,13155.38,13280.28,457.116588,3984973.0
2018-01-01 02:00:00,1514772000000,13203.0,13411.76,13200.0,13208.0,406.188208,3174410.0


In [2]:

# Preview the first five raw (un-resampled) rows for comparison with the 30-minute candles.
unaggregated_ohlc.head(n=5)


Unnamed: 0,Date,Open,Close,Low,High,BaseVolume,QuoteVolume,datetime
0,1514764800000,13715.65,13694.92,13666.11,13715.65,4.957404,38931.004413,2018-01-01 00:00:00
1,1514764860000,13707.91,13680.0,13666.11,13707.91,3.459549,28910.982501,2018-01-01 00:01:00
2,1514764920000,13682.0,13645.99,13601.0,13694.94,12.935828,157962.177508,2018-01-01 00:02:00
3,1514764980000,13679.98,13600.0,13576.28,13679.98,27.313983,213978.120703,2018-01-01 00:03:00
4,1514765040000,13645.98,13568.0,13554.44,13646.0,22.119789,213978.120703,2018-01-01 00:04:00


Calculate Indicators

In [3]:
import signals
from ta import momentum, volume, volatility;
import importlib
importlib.reload(signals)

# Deviation threshold handed to signals.zigzag.
minimum_deviation = 0.02

kline_count = len(ohlc['Date'])

# Labels: the counts below treat +1 as a long signal and -1 as a short signal.
zigzag = signals.zigzag(ohlc, 'High', 'Low', minimum_deviation)

# Quote volume divided by base volume for each candle.
effective_price = ohlc['QuoteVolume'] / ohlc['BaseVolume']


def _money_flow_index(window):
    """Return the money flow index of `ohlc` for the given window length.

    The quote-currency volume column is passed as the volume input, exactly
    as the three separate calls this helper replaces did.
    """
    return volume.money_flow_index(
        high=ohlc['High'], low=ohlc['Low'], close=ohlc['Close'],
        volume=ohlc['QuoteVolume'], window=window)


rsi7 = momentum.rsi(ohlc['Close'], 7)
rsi14 = momentum.rsi(ohlc['Close'], 14)
rsi21 = momentum.rsi(ohlc['Close'], 21)
mfi7 = _money_flow_index(7)
mfi14 = _money_flow_index(14)
mfi21 = _money_flow_index(21)

long_count = len(zigzag.index[zigzag == +1])
short_count = len(zigzag.index[zigzag == -1])

# Average number of candles per signal. Guard the division so a run that
# yields no signals at all reports nan instead of raising ZeroDivisionError.
signal_count = long_count + short_count
average_klines_per_signal = kline_count / signal_count if signal_count else float("nan")

print(
    f"generated {short_count} short and {long_count} long signals. Signals has avg. {average_klines_per_signal} frequency")


generated 882 short and 883 long signals. Signals has avg. 40.59773371104816 frequency


Prepare datasets

In [4]:
import numpy as np
import numpy.ma as ma


# Number of preceding candles whose indicator values become features of one row.
lookback_period = 80
# Seed for the row shuffle so the later train/validation/test split is reproducible.
shuffle_seed = 42
kline_count = ohlc.shape[0]

# Indicator series, in the order their lookback windows appear in each row.
# TODO: effective_price and QuoteVolume lookback features are currently disabled.
feature_series = {
    "rsi7": rsi7,
    "rsi14": rsi14,
    "rsi21": rsi21,
    "mfi7": mfi7,
    "mfi14": mfi14,
    "mfi21": mfi21,
}

columns = ["Date", "Long", "Short"] + [
    f"{feature_name}_{i}"
    for feature_name in feature_series
    for i in range(lookback_period)
]

# Indexed by the zigzag value of the candle: 0 and +1 select the first two
# entries, while -1 selects the last entry through negative list indexing.
signal_options = [
    [0, 0], #Neutral
    [1, 0], #Long
    [0, 1], #Short
]

# Pull plain arrays out once. This keeps every lookup in the loop strictly
# positional (integer keys on a datetime-indexed Series are deprecated in
# pandas) and avoids re-slicing pandas objects for every row.
dates = ohlc['Date'].to_numpy()
zigzag_values = zigzag.to_numpy()
feature_arrays = [series.to_numpy() for series in feature_series.values()]

# One row per candle i: its date, its one-hot signal, then for each indicator
# the `lookback_period` values from the candles immediately before i
# (candle i itself is excluded from the window).
rows = [
    np.concatenate(
        [[dates[i]], signal_options[zigzag_values[i]]]
        + [values[i - lookback_period:i] for values in feature_arrays]
    )
    for i in range(lookback_period, kline_count)
]

# Rows containing any NaN are discarded.
df = pd.DataFrame(rows, columns=columns).dropna()

print(
    f"generated data for {df.shape[0]} signals with lookback period of {lookback_period}. {kline_count - lookback_period - df.shape[0]} rows was empty")

data_count = df.shape[0]

# NOTE(review): consecutive rows share all but one value of every lookback
# window, so a random shuffle places near-duplicate rows in different splits.
# Consider a chronological split if the held-out metrics should be independent.
shuffled_df = df.sample(frac=1, random_state=shuffle_seed).reset_index(drop=True)
 

generated data for 68695 signals with lookback period of 80. 2880 rows was empty


Split data for training, validation and test

In [5]:
# Share of the shuffled rows assigned to training and validation; whatever
# remains after both are taken becomes the test set.
training_percentage = 0.9
validation_percentage = 0.05

training_data_count = round(data_count * training_percentage)
validation_data_count = round(data_count * validation_percentage)
test_data_count = data_count - validation_data_count - training_data_count

# Columns that are targets or bookkeeping rather than model inputs.
label_columns = ['Long', 'Short']
non_feature_columns = ['Date'] + label_columns

# Row position where the validation slice ends and the test slice begins.
validation_end = validation_data_count + training_data_count

# Contiguous, non-overlapping slices of the shuffled frame.
training_df = shuffled_df.iloc[:training_data_count]
validation_df = shuffled_df.iloc[training_data_count:validation_end]
test_df = shuffled_df.iloc[validation_end:]

# Inputs (x) and targets (y) for each slice.
training_df_x = training_df.drop(columns=non_feature_columns)
training_df_y = training_df[label_columns]

validation_df_x = validation_df.drop(columns=non_feature_columns)
validation_df_y = validation_df[label_columns]

test_df_x = test_df.drop(columns=non_feature_columns)
test_df_y = test_df[label_columns]


Create a model and train

In [8]:
import os

# Hide all CUDA devices so the model runs on the CPU. This is set before
# TensorFlow is imported so it is in place before any GPU initialisation.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

from tensorflow import keras

input_layer_size = training_df_x.shape[1]
output_layer_size = training_df_y.shape[1]

# Single dense layer with one sigmoid output per label column (Long, Short).
# The declared input shape matches the 2-D frames passed to fit/evaluate/predict:
# one flat vector of `input_layer_size` features per row. Flatten is a no-op
# for such rows and is kept only to preserve the original layer layout.
model = keras.Sequential([
    keras.Input(shape=(input_layer_size,)),
    keras.layers.Flatten(),
    keras.layers.Dense(output_layer_size, activation="sigmoid"),
])

# NOTE(review): long/short rows are a small minority of the dataset, so the
# "accuracy" figure is dominated by neutral rows. Confirm that this metric and
# the MSE loss are what is wanted before relying on the reported numbers.
model.compile(optimizer="adam",
              loss=keras.losses.MeanSquaredError(), metrics=["accuracy"])
model.fit(training_df_x, training_df_y, epochs=10)

# Report each held-out set under its own label.
validation_metrics = model.evaluate(validation_df_x, validation_df_y)
print("validation loss, validation acc:", validation_metrics)

test_metrics = model.evaluate(test_df_x, test_df_y)
print("test loss, test acc:", test_metrics)

# Score every prepared row (training rows included) with the trained model.
X = df.drop(['Date', 'Long', 'Short'], axis=1)

predicted = model.predict(X)

# Copy of the dataset with the two predicted scores appended.
results = df.copy()

results['PredictedLong'] = predicted[:, 0]
results['PredictedShort'] = predicted[:, 1]

# A row counts as a predicted signal when its score exceeds 0.5.
long_signals = results.loc[results['PredictedLong'] > 0.5]
short_signals = results.loc[results['PredictedShort'] > 0.5]

print(
    f"produced total {long_signals.shape[0]} long and {short_signals.shape[0]} short signals")




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
test loss, test acc: [0.011353711597621441, 0.9895196557044983]
test loss, test acc: [0.011211414821445942, 0.9880605936050415]
produced total 0 long and 0 signals
