In [29]:
!pip install -Uqq pyts fastbook pandas-ta

In [30]:
import datetime as dt
import os
import warnings
import numpy as np
import pandas as pd
import pywt
import pywt.data
import re
import gc
import pandas_ta as ta
from pyts.image import GramianAngularField
from PIL import Image as im

import fastbook
fastbook.setup_book()
from fastbook import *

In [31]:
SYMBOL = 'BTCUSDT'
INTERVAL = '1m'
INPUT_SIZE = 30
RAW_INPUT_SIZE = 100
THRESHOLD = 0.97
TRADING_PERCENT = 0.1
INITIAL_USD_BALANCE = 1000
STOP_PROFIT = 0.004
STOP_LOSS = 0.004
ORDER_LIFE = 5

In [32]:
wavelet_type = 'sym15'
w = pywt.Wavelet(wavelet_type)
def denoise(data):
    if len(data) > 0:
        maxlev = pywt.dwt_max_level(len(data), w.dec_len)
        coeffs = pywt.wavedec(data, wavelet_type, level=maxlev)
        coeffs[-1] = np.zeros_like(coeffs[-1])
        datarec = pywt.waverec(coeffs, wavelet_type)
        return datarec
    else:
        return data

In [33]:
df = pd.read_csv("../input/binance-1m/binance.csv", header=0,
                 names=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
df['timestamp'].apply(lambda x: pd.to_datetime(x))

0        1970-01-01 00:27:41.990460
1        1970-01-01 00:27:41.990520
2        1970-01-01 00:27:41.990580
3        1970-01-01 00:27:41.990640
4        1970-01-01 00:27:41.990700
                    ...            
443053   1970-01-01 00:28:08.578440
443054   1970-01-01 00:28:08.578500
443055   1970-01-01 00:28:08.578560
443056   1970-01-01 00:28:08.578620
443057   1970-01-01 00:28:08.578680
Name: timestamp, Length: 443058, dtype: datetime64[ns]

In [34]:

def calc_label(dataset, position):
    price = dataset['close'].to_numpy()[position]
    stop_loss = STOP_LOSS * price
    stop_profit = STOP_PROFIT * price
    label = "wait"
    bearish_stop_loss = price + stop_loss
    bearish_stop_profit = price - stop_profit
    bullish_stop_loss = price - stop_loss
    bullish_stop_profit = price + stop_profit
    for i in range(1, ORDER_LIFE+1):
        max_price = dataset['high'].to_numpy()[position+i]
        low_price = dataset['low'].to_numpy()[position+i]
        if low_price > bullish_stop_loss:
            if max_price >= bullish_stop_profit:
                label = "buy"
                break
        else:
            break
    for i in range(1, ORDER_LIFE+1):
        max_price = dataset['high'].to_numpy()[position+i]
        low_price = dataset['low'].to_numpy()[position+i]
        if max_price < bearish_stop_loss:
            if low_price <= bearish_stop_profit:
                label = "sell"
                break
        else:
            break
    return label

In [35]:
L = len(df['close'])
df_labels = [None]*L
for i in range(0, L-ORDER_LIFE):
    df_labels[i] = calc_label(df, i)

df['label'] = df_labels

In [36]:

df = df.iloc[:-ORDER_LIFE].reset_index(drop=True).copy()

In [37]:
DS_LENGTH = len(df['close'])
input_open = [None]*DS_LENGTH
input_high = [None]*DS_LENGTH
input_low = [None]*DS_LENGTH
input_close = [None]*DS_LENGTH
input_volume = [None]*DS_LENGTH
input_timestamp = [None]*DS_LENGTH

In [38]:
for i in range(0, DS_LENGTH):
    if i + 1 >= RAW_INPUT_SIZE:
        input_open[i] = df.iloc[i+1-RAW_INPUT_SIZE:i+1].open.copy()
df_label = df[['label']].copy()
df_label['input_open'] = input_open
df_label = df_label.iloc[RAW_INPUT_SIZE:]
df_label = df_label[df_label['label'] != 'wait'].reset_index(drop=True).copy()
df_label.to_json('/kaggle/working/input_open.json', orient='records')
del df_label
gc.collect()

39986454

In [39]:
for i in range(0, DS_LENGTH):
    if i + 1 >= RAW_INPUT_SIZE:
        input_high[i] = df.iloc[i+1-RAW_INPUT_SIZE:i+1].high.copy()
df_label = df[['label']].copy()
df_label['input_high'] = input_high
df_label = df_label.iloc[RAW_INPUT_SIZE:]
df_label = df_label[df_label['label'] != 'wait'].reset_index(drop=True).copy()
df_label.to_json('/kaggle/working/input_high.json', orient='records')
del df_label
gc.collect()

0

In [40]:
for i in range(0, DS_LENGTH):
    if i + 1 >= RAW_INPUT_SIZE:
        input_low[i] = df.iloc[i+1-RAW_INPUT_SIZE:i+1].low.copy()
df_label = df[['label']].copy()
df_label['input_low'] = input_low
df_label = df_label.iloc[RAW_INPUT_SIZE:]
df_label = df_label[df_label['label'] != 'wait'].reset_index(drop=True).copy()
df_label.to_json('/kaggle/working/input_low.json', orient='records')
del df_label
gc.collect()

0

In [41]:
for i in range(0, DS_LENGTH):
    if i + 1 >= RAW_INPUT_SIZE:
        input_close[i] = df.iloc[i+1-RAW_INPUT_SIZE:i+1].close.copy()
df_label = df[['label']].copy()
df_label['input_close'] = input_close
df_label = df_label.iloc[RAW_INPUT_SIZE:]
df_label = df_label[df_label['label'] != 'wait'].reset_index(drop=True).copy()
df_label.to_json('/kaggle/working/input_close.json', orient='records')
del df_label
gc.collect()

0

In [42]:
for i in range(0, DS_LENGTH):
    if i + 1 >= RAW_INPUT_SIZE:
        input_volume[i] = df.iloc[i+1-RAW_INPUT_SIZE:i+1].volume.copy()
df_label = df[['label']].copy()
df_label['input_volume'] = input_volume
df_label = df_label.iloc[RAW_INPUT_SIZE:]
df_label = df_label[df_label['label'] != 'wait'].reset_index(drop=True).copy()
df_label.to_json('/kaggle/working/input_volume.json', orient='records')
del df_label
gc.collect()

0

In [43]:
for i in range(0, DS_LENGTH):
    if i + 1 >= RAW_INPUT_SIZE:
        input_timestamp[i] = df.iloc[i+1-RAW_INPUT_SIZE:i +
                                     1].timestamp.copy().reset_index(drop=True)
df_label = df[['label']].copy()
df_label['input_timestamp'] = input_timestamp
df_label = df_label.iloc[RAW_INPUT_SIZE:]
df_label = df_label[df_label['label'] != 'wait'].reset_index(drop=True).copy()
df_label.to_json('/kaggle/working/input_timestamp.json', orient='records')

In [44]:
DS_LENGTH = len(df_label['label'])
del df_label
del df
gc.collect()

0

In [45]:
df = pd.read_json('/kaggle/working/input_open.json')
df

Unnamed: 0,label,input_open
0,sell,"[20126.77, 20134.39, 20127.45, 20137.82, 20127.24, 20139.3, 20140.23, 20154.54, 20156.16, 20155.79, 20148.08, 20159.98, 20152.3, 20162.3, 20163.86, 20170.01, 20188.75, 20177.99, 20190.74, 20172.72, 20163.79, 20171.66, 20175.47, 20179.54, 20180.68, 20179.96, 20174.32, 20186.24, 20199.72, 20198.78, 20180.29, 20190.27, 20188.05, 20191.95, 20188.66, 20185.27, 20184.46, 20182.56, 20181.4, 20175.57, 20174.48, 20177.81, 20138.62, 20141.32, 20143.13, 20119.53, 20131.97, 20130.34, 20118.83, 20121.36, 20098.39, 20082.59, 20088.46, 20071.68, 20090.48, 20089.18, 20097.15, 20098.14, 20101.2, 20100.31, ..."
1,sell,"[20134.39, 20127.45, 20137.82, 20127.24, 20139.3, 20140.23, 20154.54, 20156.16, 20155.79, 20148.08, 20159.98, 20152.3, 20162.3, 20163.86, 20170.01, 20188.75, 20177.99, 20190.74, 20172.72, 20163.79, 20171.66, 20175.47, 20179.54, 20180.68, 20179.96, 20174.32, 20186.24, 20199.72, 20198.78, 20180.29, 20190.27, 20188.05, 20191.95, 20188.66, 20185.27, 20184.46, 20182.56, 20181.4, 20175.57, 20174.48, 20177.81, 20138.62, 20141.32, 20143.13, 20119.53, 20131.97, 20130.34, 20118.83, 20121.36, 20098.39, 20082.59, 20088.46, 20071.68, 20090.48, 20089.18, 20097.15, 20098.14, 20101.2, 20100.31, 20101.47, ..."
2,sell,"[20137.82, 20127.24, 20139.3, 20140.23, 20154.54, 20156.16, 20155.79, 20148.08, 20159.98, 20152.3, 20162.3, 20163.86, 20170.01, 20188.75, 20177.99, 20190.74, 20172.72, 20163.79, 20171.66, 20175.47, 20179.54, 20180.68, 20179.96, 20174.32, 20186.24, 20199.72, 20198.78, 20180.29, 20190.27, 20188.05, 20191.95, 20188.66, 20185.27, 20184.46, 20182.56, 20181.4, 20175.57, 20174.48, 20177.81, 20138.62, 20141.32, 20143.13, 20119.53, 20131.97, 20130.34, 20118.83, 20121.36, 20098.39, 20082.59, 20088.46, 20071.68, 20090.48, 20089.18, 20097.15, 20098.14, 20101.2, 20100.31, 20101.47, 20118.97, 20119.43, ..."
3,sell,"[20127.24, 20139.3, 20140.23, 20154.54, 20156.16, 20155.79, 20148.08, 20159.98, 20152.3, 20162.3, 20163.86, 20170.01, 20188.75, 20177.99, 20190.74, 20172.72, 20163.79, 20171.66, 20175.47, 20179.54, 20180.68, 20179.96, 20174.32, 20186.24, 20199.72, 20198.78, 20180.29, 20190.27, 20188.05, 20191.95, 20188.66, 20185.27, 20184.46, 20182.56, 20181.4, 20175.57, 20174.48, 20177.81, 20138.62, 20141.32, 20143.13, 20119.53, 20131.97, 20130.34, 20118.83, 20121.36, 20098.39, 20082.59, 20088.46, 20071.68, 20090.48, 20089.18, 20097.15, 20098.14, 20101.2, 20100.31, 20101.47, 20118.97, 20119.43, 20112.2, 2..."
4,sell,"[20139.3, 20140.23, 20154.54, 20156.16, 20155.79, 20148.08, 20159.98, 20152.3, 20162.3, 20163.86, 20170.01, 20188.75, 20177.99, 20190.74, 20172.72, 20163.79, 20171.66, 20175.47, 20179.54, 20180.68, 20179.96, 20174.32, 20186.24, 20199.72, 20198.78, 20180.29, 20190.27, 20188.05, 20191.95, 20188.66, 20185.27, 20184.46, 20182.56, 20181.4, 20175.57, 20174.48, 20177.81, 20138.62, 20141.32, 20143.13, 20119.53, 20131.97, 20130.34, 20118.83, 20121.36, 20098.39, 20082.59, 20088.46, 20071.68, 20090.48, 20089.18, 20097.15, 20098.14, 20101.2, 20100.31, 20101.47, 20118.97, 20119.43, 20112.2, 20125.5, 20..."
...,...,...
21803,sell,"[30707.76, 30700.0, 30700.0, 30702.0, 30700.0, 30680.95, 30678.98, 30678.0, 30666.84, 30648.81, 30623.02, 30610.69, 30610.67, 30591.99, 30560.54, 30512.69, 30508.02, 30504.8, 30531.58, 30549.99, 30558.0, 30540.02, 30543.99, 30523.53, 30513.99, 30536.13, 30524.43, 30511.96, 30518.17, 30517.99, 30527.89, 30526.0, 30526.01, 30516.65, 30512.0, 30437.83, 30430.21, 30438.0, 30454.01, 30470.01, 30475.0, 30475.99, 30452.0, 30438.34, 30465.73, 30474.01, 30470.74, 30448.0, 30442.02, 30442.0, 30458.0, 30444.0, 30431.09, 30443.99, 30473.91, 30478.72, 30480.01, 30467.98, 30459.99, 30456.01, 30442.48, 3..."
21804,sell,"[30700.0, 30700.0, 30702.0, 30700.0, 30680.95, 30678.98, 30678.0, 30666.84, 30648.81, 30623.02, 30610.69, 30610.67, 30591.99, 30560.54, 30512.69, 30508.02, 30504.8, 30531.58, 30549.99, 30558.0, 30540.02, 30543.99, 30523.53, 30513.99, 30536.13, 30524.43, 30511.96, 30518.17, 30517.99, 30527.89, 30526.0, 30526.01, 30516.65, 30512.0, 30437.83, 30430.21, 30438.0, 30454.01, 30470.01, 30475.0, 30475.99, 30452.0, 30438.34, 30465.73, 30474.01, 30470.74, 30448.0, 30442.02, 30442.0, 30458.0, 30444.0, 30431.09, 30443.99, 30473.91, 30478.72, 30480.01, 30467.98, 30459.99, 30456.01, 30442.48, 30442.5, 30..."
21805,sell,"[30700.0, 30702.0, 30700.0, 30680.95, 30678.98, 30678.0, 30666.84, 30648.81, 30623.02, 30610.69, 30610.67, 30591.99, 30560.54, 30512.69, 30508.02, 30504.8, 30531.58, 30549.99, 30558.0, 30540.02, 30543.99, 30523.53, 30513.99, 30536.13, 30524.43, 30511.96, 30518.17, 30517.99, 30527.89, 30526.0, 30526.01, 30516.65, 30512.0, 30437.83, 30430.21, 30438.0, 30454.01, 30470.01, 30475.0, 30475.99, 30452.0, 30438.34, 30465.73, 30474.01, 30470.74, 30448.0, 30442.02, 30442.0, 30458.0, 30444.0, 30431.09, 30443.99, 30473.91, 30478.72, 30480.01, 30467.98, 30459.99, 30456.01, 30442.48, 30442.5, 30435.48, 3..."
21806,sell,"[30702.0, 30700.0, 30680.95, 30678.98, 30678.0, 30666.84, 30648.81, 30623.02, 30610.69, 30610.67, 30591.99, 30560.54, 30512.69, 30508.02, 30504.8, 30531.58, 30549.99, 30558.0, 30540.02, 30543.99, 30523.53, 30513.99, 30536.13, 30524.43, 30511.96, 30518.17, 30517.99, 30527.89, 30526.0, 30526.01, 30516.65, 30512.0, 30437.83, 30430.21, 30438.0, 30454.01, 30470.01, 30475.0, 30475.99, 30452.0, 30438.34, 30465.73, 30474.01, 30470.74, 30448.0, 30442.02, 30442.0, 30458.0, 30444.0, 30431.09, 30443.99, 30473.91, 30478.72, 30480.01, 30467.98, 30459.99, 30456.01, 30442.48, 30442.5, 30435.48, 30437.83, ..."


In [46]:
denoised_input_open = [None]*DS_LENGTH
denoised_input_high = [None]*DS_LENGTH
denoised_input_low = [None]*DS_LENGTH
denoised_input_close = [None]*DS_LENGTH
denoised_input_volume = [None]*DS_LENGTH

df = pd.read_json('/kaggle/working/input_open.json')
df_denoised = df[['label']].copy()
for i in range(0, DS_LENGTH):
    denoised_input_open[i] = denoise(df['input_open'][i])
del df

df = pd.read_json('/kaggle/working/input_high.json')
for i in range(0, DS_LENGTH):
    denoised_input_high[i] = denoise(df['input_high'][i])
del df

df = pd.read_json('/kaggle/working/input_low.json')
for i in range(0, DS_LENGTH):
    denoised_input_low[i] = denoise(df['input_low'][i])
del df

df = pd.read_json('/kaggle/working/input_close.json')
for i in range(0, DS_LENGTH):
    denoised_input_close[i] = denoise(df['input_close'][i])
del df

df = pd.read_json('/kaggle/working/input_volume.json')
for i in range(0, DS_LENGTH):
    denoised_input_volume[i] = denoise(df['input_volume'][i])
del df

df = pd.read_json('/kaggle/working/input_timestamp.json')



ValueError: Cannot set a DataFrame with multiple columns to the single column denoised_input_open

In [47]:
df_denoised['denoised_input_open'] = pd.Series(denoised_input_open).copy()
df_denoised['denoised_input_high'] = pd.Series(denoised_input_high).copy()
df_denoised['denoised_input_low'] = pd.Series(denoised_input_low).copy()
df_denoised['denoised_input_close'] = pd.Series(denoised_input_close).copy()
df_denoised['denoised_input_volume'] = pd.Series(denoised_input_volume).copy()
df_denoised['input_timestamp'] = pd.Series(df['input_timestamp']).copy()

del df
gc.collect()

415

In [48]:
print(df_denoised.info())
print(df_denoised.isnull().values.any())
print(pd.Series(df_denoised['denoised_input_open'][5].info()))
print(df_denoised['denoised_input_open'][5].isnull().values.any())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21808 entries, 0 to 21807
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   label                  21808 non-null  object
 1   denoised_input_open    21808 non-null  object
 2   denoised_input_high    21808 non-null  object
 3   denoised_input_low     21808 non-null  object
 4   denoised_input_close   21808 non-null  object
 5   denoised_input_volume  21808 non-null  object
 6   input_timestamp        21808 non-null  object
dtypes: object(7)
memory usage: 1.2+ MB
None
False


AttributeError: 'numpy.ndarray' object has no attribute 'info'

In [50]:
ind_list = ['qstick', 't3', 'cti', 'mad', 'ha', 'squeeze', 'aroon', 'bbands', 'kc', 'vwap', 'stoch']
ind_columns = ['qstick', 't3', 'cti', 'mad', 'HA_low', 'SQZ_20_2.0_20_1.5', 'AROONU_14', 'BBU_5_2.0', 'KCBe_20_2', 'vwap', 'STOCHd_14_3_3']

In [59]:
DS_LENGTH = len(df_denoised['denoised_input_open'])
DS_LENGTH

21808

In [77]:
for indi in ind_list:
    print(indi)
    indi_result = {}
    new_cols = []
    for i in range(0, DS_LENGTH):
        indi_input = pd.DataFrame()
        indi_input['open'] = df_denoised['denoised_input_open'][i].copy()
        indi_input['high'] = df_denoised['denoised_input_high'][i].copy()
        indi_input['low'] = df_denoised['denoised_input_low'][i].copy()
        indi_input['close'] = df_denoised['denoised_input_close'][i].copy()
        indi_input['volume'] = df_denoised['denoised_input_volume'][i].copy()
        indi_input['timestamp'] = [pd.to_datetime(x) for x in df_denoised['input_timestamp'][i]]
        indi_input.set_index(pd.DatetimeIndex(
            indi_input["timestamp"]), inplace=True)
        indi_fn = getattr(indi_input.ta, indi)
        data = indi_fn()
        if len(new_cols) == 0:
            if not isinstance(data, pd.Series):
                new_cols = new_cols + data.columns.to_numpy().tolist()
            else:
                new_cols = new_cols + [indi]
            for col_name in new_cols:
                indi_result[col_name] = [None]*DS_LENGTH
        for col_name in new_cols:
            if not isinstance(data, pd.Series):
                indi_result[col_name][i] = data[col_name]
            else:
                indi_result[col_name][i] = data
    for col_name in new_cols:
        if col_name in ind_columns:
            df_denoised[col_name] = indi_result[col_name]


qstick


KeyboardInterrupt: 

In [None]:
gaf_transformer = GramianAngularField(method='difference', image_size=INPUT_SIZE)
df_gaf_input_open = [None]*DS_LENGTH
df_gaf_input_high = [None]*DS_LENGTH
df_gaf_input_low = [None]*DS_LENGTH
df_gaf_input_close = [None]*DS_LENGTH
df_gaf_input_volume = [None]*DS_LENGTH
for i in range(0, DS_LENGTH):
    if len(df['denoised_input_close'][i]) > 0:
        df_gaf_input_open[i] = gaf_transformer.fit_transform(
            df_denoised['denoised_input_open'][i][-INPUT_SIZE:].reshape(1, -1))
        df_gaf_input_high[i] = gaf_transformer.fit_transform(
            df_denoised['denoised_input_high'][i][-INPUT_SIZE:].reshape(1, -1))
        df_gaf_input_low[i] = gaf_transformer.fit_transform(
            df_denoised['denoised_input_low'][i][-INPUT_SIZE:].reshape(1, -1))
        df_gaf_input_close[i] = gaf_transformer.fit_transform(
            df_denoised['denoised_input_close'][i][-INPUT_SIZE:].reshape(1, -1))
        df_gaf_input_volume[i] = gaf_transformer.fit_transform(
            df_denoised['denoised_input_volume'][i][-INPUT_SIZE:].reshape(1, -1))
df_denoised['gaf_open'] = df_gaf_input_open
df_denoised['gaf_high'] = df_gaf_input_high
df_denoised['gaf_low'] = df_gaf_input_low
df_denoised['gaf_close'] = df_gaf_input_close
df_denoised['gaf_volume'] = df_gaf_input_volume
df_denoised.drop(columns=['denoised_input_open', 'denoised_input_high', 'denoised_input_low',
                 'denoised_input_close', 'denoised_input_volume', 'input_timestamp'])
df_denoised = df_denoised.copy()
gc.collect()

In [None]:
for col_name in ind_columns:
    print(col_name)
    gaf_col = [None]*DS_LENGTH
    for i in range(0, DS_LENGTH):
        if len(df_denoised['gaf_open'][i]) > 0:
            if isinstance(df_denoised[col_name][i], pd.Series):
                gaf_col[i] = gaf_transformer.fit_transform(
                    df_denoised[col_name][i][-INPUT_SIZE:].to_numpy().reshape(1, -1))
            else:
                gaf_col[i] = gaf_transformer.fit_transform(
                    df_denoised[col_name][i][-INPUT_SIZE:].reshape(1, -1))
    df_denoised[col_name] = gaf_col


In [None]:
df_train = df_denoised.copy()
gc.collect()
pat = r'^(.*)_\d+.png'
images_path = '/kaggle/working/images/'
if not os.path.exists(images_path):
    os.makedirs(images_path)
files = get_image_files(images_path)
for f in files:
    os.remove(f)
files = get_image_files(images_path)
L = len(df_train['gaf_open'])
for i in range(0, L):
    i_open = df_train['gaf_open'].to_numpy()[i].squeeze()
    i_high = df_train['gaf_high'].to_numpy()[i].squeeze()
    i_low = df_train['gaf_low'].to_numpy()[i].squeeze()
    i_close = df_train['gaf_close'].to_numpy()[i].squeeze()
    i_volume = df_train['gaf_volume'].to_numpy()[i].squeeze()
    inputs_list = [i_open, i_high, i_low, i_close, i_volume] + [df_train[col_name].to_numpy()[i].squeeze() for col_name in ind_columns]
    rows_list = [inputs_list[i:i + 4] for i in range(0, len(inputs_list), 4)]
    image_rows = [np.concatenate(row) for row in rows_list]
    image = np.concatenate(image_rows, axis=1)
    label = df_train['label'].to_numpy()[i]
    matplotlib.image.imsave(images_path + label + '_' + str(i) + '.png', image)


    

In [None]:
images_path = '/kaggle/working/images/'
pat = r'^(.*)_\d+.png'
files = get_image_files(images_path)
dls = ImageDataLoaders.from_name_re(images_path, files, pat)
#dls.show_batch()
learn = vision_learner(dls, resnet34, metrics=error_rate)

In [None]:
#learn.lr_find()

In [None]:
learn.fine_tune(20, 0.001737800776027143) #resnet34 the best so far

In [None]:

#learn.show_results()
#learn.predict(files[0])

In [None]:
#learn.export(fname='/kaggle/working/model.pkl')
