In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
from collections import deque
from sklearn import preprocessing
import random

In [2]:
# Empty frame that will hold the close/volume columns of every currency pair.
main_dataframe = pd.DataFrame()

In [3]:
# Load each pair's bars and left-join its close/volume columns onto the time
# index of the first pair in the list.
currencies = ['BCH-USD', 'BTC-USD', 'ETH-USD', 'LTC-USD']

# Accumulate into a local variable instead of testing whether the global frame
# is empty. That keeps this cell re-runnable (joining onto an already populated
# frame raises because the column names overlap) and it no longer discards the
# first pair's columns when its CSV happens to contain no rows.
merged = None
for c in currencies:
    dataset = 'data/' + c + '.csv'
    # Column names are supplied explicitly via `names=`.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])
    df = (
        df.rename(columns={'close': c + '_close', 'volume': c + '_volume'})
        .set_index('time')[[c + '_close', c + '_volume']]
    )
    merged = df if merged is None else merged.join(df)

main_dataframe = merged if merged is not None else pd.DataFrame()

In [4]:
main_dataframe.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,,,96.580002,9.6472
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978


In [5]:
main_dataframe.isnull().sum()

BCH-USD_close        0
BCH-USD_volume       0
BTC-USD_close     5122
BTC-USD_volume    5122
ETH-USD_close      195
ETH-USD_volume     195
LTC-USD_close      836
LTC-USD_volume     836
dtype: int64

In [6]:
# Carry the last observed value forward over gaps introduced by the join.
# fillna(method=...) is deprecated in pandas; ffill() is the supported spelling,
# and rebinding instead of mutating in place keeps the cell idempotent.
main_dataframe = main_dataframe.ffill()

  main_dataframe.fillna(method='ffill', inplace=True)


In [7]:
main_dataframe.isnull().sum()

BCH-USD_close     0
BCH-USD_volume    0
BTC-USD_close     0
BTC-USD_volume    0
ETH-USD_close     1
ETH-USD_volume    1
LTC-USD_close     0
LTC-USD_volume    0
dtype: int64

In [8]:
main_dataframe.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,,,96.580002,9.6472
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978


In [9]:
# Fill values that are still missing at the start of the frame (no earlier row
# to carry forward) with the next observed value.
# fillna(method=...) is deprecated in pandas; bfill() is the supported spelling.
main_dataframe = main_dataframe.bfill()

  main_dataframe.fillna(method='bfill', inplace=True)


In [10]:
main_dataframe.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,486.01001,26.019083,96.580002,9.6472
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978


In [11]:
main_dataframe.isnull().sum()

BCH-USD_close     0
BCH-USD_volume    0
BTC-USD_close     0
BTC-USD_volume    0
ETH-USD_close     0
ETH-USD_volume    0
LTC-USD_close     0
LTC-USD_volume    0
dtype: int64

In [12]:
# 'future' holds the LTC close observed three rows later; the final three rows
# have no such value and therefore become NaN.
main_dataframe['future'] = main_dataframe['LTC-USD_close'].shift(periods=-3)

In [13]:
main_dataframe.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume,future
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,486.01001,26.019083,96.580002,9.6472,96.5
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024,96.389999
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799,96.519997
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067,96.440002
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978,96.470001


In [14]:
def compare(current, future):
    """Return 1 when ``future`` is strictly greater than ``current``, otherwise 0."""
    return 1 if future > current else 0
# Label each row 1 if the future LTC close is higher than the current one, else 0.
# Rows whose 'future' is NaN compare as "not greater" and are therefore labelled 0.
main_dataframe['target'] = [
    compare(current_close, future_close)
    for current_close, future_close in zip(
        main_dataframe['LTC-USD_close'], main_dataframe['future']
    )
]

In [34]:
main_dataframe.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,486.01001,26.019083,96.580002,9.6472,96.5,0
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024,96.389999,0
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799,96.519997,0
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067,96.440002,0
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978,96.470001,1


In [16]:
# Sorted list of every timestamp in the index; its length is used below to
# locate the train/test cut-off.
times = sorted(main_dataframe.index.values)

In [17]:
len(times)

92225

In [18]:
int(0.1 * len(times))

9222

In [19]:
# Timestamp at which the final 10% of rows begins. Reuse the already sorted
# `times` list instead of sorting the whole index a second time.
last_10pct = times[-int(0.1 * len(times))]

In [20]:
last_10pct

np.int64(1534556340)

In [22]:
# Chronological hold-out: rows before the cut-off timestamp are used for
# training, rows at or after it form the test set.
main_dataframe_train = main_dataframe.loc[main_dataframe.index < last_10pct]
main_dataframe_test = main_dataframe.loc[main_dataframe.index >= last_10pct]

In [23]:
len(main_dataframe_test)

9222

In [24]:
len(main_dataframe_train)

83003

In [25]:
main_dataframe['BTC-USD_close']

time
1528968660    6489.549805
1528968720    6487.379883
1528968780    6479.410156
1528968840    6479.410156
1528968900    6479.979980
                 ...     
1535215020    6714.520020
1535215080    6714.520020
1535215140    6715.000000
1535215200    6715.000000
1535215260    6715.000000
Name: BTC-USD_close, Length: 92225, dtype: float64

In [28]:
main_dataframe['BTC-USD_close'].pct_change()

time
1528968660         NaN
1528968720   -0.000334
1528968780   -0.001228
1528968840    0.000000
1528968900    0.000088
                ...   
1535215020    0.000206
1535215080    0.000000
1535215140    0.000071
1535215200    0.000000
1535215260    0.000000
Name: BTC-USD_close, Length: 92225, dtype: float64

In [35]:
def preprocess_main_dataframe(df, seq_len=30, seed=None):
    """Turn a price/volume frame into balanced, shuffled fixed-length sequences.

    Steps: drop the ``future`` helper column, convert every feature column to
    its percentage change, standardise the feature columns with
    ``preprocessing.scale``, slide a window of ``seq_len`` consecutive rows
    over the frame (each window is labelled with the ``target`` of its last
    row), down-sample the larger class so both classes are equally frequent,
    and shuffle the result.

    Scaling statistics are computed from ``df`` itself, so two different
    frames passed to this function are scaled independently of each other.

    Args:
        df: DataFrame with a ``target`` column holding 0/1 labels. Every other
            column, except an optional ``future`` column which is ignored, is
            used as a feature. The input frame is not modified.
        seq_len: Number of consecutive rows per sequence.
        seed: Optional seed for the shuffling. When ``None`` the module-level
            ``random`` generator is used.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. ``X`` has shape
        ``(n_sequences, seq_len, n_features)`` and ``y`` has shape
        ``(n_sequences,)`` with float labels 0.0 / 1.0. Both are empty
        one-dimensional arrays when no balanced sequence can be built.

    Raises:
        KeyError: If ``df`` has no ``target`` column.
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError('seq_len must be at least 1')

    rng = random if seed is None else random.Random(seed)

    df = df.drop(columns='future', errors='ignore')
    # Pick the label by name rather than assuming it is the last column.
    feature_cols = [col for col in df.columns if col != 'target']

    # Percentage change for all features in one pass. Doing the dropna once,
    # after every column has been converted, loses a single leading row
    # instead of one extra row per feature column.
    processed = df[feature_cols].pct_change()
    # A previous value of 0 yields +/-inf, which scale() rejects; treat those
    # rows as missing so they are removed together with the leading NaN row.
    processed = processed.replace([np.inf, -np.inf], np.nan)
    processed['target'] = df['target'].to_numpy(dtype=float)
    processed = processed.dropna()

    if len(processed) < seq_len:
        return np.array([]), np.array([])

    # Standardise every feature column over the same set of rows.
    scaled = preprocessing.scale(processed[feature_cols].to_numpy())
    targets = processed['target'].to_numpy()

    # Rolling window: once it holds seq_len rows, every new row produces one
    # sequence labelled with that row's target.
    window = deque(maxlen=seq_len)
    sequences = []
    for row, target in zip(scaled, targets):
        window.append(row)
        if len(window) == seq_len:
            sequences.append((np.array(window), target))

    # Balance the classes by truncating the larger one after shuffling it.
    buys = [item for item in sequences if item[1] == 1]
    sells = [item for item in sequences if item[1] == 0]
    rng.shuffle(buys)
    rng.shuffle(sells)
    lower = min(len(buys), len(sells))
    balanced = buys[:lower] + sells[:lower]
    # Final shuffle so the two classes are interleaved.
    rng.shuffle(balanced)

    X = np.array([seq for seq, _ in balanced])
    y = np.array([target for _, target in balanced])
    return X, y

In [36]:
# Build balanced, shuffled training sequences from the training split.
train_X, train_y = preprocess_main_dataframe(main_dataframe_train)

In [37]:
train_X.shape

(71030, 30, 8)

In [38]:
train_y.shape

(71030,)

In [39]:
np.unique(train_y, return_counts=True)

(array([0., 1.]), array([35515, 35515]))

In [40]:
# Same pipeline applied to the held-out split.
# NOTE(review): this call standardises the held-out rows with their own
# mean/std and also class-balances them, so validation metrics are measured on
# a rebalanced set that is scaled independently of the training data - confirm
# this is intended.
test_X, test_y = preprocess_main_dataframe(main_dataframe_test)

In [41]:
test_X.shape

(7840, 30, 8)

In [45]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input

In [46]:
# Sequence classifier: an LSTM over (timesteps, features) windows, two ReLU
# dense layers, and a 2-unit softmax output (one unit per class).
model = Sequential()
for layer in (
    Input(shape=train_X.shape[1:]),
    LSTM(64),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
):
    model.add(layer)

In [47]:
# Labels are class indices (0 / 1) rather than one-hot vectors, hence the
# sparse cross-entropy loss paired with the 2-unit softmax output.
model.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [48]:
# Train on the balanced training sequences; the sequences built from the
# held-out split are passed as validation data and evaluated after each epoch.
history = model.fit(
    x=train_X,
    y=train_y,
    batch_size=100,
    epochs=100,
    validation_data=(test_X, test_y),
)

Epoch 1/100
[1m711/711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.5580 - loss: 0.6836 - val_accuracy: 0.5694 - val_loss: 0.6785
Epoch 2/100
[1m711/711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.5661 - loss: 0.6797 - val_accuracy: 0.5727 - val_loss: 0.6775
Epoch 3/100
[1m711/711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.5697 - loss: 0.6780 - val_accuracy: 0.5690 - val_loss: 0.6786
Epoch 4/100
[1m711/711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.5732 - loss: 0.6762 - val_accuracy: 0.5747 - val_loss: 0.6771
Epoch 5/100
[1m711/711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.5781 - loss: 0.6739 - val_accuracy: 0.5764 - val_loss: 0.6774
Epoch 6/100
[1m711/711[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.5808 - loss: 0.6719 - val_accuracy: 0.5611 - val_loss: 0.6835
Epoch 7/100
[1m711/71

KeyboardInterrupt: 