In [1]:
import pandas as pd
import numpy as np
import os
import random

import warnings
warnings.filterwarnings("ignore")

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers, optimizers

In [3]:
np.random.seed(2022)
random.seed(2022)
tf.random.set_seed(2022)
os.environ['PYTHONHASHSEED'] = '0'

In [4]:
data = pd.read_csv('./bitstampUSD.csv')

In [5]:
data.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


In [6]:
data.shape

(4857377, 8)

In [7]:
# Data Preprocess
data.index = pd.to_datetime(data['Timestamp'], unit='s')
data = data.drop(['Timestamp'], axis=1)

In [8]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-31 07:52:00,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
2011-12-31 07:53:00,,,,,,,
2011-12-31 07:54:00,,,,,,,
2011-12-31 07:55:00,,,,,,,
2011-12-31 07:56:00,,,,,,,


In [9]:
data = data.resample('D').mean()
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-31,4.465,4.4825,4.465,4.4825,23.82947,106.330084,4.471603
2012-01-01,4.806667,4.806667,4.806667,4.806667,7.200667,35.25972,4.806667
2012-01-02,5.0,5.0,5.0,5.0,19.048,95.24,5.0
2012-01-03,5.2525,5.2525,5.2525,5.2525,11.00466,58.100651,5.2525
2012-01-04,5.2,5.223333,5.2,5.223333,11.914807,63.119577,5.208159


In [10]:
data.dropna()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-31,4.465000,4.482500,4.465000,4.482500,23.829470,106.330084,4.471603
2012-01-01,4.806667,4.806667,4.806667,4.806667,7.200667,35.259720,4.806667
2012-01-02,5.000000,5.000000,5.000000,5.000000,19.048000,95.240000,5.000000
2012-01-03,5.252500,5.252500,5.252500,5.252500,11.004660,58.100651,5.252500
2012-01-04,5.200000,5.223333,5.200000,5.223333,11.914807,63.119577,5.208159
...,...,...,...,...,...,...,...
2021-03-27,55193.240643,55219.665031,55168.757372,55195.415367,1.823877,100884.732367,55193.357260
2021-03-28,55833.608471,55857.735342,55810.425126,55835.012863,1.447939,80632.115263,55832.958824
2021-03-29,56914.886595,56946.213593,56885.242967,56915.629993,3.732887,213754.555988,56913.993819
2021-03-30,58347.805624,58371.955310,58323.956690,58349.357087,2.363999,138231.241926,58346.912268


In [11]:
# Scaling
from sklearn.preprocessing import RobustScaler

rob_scaler = RobustScaler()

data['open'] = rob_scaler.fit_transform(data['Open'].values.reshape(-1,1))
data['high'] = rob_scaler.fit_transform(data['High'].values.reshape(-1,1))
data['low'] = rob_scaler.fit_transform(data['Low'].values.reshape(-1,1))
data['close'] = rob_scaler.fit_transform(data['Close'].values.reshape(-1,1))
data['volume_(BTC)'] = rob_scaler.fit_transform(data['Volume_(BTC)'].values.reshape(-1,1))
data['volume_(Currency)'] = rob_scaler.fit_transform(data['Volume_(Currency)'].values.reshape(-1,1))
data['weighted_Price'] = rob_scaler.fit_transform(data['Weighted_Price'].values.reshape(-1,1))

In [12]:
data.drop(['Open', 'High', 'Low', 'Close', 'Volume_(BTC)', 'Volume_(Currency)', 'Weighted_Price'], axis=1, inplace=True)

In [13]:
data.head()

Unnamed: 0_level_0,open,high,low,close,volume_(BTC),volume_(Currency),weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-31,-0.099101,-0.09906,-0.099124,-0.099099,1.919561,-0.196897,-0.099097
2012-01-01,-0.099053,-0.099014,-0.099075,-0.099052,-0.049931,-0.198978,-0.099049
2012-01-02,-0.099025,-0.098987,-0.099047,-0.099025,1.35325,-0.197222,-0.099022
2012-01-03,-0.098989,-0.09895,-0.099011,-0.098989,0.400609,-0.198309,-0.098986
2012-01-04,-0.098996,-0.098955,-0.099019,-0.098993,0.508405,-0.198162,-0.098992


In [14]:
train = data.iloc[:-120]
test = data.iloc[-120:]

In [15]:
train = train.sample(frac=1)
test = test.sample(frac=1)

In [16]:
X_train = train.drop(['weighted_Price'], axis=1)
y_train = train['weighted_Price']

X_test = test.drop(['weighted_Price'], axis=1)
y_test = test['weighted_Price']

In [17]:
X_train.head()

Unnamed: 0_level_0,open,high,low,close,volume_(BTC),volume_(Currency)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-04-05,-0.035143,-0.035115,-0.035171,-0.035139,-0.10404,-0.110631
2016-08-12,-0.015971,-0.015976,-0.015958,-0.015965,-0.590179,-0.154718
2013-05-13,-0.083131,-0.083102,-0.08315,-0.083134,-0.478275,-0.187828
2019-08-26,1.378591,1.378449,1.378599,1.378455,-0.306274,1.331196
2019-02-15,0.410486,0.410234,0.410703,0.410513,-0.342072,0.295083


In [18]:
X_train = X_train.values
X_test = X_test.values

y_train = y_train.values
y_test = y_test.values

In [19]:
# LSTM : [sample, timestep, features]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

In [20]:
model_lstm = keras.Sequential()
model_lstm.add(layers.LSTM(units=10, activation='relu'))
model_lstm.add(layers.BatchNormalization())
model_lstm.add(layers.Dense(1))

In [21]:
model_lstm.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=0.001))

In [22]:
model_lstm.fit(X_train, y_train, validation_split=0.2, batch_size=32, epochs=10, shuffle=True, verbose=2)

Epoch 1/10
82/82 - 1s - loss: nan - val_loss: nan - 1s/epoch - 13ms/step
Epoch 2/10
82/82 - 0s - loss: nan - val_loss: nan - 87ms/epoch - 1ms/step
Epoch 3/10
82/82 - 0s - loss: nan - val_loss: nan - 84ms/epoch - 1ms/step
Epoch 4/10
82/82 - 0s - loss: nan - val_loss: nan - 87ms/epoch - 1ms/step
Epoch 5/10
82/82 - 0s - loss: nan - val_loss: nan - 86ms/epoch - 1ms/step
Epoch 6/10
82/82 - 0s - loss: nan - val_loss: nan - 86ms/epoch - 1ms/step
Epoch 7/10
82/82 - 0s - loss: nan - val_loss: nan - 86ms/epoch - 1ms/step
Epoch 8/10
82/82 - 0s - loss: nan - val_loss: nan - 87ms/epoch - 1ms/step
Epoch 9/10
82/82 - 0s - loss: nan - val_loss: nan - 85ms/epoch - 1ms/step
Epoch 10/10
82/82 - 0s - loss: nan - val_loss: nan - 82ms/epoch - 1ms/step


<keras.callbacks.History at 0x1ce0d5a2eb0>

In [23]:
from sklearn.metrics import mean_squared_error

y_pred = model_lstm.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [None]:
# 재실행해도 model fit 부분에서 loss 값이 nan 값만 출력되고, 그 결과 mse 값이 출력되지 않는 오류가 계속 발생한다...