In [1]:
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import joblib

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [2]:
seed = 42

In [3]:
coin = 'KRW-SUI'

In [4]:
df = pd.read_csv(f'../data/from_pyupbit/{coin}.csv')

In [5]:
df

Unnamed: 0,timestamp,open,high,low,close,volume,value
0,2023-05-04 09:05:00,1825.0,2015.0,1820.0,1975.0,1.313930e+07,2.590944e+10
1,2023-05-04 09:06:00,1970.0,1985.0,1960.0,1965.0,4.593641e+06,9.063733e+09
2,2023-05-04 09:07:00,1970.0,1990.0,1965.0,1990.0,2.737682e+06,5.413675e+09
3,2023-05-04 09:08:00,1985.0,1995.0,1970.0,1975.0,2.330906e+06,4.621071e+09
4,2023-05-04 09:09:00,1975.0,1980.0,1950.0,1960.0,2.320489e+06,4.555203e+09
...,...,...,...,...,...,...,...
701770,2024-10-12 19:41:00,2809.0,2812.0,2809.0,2810.0,6.448054e+04,1.811826e+08
701771,2024-10-12 19:42:00,2809.0,2815.0,2809.0,2812.0,4.873846e+04,1.370615e+08
701772,2024-10-12 19:43:00,2812.0,2814.0,2811.0,2814.0,6.543037e+04,1.839865e+08
701773,2024-10-12 19:44:00,2814.0,2815.0,2812.0,2815.0,2.284715e+04,6.429445e+07


In [6]:
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

In [7]:
df

Unnamed: 0_level_0,open,high,low,close,volume,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-05-04 09:05:00,1825.0,2015.0,1820.0,1975.0,1.313930e+07,2.590944e+10
2023-05-04 09:06:00,1970.0,1985.0,1960.0,1965.0,4.593641e+06,9.063733e+09
2023-05-04 09:07:00,1970.0,1990.0,1965.0,1990.0,2.737682e+06,5.413675e+09
2023-05-04 09:08:00,1985.0,1995.0,1970.0,1975.0,2.330906e+06,4.621071e+09
2023-05-04 09:09:00,1975.0,1980.0,1950.0,1960.0,2.320489e+06,4.555203e+09
...,...,...,...,...,...,...
2024-10-12 19:41:00,2809.0,2812.0,2809.0,2810.0,6.448054e+04,1.811826e+08
2024-10-12 19:42:00,2809.0,2815.0,2809.0,2812.0,4.873846e+04,1.370615e+08
2024-10-12 19:43:00,2812.0,2814.0,2811.0,2814.0,6.543037e+04,1.839865e+08
2024-10-12 19:44:00,2814.0,2815.0,2812.0,2815.0,2.284715e+04,6.429445e+07


In [8]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df['close'].values.reshape(-1, 1))
joblib.dump(scaler, f'models/{coin}_scaler.pkl')

['models/KRW-SUI_scaler.pkl']

In [9]:
timestep = 1

In [10]:
X = []
y = []
for i in range(len(scaled_data) - timestep - 1):
    X.append(scaled_data[i:(i + timestep), 0])
    y.append(scaled_data[i + timestep, 0])

In [11]:
X, y = np.array(X), np.array(y)

In [12]:
train_size = int(len(X) * .8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [13]:
train_dates = df.index[:train_size]
test_dates = df.index[train_size:]

In [14]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [15]:
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(timestep, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

In [16]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [17]:
early_stop = EarlyStopping(monitor='val_loss', patience=10)

In [18]:
model.fit(X_train, y_train, batch_size=32, epochs=20,
          validation_data=(X_test, y_test), callbacks=[early_stop])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


<keras.callbacks.History at 0x26ac496f8e0>

In [19]:
model.save(f'models/lstm_{coin}.h5')

In [20]:
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)



In [21]:
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
y_train = scaler.inverse_transform([y_train])
y_test = scaler.inverse_transform([y_test])

In [22]:
test_predict

array([[1198.7217],
       [1197.7267],
       [1200.7118],
       ...,
       [2801.858 ],
       [2803.8438],
       [2805.8289]], dtype=float32)

In [23]:
y_test

array([[1198., 1201., 1193., ..., 2812., 2814., 2815.]])

In [24]:
train_score = np.sqrt(mean_squared_error(y_train[0], train_predict[:, 0]))
test_score = np.sqrt(mean_squared_error(y_test[0], test_predict[:, 0]))
print(f'Train RMSE: {train_score:.2f}')
print(f'Test RMSE: {test_score:.2f}')

Train RMSE: 4.27
Test RMSE: 4.28


In [25]:
last_1_day = scaled_data[-timestep:]
X_predict = last_1_day.reshape(1, timestep, 1)
print(last_1_day)

[[0.85825315]]


In [26]:
pred = model.predict(X_predict)
pred = scaler.inverse_transform(pred)
print(pred)

[[2806.8213]]


In [27]:
preds = []
for i in range(30):
    pred = model.predict(X_predict)
    preds.append(scaler.inverse_transform(pred)[0][0])
    pred = pred.reshape(1, 1, 1)
    X_predict = np.append(X_predict[:, 1:, :], pred, axis=1).reshape(1, timestep, 1)



In [28]:
preds

[2806.8213,
 2798.7021,
 2790.6416,
 2782.6377,
 2774.6887,
 2766.7935,
 2758.9502,
 2751.1575,
 2743.4146,
 2735.7195,
 2728.071,
 2720.4688,
 2712.9114,
 2705.3972,
 2697.9255,
 2690.4956,
 2683.1062,
 2675.7563,
 2668.4453,
 2661.1724,
 2653.9363,
 2646.7366,
 2639.5728,
 2632.443,
 2625.348,
 2618.2861,
 2611.2576,
 2604.261,
 2597.2961,
 2590.3625]