In [None]:
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import joblib

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [2]:
seed = 42

In [3]:
coin = 'KRW-SXP'

In [4]:
df = pd.read_csv(f'../data/from_pyupbit/{coin}.csv')

In [5]:
df

Unnamed: 0,timestamp,open,high,low,close,volume,value
0,2020-12-26 13:11:00,777.0,777.0,777.0,777.0,2350.713400,1.826504e+06
1,2020-12-26 13:12:00,777.0,777.0,777.0,777.0,1630.621294,1.266993e+06
2,2020-12-26 13:13:00,777.0,778.0,777.0,778.0,1128.288067,8.771163e+05
3,2020-12-26 13:15:00,777.0,777.0,777.0,777.0,6474.727887,5.030864e+06
4,2020-12-26 13:16:00,778.0,778.0,777.0,777.0,7533.733012,5.854151e+06
...,...,...,...,...,...,...,...
1576795,2024-10-12 18:32:00,390.4,391.0,389.9,391.0,350074.951992,1.366919e+08
1576796,2024-10-12 18:33:00,390.9,391.9,390.7,391.4,181924.275797,7.114177e+07
1576797,2024-10-12 18:34:00,391.4,392.1,391.3,391.8,221882.023167,8.694523e+07
1576798,2024-10-12 18:35:00,391.5,393.5,391.5,392.6,398580.407319,1.564076e+08


In [6]:
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

In [7]:
df

Unnamed: 0_level_0,open,high,low,close,volume,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-26 13:11:00,777.0,777.0,777.0,777.0,2350.713400,1.826504e+06
2020-12-26 13:12:00,777.0,777.0,777.0,777.0,1630.621294,1.266993e+06
2020-12-26 13:13:00,777.0,778.0,777.0,778.0,1128.288067,8.771163e+05
2020-12-26 13:15:00,777.0,777.0,777.0,777.0,6474.727887,5.030864e+06
2020-12-26 13:16:00,778.0,778.0,777.0,777.0,7533.733012,5.854151e+06
...,...,...,...,...,...,...
2024-10-12 18:32:00,390.4,391.0,389.9,391.0,350074.951992,1.366919e+08
2024-10-12 18:33:00,390.9,391.9,390.7,391.4,181924.275797,7.114177e+07
2024-10-12 18:34:00,391.4,392.1,391.3,391.8,221882.023167,8.694523e+07
2024-10-12 18:35:00,391.5,393.5,391.5,392.6,398580.407319,1.564076e+08


In [8]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df['close'].values.reshape(-1, 1))
joblib.dump(scaler, f'models/{coin}_scaler.pkl')

['models/KRW-SXP_scaler.pkl']

In [9]:
timestep = 1

In [10]:
X = []
y = []
for i in range(len(scaled_data) - timestep - 1):
    X.append(scaled_data[i:(i + timestep), 0])
    y.append(scaled_data[i + timestep, 0])

In [11]:
X, y = np.array(X), np.array(y)

In [12]:
train_size = int(len(X) * .8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [13]:
train_dates = df.index[:train_size]
test_dates = df.index[train_size:]

In [14]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [15]:
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(timestep, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

In [16]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [17]:
early_stop = EarlyStopping(monitor='val_loss', patience=10)

In [18]:
model.fit(X_train, y_train, batch_size=32, epochs=20,
          validation_data=(X_test, y_test), callbacks=[early_stop])

Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x2eda67fd0>

In [35]:
model.save(f'models/lstm_{coin}.h5')

  saving_api.save_model(


In [20]:
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)



In [21]:
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
y_train = scaler.inverse_transform([y_train])
y_test = scaler.inverse_transform([y_test])

In [22]:
train_score = np.sqrt(mean_squared_error(y_train[0], train_predict[:, 0]))
test_score = np.sqrt(mean_squared_error(y_test[0], test_predict[:, 0]))
print(f'Train RMSE: {train_score:.2f}')
print(f'Test RMSE: {test_score:.2f}')

Train RMSE: 13.99
Test RMSE: 4.64


In [26]:
last_1_day = scaled_data[-timestep:]
X_predict = last_1_day.reshape(1, timestep, 1)
print(last_1_day)

[[0.02063342]]


In [27]:
pred = model.predict(X_predict)
pred = scaler.inverse_transform(pred)
print(pred)

[[388.0206]]


In [33]:
preds = []
for i in range(30):
    pred = model.predict(X_predict)
    preds.append(scaler.inverse_transform(pred)[0][0])
    pred = pred.reshape(1, 1, 1)
    X_predict = np.append(X_predict[:, 1:, :], pred, axis=1).reshape(1, timestep, 1)



In [34]:
preds

[7918.2964,
 7901.353,
 7885.5864,
 7870.9004,
 7857.206,
 7844.4243,
 7832.488,
 7821.3286,
 7810.892,
 7801.123,
 7791.9717,
 7783.396,
 7775.356,
 7767.8125,
 7760.733,
 7754.0845,
 7747.84,
 7741.9727,
 7736.4565,
 7731.27,
 7726.3896,
 7721.7974,
 7717.4756,
 7713.4077,
 7709.5757,
 7705.967,
 7702.5674,
 7699.3647,
 7696.344,
 7693.4976]