In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers, Model

# Load the KRW-BTC history CSV (relative path) into a DataFrame.
btc = pd.read_csv('./data/KRW-BTC_historical.csv')

In [12]:
# Count the missing values in each column.
btc.isna().sum()

timestamp           0
market              0
trade_price         0
change_rate         1
acc_trade_volume    0
dtype: int64

In [13]:
# Replace missing change_rate entries with 0; every other column is left as is.
btc = btc.assign(change_rate=btc['change_rate'].fillna(0))

In [14]:
# Columns fed to the model. trade_price stays first because the prediction
# target is later read from column 0 of the scaled array.
features = ["trade_price", "change_rate", "acc_trade_volume"]

# Min-max scale every feature column.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before any
# train/test split is made, so min/max statistics of the later rows leak into
# the training inputs — consider fitting on the training rows only.
scaler = MinMaxScaler()
scaler.fit(btc[features])
data_scaled = scaler.transform(btc[features])

In [15]:
# Dedicated scaler fitted on trade_price alone, so that single-column price
# arrays can be mapped back to the original price scale later on.
price_scaler = MinMaxScaler()
data_price = btc[["trade_price"]].to_numpy()
price_scaler.fit(data_price)

0,1,2
,feature_range,"(0, ...)"
,copy,True
,clip,False


In [16]:
def create_sequences(data, seq_len=24, pred_len=1, target_col=0):
    """Slice a 2-D time series into sliding input windows and their targets.

    Args:
        data: Array-like of shape (n_rows, n_features), one row per time step.
        seq_len: Number of consecutive rows in each input window.
        pred_len: Number of rows following the window that form the target.
        target_col: Column index the target values are taken from
            (default 0, which is trade_price in this notebook).

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape (n_samples, seq_len, n_features)
        (input: all features) and ``y`` has shape (n_samples, pred_len)
        (output: the target column only), with
        ``n_samples = n_rows - seq_len - pred_len + 1``. When the series is too
        short for a single window, empty arrays with those trailing dimensions
        are returned so downstream shape lookups keep working.
    """
    data = np.asarray(data)
    n_samples = len(data) - seq_len - pred_len + 1

    if n_samples <= 0:
        # Not enough rows for even one window: keep the trailing dimensions
        # instead of collapsing to a flat shape-(0,) array.
        empty_X = np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0, pred_len), dtype=data.dtype)
        return empty_X, empty_y

    # Input: every feature of the window rows.
    X = np.stack([data[start:start + seq_len] for start in range(n_samples)])
    # Output: only the target column of the rows right after the window.
    y = np.stack([
        data[start + seq_len:start + seq_len + pred_len, target_col]
        for start in range(n_samples)
    ])
    return X, y

# Build windows of 24 consecutive rows as inputs, each paired with the scaled
# trade_price of the single row that follows the window.
X, y = create_sequences(data_scaled, seq_len=24, pred_len=1)

In [17]:
# Ordered (unshuffled) 80/20 hold-out: the first 80% of the windows train the
# model, the remaining windows are kept for evaluation.
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [18]:
# Convert all four splits to float32 tensors in one pass.
X_train, y_train, X_test, y_test = (
    tf.convert_to_tensor(split_array, dtype=tf.float32)
    for split_array in (X_train, y_train, X_test, y_test)
)

In [19]:
class LSTMModel(tf.keras.Model):
    """Stack of LSTM layers followed by a single Dense projection.

    Args:
        input_size: Number of features per time step. It is not referenced
            when the layers are created; the parameter is kept so existing
            callers keep working.
        hidden_size: Number of units in every LSTM layer.
        output_size: Number of units of the final Dense layer.
        num_layers: Total number of LSTM layers. All but the last return the
            full sequence; the last returns only its final output. Values
            below 1 still produce one LSTM layer.
        dropout: ``dropout`` value passed to every LSTM layer.
    """

    def __init__(self, input_size, hidden_size=256, output_size=24, num_layers=2, dropout=0.2):
        super().__init__()

        # Stack several LSTM layers: the intermediate ones return the whole
        # sequence so that the next LSTM layer can consume it.
        self.lstm_layers = [
            layers.LSTM(hidden_size, return_sequences=True, dropout=dropout)
            for _ in range(num_layers - 1)
        ]
        # Last LSTM layer (return_sequences=False -> only the last output is returned).
        self.lstm_layers.append(
            layers.LSTM(hidden_size, return_sequences=False, dropout=dropout)
        )

        # Fully connected (Dense) output layer.
        self.fc = layers.Dense(output_size)

    def call(self, x, training=None):
        """Run the LSTM stack and project the result with the Dense layer.

        Args:
            x: Input batch; fed to the first LSTM layer unchanged.
            training: Keras training flag. It is forwarded explicitly to every
                LSTM layer so their dropout follows the mode the model was
                called in. ``None`` leaves the decision to Keras.

        Returns:
            Output of the Dense layer applied to the last LSTM layer's output.
        """
        out = x
        for lstm in self.lstm_layers:
            out = lstm(out, training=training)
        return self.fc(out)

In [None]:
# Model dimensions are read off the training tensor, which is laid out as
# (samples, time steps, features).
seq_len = X_train.shape[1]
input_size = X_train.shape[2]
hidden_size = 256
output_size = 1  # one predicted value per window

model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Build with an unspecified batch dimension before calling summary().
model.build(input_shape=(None, seq_len, input_size))

# MSE as training loss, Adam optimizer, MAE reported as an additional metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

model.summary()



In [None]:
# Train for a fixed number of epochs.
# NOTE: the held-out split is also passed as validation data, so the val_*
# metrics and the evaluate() call below are computed on the same samples.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

# evaluate() returns the loss followed by the compiled metric (MAE).
test_loss, test_mae = model.evaluate(x=X_test, y=y_test, verbose=0)
print(f"Test Loss: {test_loss:.6f}, Test MAE: {test_mae:.6f}")

Epoch 1/100


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 60ms/step - loss: 0.0015 - mean_absolute_error: 0.0123 - val_loss: 3.7954e-05 - val_mean_absolute_error: 0.0044
Epoch 2/100
[1m108/438[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m18s[0m 55ms/step - loss: 6.0104e-05 - mean_absolute_error: 0.0053

In [None]:
# Predict on the held-out windows; the model was trained on scaled targets,
# so these predictions are still on the scaled price range.
y_pred = model.predict(X_test)

In [None]:
# The targets are only MinMax-scaled (no log1p is applied upstream), so they
# must not be passed through np.expm1; they are mapped back to prices with
# price_scaler.inverse_transform.

In [None]:
# Map the scaled targets and predictions back to the original trade_price scale.
# np.asarray handles both operands: y_test is a tf.Tensor, while model.predict
# returns a NumPy array, which has no .numpy() method.
y_test_scaled = price_scaler.inverse_transform(np.asarray(y_test))
y_pred_scaled = price_scaler.inverse_transform(np.asarray(y_pred))

In [None]:
# Inspect the held-out targets after mapping them back to the price scale.
y_test_scaled

array([[1.3666400e+08],
       [1.3687301e+08],
       [1.3699000e+08],
       ...,
       [1.5975101e+08],
       [1.6025400e+08],
       [1.6039901e+08]], dtype=float32)

In [None]:
# Inspect the predictions after mapping them back to the price scale.
y_pred_scaled

array([[1.3675619e+08],
       [1.3675979e+08],
       [1.3678029e+08],
       ...,
       [1.5966395e+08],
       [1.5961282e+08],
       [1.5964558e+08]], dtype=float32)

In [None]:
# Root-mean-squared error between actual and predicted prices, expressed in
# the original (inverse-transformed) price units.
rmse = np.sqrt(mean_squared_error(y_test_scaled, y_pred_scaled))
print(f"Validation RMSE: {rmse:.4f}")

Validation RMSE: 895174.8125
