In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import Callback
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd

In [None]:
TIMESTEPS = 10 # 10 or 20
EPOCHS = 10
BATCH_SIZE = 16 # 8 or 16
STEPS = int(TIMESTEPS / 2) # TIMESTEPS or int(TIMESTPES / 2)

In [14]:
# 데이터 로드
df_merged = pd.read_csv('final_data.csv')

# 데이터 준비
df = df_merged.copy()

# Feature와 Target 분리
X = df.drop(columns=['trade_price_10min_later']).values
y = df['trade_price_10min_later'].values

# 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 시계열 데이터 형식으로 변환 (샘플 간 공유 없음)
timesteps = TIMESTEPS
X_sequences, y_sequences = [], []

# i를 timesteps 크기만큼 건너뛰며 샘플 생성
for i in range(0, len(X_scaled) - timesteps, WINDOW):
    X_sequences.append(X_scaled[i:i+timesteps])
    y_sequences.append(y[i+timesteps])

X_sequences = np.array(X_sequences, dtype=np.float32)
y_sequences = np.array(y_sequences)

# Train, Test 분할 (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=1, shuffle=True)

# 분할 비율 출력
print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")

Train set: (157765, 20, 18), Test set: (39442, 20, 18)


In [16]:
# LSTM 모델 구성
model = Sequential([
    LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 학습 진행을 표시하기 위한 Callback 설정
class TQDMProgressBar(Callback):
    def on_train_begin(self, logs=None):
        self.epochs = self.params['epochs']
        self.tqdm = tqdm(total=self.epochs, desc='Training Progress')

    def on_epoch_end(self, epoch, logs=None):
        self.tqdm.update(1)
        self.tqdm.set_postfix(loss=logs['loss'])

    def on_train_end(self, logs=None):
        self.tqdm.close()

# 학습 과정 시각화 및 히스토리 저장
tqdm_callback = TQDMProgressBar()
history = model.fit(
    X_train, y_train, 
    validation_data=(X_test, y_test),
    epochs=EPOCHS, 
    batch_size=BATCH_SIZE, 
    callbacks=[tqdm_callback]
)

# Loss 및 Accuracy 그래프
plt.figure(figsize=(10, 6))

# Training 및 Test Loss
plt.plot(history.history['loss'], label='Training Loss', linestyle='--')
plt.plot(history.history['val_loss'], label='Test Loss', linestyle='-')

# Training 및 Test Accuracy
plt.plot(history.history['accuracy'], label='Training Accuracy', linestyle='--')
plt.plot(history.history['val_accuracy'], label='Test Accuracy', linestyle='-')

# 그래프 설정
plt.title('Training and Test Loss/Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Value')
plt.legend()
plt.grid(True)

plt.show()

Training Progress:   0%|                                                                        | 0/10 [00:00<?, ?it/s]

Epoch 1/10
 1123/19721 [>.............................] - ETA: 16:45 - loss: 0.6940 - accuracy: 0.5009

KeyboardInterrupt: 

In [None]:
# 최종 평가 (Test Set)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Save
model.save(f'lstm_seq{TIMESTEPS}_batch{BATCH_SIZE}_steps{STEPS}.h5')