In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from datetime import timedelta

# 1. Load the data
df = pd.read_csv("/content/구리 선물 과거 데이터 (천).csv")  # change this to your local path

# 2. Preprocessing
# Remove any spaces inside the date strings before parsing them as YYYY-MM-DD.
df['날짜'] = pd.to_datetime(df['날짜'].str.replace(" ", "", regex=False), format="%Y-%m-%d")

# Magnitude suffixes that may trail a number, rewritten as exponent notation so
# that float() can parse them ("1.5K" -> "1.5e3"). "M" and "B" are handled in
# addition to "K" so larger abbreviated values do not crash the conversion.
suffix_exponents = {"K": "e3", "M": "e6", "B": "e9"}
cols_to_clean = ['종가', '시가', '고가', '저가', '거래량']
for col in cols_to_clean:
    # Drop thousands separators first, then expand any magnitude suffix.
    cleaned = df[col].astype(str).str.replace(",", "", regex=False)
    for suffix, exponent in suffix_exponents.items():
        cleaned = cleaned.str.replace(suffix, exponent, regex=False)
    df[col] = cleaned.astype(float)
df['변동 %'] = df['변동 %'].str.replace('%', '', regex=False).astype(float)

# Order rows chronologically (oldest first) so later windows follow time order.
df = df.sort_values(by="날짜").reset_index(drop=True)

# 3. Normalization and sequence creation
# MinMaxScaler expects a 2-D array, hence the reshape to a single column.
close_prices = df['종가'].values.reshape(-1, 1)
scaler = MinMaxScaler()
scaled_close = scaler.fit_transform(close_prices)

def create_sequences(data, seq_length=60):
    """Slice ``data`` into overlapping windows and their next-step targets.

    Args:
        data: Indexable, sliceable sequence (e.g. a NumPy array).
        seq_length: Number of consecutive samples in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X[k]`` is
        ``data[k:k + seq_length]`` and ``y[k]`` is the element right after
        that window, ``data[k + seq_length]``.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Build the input windows and next-step targets from the scaled closing prices.
X, y = create_sequences(scaled_close)

# 4. Convert to tensors and wrap them in a DataLoader
X_tensor, y_tensor = (torch.tensor(array, dtype=torch.float32) for array in (X, y))
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

# 5. LSTM model definition
class LSTMModel(nn.Module):
    """LSTM stack followed by a linear layer that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()
        # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the last time step."""
        hidden_states, _final_state = self.lstm(x)
        last_step = hidden_states[:, -1]
        return self.fc(last_step)

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the DataLoader's shuffling order are repeatable when the
# notebook is re-run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

model = LSTMModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 6. Training loop
# Single source of truth for the epoch count (used by the loop and the log line).
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0
    for batch_x, batch_y in dataloader:
        pred = model(batch_x)
        loss = criterion(pred, batch_y)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Note: total_loss is the SUM of the per-batch losses, not their average.
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {total_loss:.4f}")

# 7. Function that forecasts the next 365 business days
def predict_future_prices(model, scaled_close, scaler, days=365, seq_length=60, last_date=None):
    """Roll the model forward autoregressively and return a dated forecast.

    The last ``seq_length`` scaled values seed the input window. Each
    prediction is appended to the window (dropping the oldest value) and fed
    back in to produce the next step.

    Args:
        model: Callable module with an ``eval()`` method; called with a
            ``(1, seq_length, 1)`` float32 tensor and expected to return a
            single-element tensor.
        scaled_close: Array-like of scaled closing prices; only the last
            ``seq_length`` entries are used.
        scaler: Object whose ``inverse_transform`` maps an ``(n, 1)`` array of
            scaled values back to prices.
        days: Number of forecast steps. Dates are generated with business-day
            frequency, so this is a count of business days, not calendar days.
        seq_length: Length of the input window fed to the model.
        last_date: Date of the last observed sample. When ``None`` (default)
            it falls back to ``df['날짜'].max()`` from the notebook-level
            DataFrame ``df``, which is what the function did before this
            parameter existed.

    Returns:
        DataFrame with columns ``'날짜'`` (forecast dates) and ``'예측 종가'``
        (predicted closing prices), one row per forecast step.

    Raises:
        ValueError: If ``days`` or ``seq_length`` is not positive, or if
            ``scaled_close`` does not provide exactly ``seq_length`` values
            for the seed window.
    """
    if days < 1:
        raise ValueError(f"days must be a positive integer, got {days}")
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length}")

    seed_window = np.asarray(scaled_close, dtype=np.float32)[-seq_length:]
    if seed_window.size != seq_length:
        raise ValueError(
            f"need exactly {seq_length} scaled values to seed the forecast, "
            f"got {seed_window.size}"
        )

    if last_date is None:
        # Backward-compatible fallback to the notebook-level DataFrame.
        last_date = df['날짜'].max()
    last_date = pd.to_datetime(last_date)

    model.eval()
    predictions = []
    current_seq = torch.tensor(seed_window.reshape(1, seq_length, 1), dtype=torch.float32)

    with torch.no_grad():
        for _ in range(days):
            pred = model(current_seq)
            predictions.append(pred.item())
            # Slide the window: drop the oldest step, append the new prediction.
            current_seq = torch.cat((current_seq[:, 1:, :], pred.view(1, 1, 1)), dim=1)

    predicted_prices = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
    # freq='B' yields business days only, starting after the last observed date.
    future_dates = pd.date_range(start=last_date + timedelta(days=1), periods=days, freq='B')

    return pd.DataFrame({'날짜': future_dates, '예측 종가': predicted_prices.flatten()})

Epoch 1/10, Loss: 4.6841
Epoch 2/10, Loss: 0.1433
Epoch 3/10, Loss: 0.1033
Epoch 4/10, Loss: 0.0907
Epoch 5/10, Loss: 0.0802
Epoch 6/10, Loss: 0.0797
Epoch 7/10, Loss: 0.0790
Epoch 8/10, Loss: 0.0624
Epoch 9/10, Loss: 0.0642
Epoch 10/10, Loss: 0.0631
          날짜     예측 종가
0 2025-04-04  5.025309
1 2025-04-07  5.004325
2 2025-04-08  4.989262
3 2025-04-09  4.977662
4 2025-04-10  4.968056


In [3]:
# Usage example: forecast the next 365 business days and display the result
future_df = predict_future_prices(
    model=model,
    scaled_close=scaled_close,
    scaler=scaler,
    days=365,
)
future_df

Unnamed: 0,날짜,예측 종가
0,2025-04-04,5.025309
1,2025-04-07,5.004325
2,2025-04-08,4.989262
3,2025-04-09,4.977662
4,2025-04-10,4.968056
...,...,...
360,2026-08-21,4.583347
361,2026-08-24,4.583332
362,2026-08-25,4.583316
363,2026-08-26,4.583301
