## LSTM GOLD PREDICT

In [1]:
import pandas as pd

In [2]:
# Load the exported price bars; the relative path is resolved against the
# kernel's working directory.
# NOTE(review): the preview recorded below shows an "Unnamed: 0" column, which
# looks like a saved index — confirm whether it should be read as the index.
df = pd.read_csv(filepath_or_buffer="data/M1_Y1.csv")
df.head(n=5)

Unnamed: 0,time,Open,High,Low,Close,TickVolume,Spread,real_volume
0,2025-07-03 18:02:00+00:00,3328.741,3329.104,3328.741,3329.049,46,160,0
1,2025-07-03 18:03:00+00:00,3329.049,3329.049,3328.882,3329.027,23,160,0
2,2025-07-03 18:04:00+00:00,3329.026,3329.026,3328.598,3328.737,57,160,0
3,2025-07-03 18:05:00+00:00,3328.74,3329.134,3328.647,3329.016,52,160,0
4,2025-07-03 18:06:00+00:00,3329.015,3329.129,3328.962,3328.962,26,160,0


## LSTM Model

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import random

In [4]:
'''
X: [batch, time, features] = [B, 120, F]
Price/technical: Close (or OHLCV plus additional engineered features)
News sentiment: FinBERT probability vector (3 dimensions) or a single aggregate score
Time/volatility features as needed
y: [batch, 1] next price (or return)
Inputs for connecting to the Trading Bot

Decision rule: apply a threshold to the predicted value (e.g. delta > threshold = bullish)
Trading costs: spread/commission/slippage are used to filter/adjust the signal
Risk: position sizing (e.g. volatility targeting), time stop/daily limit
'''

class GoldRNN(nn.Module):
    """LSTM encoder followed by a linear head that emits one score per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=1):
        super().__init__()
        # batch_first=True: inputs and outputs are laid out (batch, time, features)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map ``x`` of shape (batch, time, input_size) to a (batch,) tensor.

        The returned values are the raw output of the linear layer; no
        activation is applied here.
        """
        sequence_out, _ = self.lstm(x)
        # Only the final time step of the top layer feeds the head
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step).squeeze(1)
    


In [5]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Work on an independent copy of the Close column so the column assignments
# below never write into a slice of `df`.
filter_df = df[["Close"]].dropna().copy()

# target[t] = 1 when Close[t] is above Close[t-1], i.e. the bar at t closed up.
# create_sequences() pairs the window data[i:i+w] with labels[i+w], so every
# window is labelled with the direction of the very next bar relative to the
# last close inside the window.
# (Defining the target with shift(-1) instead would label the move *after*
# that next bar — one close the window never contains — and would fabricate a
# 0 on the final row, where no following close exists.)
# Row 0 has no previous close and evaluates to 0; it is never read as a label
# because labels are taken from index window_size onwards.
filter_df['target'] = (filter_df['Close'] > filter_df['Close'].shift(1)).astype(int)

# Fit the scaler on the chronologically first 80% of rows only — the same
# proportion the train/test split (test_size=0.2, shuffle=False) keeps for
# training — so the min/max used for scaling are not taken from the evaluation
# period. Rows after the fit range may therefore scale outside [0, 1].
n_fit_rows = int(len(filter_df) * 0.8)
scaler = MinMaxScaler()
scaler.fit(filter_df[['Close']].iloc[:n_fit_rows])
filter_df['close_scaled'] = scaler.transform(filter_df[['Close']])


def create_sequences(data, labels, window_size=30):
    """Cut ``data`` into overlapping windows and pair each with one label.

    Window ``k`` is ``data[k:k + window_size]`` and its label is
    ``labels[k + window_size]``, the entry immediately after the window.

    Args:
        data: Indexable sequence of observations.
        labels: Indexable sequence of labels, indexed like ``data``.
        window_size: Number of consecutive observations per window.

    Returns:
        Tuple ``(x, y)`` of numpy arrays holding the windows and their labels.
        Both are empty when ``data`` has no more than ``window_size`` entries.
    """
    starts = range(len(data) - window_size)
    windows = [data[start:start + window_size] for start in starts]
    targets = [labels[start + window_size] for start in starts]
    return np.array(windows), np.array(targets)

# Slide a 120-step window over the scaled closes and pair each window with
# the label stored right after it.
X, y = create_sequences(
    data=filter_df['close_scaled'].values,
    labels=filter_df['target'].values,
    window_size=120,
)

# Append a trailing feature axis so each window is (time, 1) for the LSTM
X = X[..., np.newaxis]

print(X.shape, y.shape)  # (N, window_size, 1), (N,)

(99517, 120, 1) (99517,)


In [6]:
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# Chronological 80/20 split (shuffle=False keeps the last 20% of windows as
# the test set), with every part converted to a float32 tensor in one pass.
X_train, X_test, y_train, y_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in train_test_split(X, y, test_size=0.2, shuffle=False)
)

# Training batches are drawn in random order; test batches keep their order.
train_dl = DataLoader(dataset=TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
test_dl = DataLoader(dataset=TensorDataset(X_test, y_test), batch_size=64, shuffle=False)

In [7]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = GoldRNN().to(device)

In [8]:
import torch.optim as optim

# Binary cross-entropy on raw model outputs: the sigmoid is applied inside the
# loss, which is why the model's forward() returns un-activated scores.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train for 10 full passes over train_dl and report the mean per-sample loss
# of each pass.
# NOTE(review): the recorded run below stays at ~0.6931 (= ln 2), which is the
# loss of always predicting probability 0.5 — the model is not improving on
# this input/target setup.
for epoch in range(10):
    model.train()
    total_loss = 0
    for xb, yb in train_dl:
        # Move the batch to the same device as the model
        xb = xb.to(device)
        yb = yb.to(device)
        
        # Clear gradients left over from the previous step before backprop
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        
        # The loss is a batch mean (default reduction); weight it by batch size
        # so the epoch total divides cleanly by the dataset size below.
        total_loss += loss.item() * xb.size(0)

    print(f"Epoch {epoch+1}, Loss={total_loss/len(train_dl.dataset):.4f}")


Epoch 1, Loss=0.6932
Epoch 2, Loss=0.6932
Epoch 3, Loss=0.6931
Epoch 4, Loss=0.6931
Epoch 5, Loss=0.6931
Epoch 6, Loss=0.6931
Epoch 7, Loss=0.6932
Epoch 8, Loss=0.6931
Epoch 9, Loss=0.6931
Epoch 10, Loss=0.6931
