In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np



In [35]:
class BitcoinDataset(Dataset):
    """Sliding-window dataset over a price/indicator DataFrame.

    Item ``idx`` is the pair ``(features, target)``:

    * ``features`` -- float32 tensor of shape ``(sequence_length, 6)`` holding
      the min-max scaled feature columns for rows
      ``idx .. idx + sequence_length - 1``;
    * ``target`` -- int64 scalar tensor with the ``Target`` value of the row
      immediately after that window.

    NOTE: the min/max used for scaling are taken from the frame passed to this
    instance, so two datasets built from different frames (e.g. a train and a
    validation split) are scaled independently of each other.
    """

    # Columns that are scaled and returned as model input, in this order.
    FEATURE_COLUMNS = ['Open', 'High', 'Low', 'Close', 'RSI', 'EMAF']

    def __init__(self, dataframe, sequence_length=10):
        """
        :param dataframe: input frame; must contain the columns listed in
            ``FEATURE_COLUMNS`` plus an integer ``Target`` column
        :param sequence_length: number of consecutive rows in each window
        """
        self.data = dataframe
        self.sequence_length = sequence_length

        cols = self.FEATURE_COLUMNS

        # Column-wise min-max scaling on a copy, so the caller's frame is untouched.
        self.scaled_data = self.data.copy()
        raw = self.scaled_data[cols]
        col_min = raw.min()
        col_span = raw.max() - col_min
        # A constant column has zero span; divide by 1 instead so it scales to
        # 0.0 rather than turning into NaN (0 / 0).
        col_span = col_span.where(col_span != 0, 1)
        self.scaled_data[cols] = (raw - col_min) / col_span

        # Convert once up front: __getitem__ then only slices tensors instead
        # of re-selecting columns and slicing the DataFrame for every sample.
        self._features = torch.tensor(self.scaled_data[cols].values, dtype=torch.float32)
        self._targets = torch.tensor(self.data['Target'].values, dtype=torch.long)

    def __len__(self):
        # Clamp at zero: a frame shorter than one window holds no samples, and
        # len() raises ValueError if __len__ returns a negative number.
        return max(0, len(self.data) - self.sequence_length)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair for window number ``idx``."""
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for dataset of length {len(self)}")

        window_end = idx + self.sequence_length
        # The label belongs to the first row *after* the window.
        return self._features[idx:window_end], self._targets[window_end]



In [30]:
import torch
import torch.nn as nn

class BitcoinLSTM(nn.Module):
    """Stacked LSTM followed by a linear classification head."""

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        """
        :param input_size: number of features per time step
        :param hidden_size: size of the LSTM hidden state
        :param num_layers: number of stacked LSTM layers
        :param num_classes: number of output classes (size of the logit vector)
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # Maps the final hidden output to class logits.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``.

        :param x: tensor of shape ``(batch, seq_len, input_size)``
        """
        # nn.LSTM starts from all-zero hidden and cell states when none are
        # passed, created with the input's device and dtype. Building them by
        # hand meant a float32 CPU allocation plus a device copy on every call
        # and broke for non-float32 models.
        out, _ = self.lstm(x)

        # Classify from the output at the last time step only.
        return self.fc(out[:, -1, :])


In [48]:
import torch.optim as optim
from sklearn.metrics import accuracy_score

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    Both results are NaN when the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_samples = 0
    with torch.set_grad_enabled(is_training):
        for features, targets in loader:
            features, targets = features.to(device), targets.to(device)

            outputs = model(features)
            loss = criterion(outputs, targets)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # a short final batch does not count as much as a full one.
            n_batch = targets.size(0)
            loss_sum += loss.item() * n_batch
            n_correct += (outputs.argmax(dim=1) == targets).sum().item()
            n_samples += n_batch

    if n_samples == 0:
        return float("nan"), float("nan")
    return loss_sum / n_samples, n_correct / n_samples


def train_model(model, train_loader, val_loader, num_epochs, learning_rate, device):
    """
    Train a classifier with Adam and cross-entropy, printing metrics per epoch.

    :param model: model mapping a feature batch to class logits
    :param train_loader: iterable of ``(features, targets)`` training batches
    :param val_loader: iterable of ``(features, targets)`` validation batches
    :param num_epochs: number of passes over ``train_loader``
    :param learning_rate: Adam learning rate
    :param device: device the batches are moved to (the model must already be on it)
    :return: None; the model is updated in place and left in eval mode
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)

        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

    print("Training Complete")


In [72]:
def predict(model, pred_loader, device):
    """
    Run the model over a labelled loader and print its accuracy.

    :param model: trained model mapping a feature batch to class logits
    :param pred_loader: iterable of ``(features, targets)`` batches
    :param device: device the batches are moved to (the model must already be on it)
    :return: None; prints ``Predict Acc: <accuracy>`` (``nan`` when the loader
        yields no samples)
    """
    model.eval()

    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for features, targets in pred_loader:
            features, targets = features.to(device), targets.to(device)

            # Predicted class = index of the largest logit.
            predicted = model(features).argmax(dim=1)
            n_correct += (predicted == targets).sum().item()
            n_samples += targets.size(0)

    pred_accuracy = n_correct / n_samples if n_samples else float("nan")
    print(f"Predict Acc: {pred_accuracy:.4f}")

In [33]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from indicator.RSI import RSI
from indicator.EMA import EMA
from indicator.MACD import MACD

# Read one CSV of hourly candles per year and keep only the OHLC columns.
# The frames are collected first and concatenated once: growing `data` with
# pd.concat inside the loop (starting from an empty DataFrame) re-copies every
# earlier row on each iteration.
yearly_frames = [
    pd.read_csv(f'/workspace/BTCUSDT/BTCUSDT-1h-{year}.csv', index_col=0)[['Open', 'High', 'Low', 'Close']]
    for year in range(2018, 2019)
]
data = pd.concat(yearly_frames)

# Indicators are computed once over the full series rather than after every
# file; the earlier per-iteration results were overwritten by the last one.
data['RSI'] = RSI(data, window=15)
#data['MACD'] = MACD(data)
data['EMAF'] = EMA(data, window=20)

data

Unnamed: 0_level_0,Open,High,Low,Close,RSI,EMAF
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-01 00:00:00,13715.65,13715.65,13400.01,13529.01,,13529.010000
2018-01-01 01:00:00,13528.99,13595.89,13155.38,13203.06,0.000000,13497.967143
2018-01-01 02:00:00,13203.00,13418.43,13200.00,13330.18,28.057475,13481.987415
2018-01-01 03:00:00,13330.26,13611.27,13290.00,13410.03,38.836974,13475.134328
2018-01-01 04:00:00,13434.98,13623.29,13322.15,13601.01,54.973063,13487.122487
...,...,...,...,...,...,...
2018-12-31 19:00:00,3741.27,3742.22,3630.33,3687.47,35.722598,3746.325831
2018-12-31 20:00:00,3687.47,3688.77,3657.70,3672.00,34.360101,3739.247180
2018-12-31 21:00:00,3672.00,3678.00,3665.00,3671.72,31.483001,3732.816020
2018-12-31 22:00:00,3671.71,3687.02,3662.02,3686.43,36.015625,3728.398304


In [46]:
import pandas as pd


# Example of loading real data from a CSV file:
# df = pd.read_csv("your_data.csv")  # point this at your CSV file

# This cell assumes the input frame has already been loaded.
# e.g. df.head() -> Open, High, Low, Close, Volume, RSI, MACD, EMA

# Parameters used to compute the Target labels
threshold_up = 0.02  # relative rise that yields a buy label (2%)
threshold_down = -0.02  # relative drop that yields a sell label (-2%)
# NOTE(review): the original note called this window "5 minutes", but the window
# is counted in rows and the CSV files read in this notebook are named "1h" --
# presumably 5 hourly candles; confirm the intended horizon.
future_window = 5  # number of future rows inspected for each label
sequence_length=10  # rows per input window passed to BitcoinDataset
batch_size=32  # samples per DataLoader batch

# Target 계산 함수
def calculate_target(row, future_prices, threshold_up, threshold_down):
    """Label one row from the prices that follow it.

    :param row: mapping-like row; only ``row['Close']`` is read
    :param future_prices: object exposing ``max()`` and ``min()`` over the
        upcoming close prices
    :param threshold_up: relative rise that yields a buy label (e.g. 0.02)
    :param threshold_down: relative drop that yields a sell label (e.g. -0.02)
    :return: 0 (buy) when the highest future price reaches the upper level,
        otherwise 1 (sell) when the lowest reaches the lower level, else 2 (hold)
    """
    reference = row['Close']
    buy_level = reference * (1 + threshold_up)
    sell_level = reference * (1 + threshold_down)

    # The buy test comes first, so it wins when both levels are reached.
    if future_prices.max() >= buy_level:
        return 0
    if future_prices.min() <= sell_level:
        return 1
    return 2

# Target 생성
def create_targets(df, threshold_up, threshold_down, future_window):
    """Add a ``Target`` label column to ``df`` and drop rows containing NaN.

    ``df`` is modified in place and also returned.

    :param df: frame with a ``Close`` column
    :param threshold_up: relative rise passed to ``calculate_target``
    :param threshold_down: relative drop passed to ``calculate_target``
    :param future_window: number of following rows inspected for each label
    :return: the same ``df`` object, with ``Target`` added and NaN rows removed
    """
    closes = df['Close']
    n_rows = len(df)
    # Rows at or beyond this position lack a full look-ahead window.
    first_unlabelable = n_rows - future_window

    labels = [
        calculate_target(
            df.iloc[pos],
            closes.iloc[pos + 1:pos + 1 + future_window],
            threshold_up,
            threshold_down,
        )
        if pos < first_unlabelable
        else 2  # not enough future data: label as hold
        for pos in range(n_rows)
    ]

    df['Target'] = labels
    df.dropna(inplace=True)
    return df

# Attach the Target label column to the loaded candles
data = create_targets(data, threshold_up, threshold_down, future_window)

# Positional 80/20 split: the first 80% of rows train, the remainder validates
train_size = int(0.8 * len(data))
train_df, val_df = data[:train_size], data[train_size:]

# Wrap each split in a windowed Dataset
train_dataset = BitcoinDataset(train_df, sequence_length)
val_dataset = BitcoinDataset(val_df, sequence_length)

# Only the training batches are shuffled; validation keeps row order
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Quick look at the labelled frame
print(data.head(10))


                         Open      High       Low     Close        RSI  \
Open time                                                                
2018-01-01 01:00:00  13528.99  13595.89  13155.38  13203.06   0.000000   
2018-01-01 02:00:00  13203.00  13418.43  13200.00  13330.18  28.057475   
2018-01-01 03:00:00  13330.26  13611.27  13290.00  13410.03  38.836974   
2018-01-01 04:00:00  13434.98  13623.29  13322.15  13601.01  54.973063   
2018-01-01 05:00:00  13615.20  13699.00  13526.50  13558.99  51.957123   
2018-01-01 06:00:00  13539.00  13800.00  13510.00  13780.41  62.731177   
2018-01-01 07:00:00  13780.00  13818.55  13555.02  13570.35  51.726240   
2018-01-01 08:00:00  13569.98  13735.24  13400.00  13499.99  48.855462   
2018-01-01 09:00:00  13499.97  13670.00  13459.11  13616.99  53.176724   
2018-01-01 10:00:00  13632.00  13657.92  13497.98  13570.01  51.431824   

                             EMAF  Target  
Open time                                  
2018-01-01 01:00:00  13

In [68]:
# Hyperparameters and run configuration
SEED = 42  # fixed seed so weight initialisation and batch shuffling are repeatable
input_size = 6  # one per feature column: Open, High, Low, Close, RSI, EMAF
hidden_size = 64
num_layers = 2
output_size = 3  # classes: buy (0), sell (1), hold (2)
# NOTE: train_loader / val_loader were already built in an earlier cell, so
# changing sequence_length or batch_size here does not affect them.
sequence_length = 10
learning_rate = 0.001
num_epochs = 20
batch_size = 32

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed before creating the model: both the initial weights and the shuffled
# batch order draw from torch's global random generator.
torch.manual_seed(SEED)

# Build the model and train it
model = BitcoinLSTM(input_size, hidden_size, num_layers, output_size).to(device)

train_model(model, train_loader, val_loader, num_epochs, learning_rate, device)



Epoch [1/20], Train Loss: 0.7861, Train Acc: 0.6828, Val Loss: 1.0937, Val Acc: 0.3801
Epoch [2/20], Train Loss: 0.7554, Train Acc: 0.6973, Val Loss: 1.0263, Val Acc: 0.4032
Epoch [3/20], Train Loss: 0.7551, Train Acc: 0.6995, Val Loss: 1.0530, Val Acc: 0.3801
Epoch [4/20], Train Loss: 0.7542, Train Acc: 0.6964, Val Loss: 1.0526, Val Acc: 0.4061
Epoch [5/20], Train Loss: 0.7519, Train Acc: 0.6987, Val Loss: 1.0023, Val Acc: 0.4038
Epoch [6/20], Train Loss: 0.7522, Train Acc: 0.6983, Val Loss: 1.0381, Val Acc: 0.4050
Epoch [7/20], Train Loss: 0.7526, Train Acc: 0.6982, Val Loss: 1.0604, Val Acc: 0.4055
Epoch [8/20], Train Loss: 0.7514, Train Acc: 0.6979, Val Loss: 1.0342, Val Acc: 0.4061
Epoch [9/20], Train Loss: 0.7515, Train Acc: 0.6992, Val Loss: 1.0642, Val Acc: 0.4055
Epoch [10/20], Train Loss: 0.7481, Train Acc: 0.7000, Val Loss: 1.0897, Val Acc: 0.3888
Epoch [11/20], Train Loss: 0.7517, Train Acc: 0.7013, Val Loss: 1.0244, Val Acc: 0.4061
Epoch [12/20], Train Loss: 0.7513, Train 

In [78]:
# Evaluate the trained model on the 2023 hourly candles (a different year from
# the training file), prepared the same way as the training frame.
# .copy() makes the column subset an independent frame before columns are added.
new_data = pd.read_csv('/workspace/BTCUSDT/BTCUSDT-1h-2023.csv', index_col=0)
new_data = new_data[['Open','High','Low','Close']].copy()
new_data['RSI'] = RSI(new_data, window=15)
# new_data['MACD'] = MACD(new_data)  # MACD feature is currently disabled
new_data['EMAF'] = EMA(new_data, window=20)
new_data = create_targets(new_data, threshold_up, threshold_down, future_window)

pred_dataset = BitcoinDataset(new_data, sequence_length)

# Evaluation does not need shuffling: accuracy is order-independent, and
# keeping row order makes the run deterministic.
pred_loader = DataLoader(pred_dataset, batch_size=batch_size, shuffle=False)
predict(model, pred_loader, device)

Predict Acc: 0.6150
