In [None]:
import pandas as pd
import numpy as np
import pandas_ta as ta
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from math import sqrt

####################################
# 1. 기술적 지표 계산 함수 (변경 없음)
####################################
def calculate_indicators(data):
    data['William_R'] = ta.willr(data['high'], data['low'], data['close'])
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'])
    data['OBV'] = ta.obv(data['close'], data['volume'])
    data['Z_Score'] = (data['close'] - data['close'].rolling(window=20).mean()) / data['close'].rolling(window=20).std()
    data['Entropy'] = ta.entropy(data['close'], length=14)
    data['SMA_5'] = data['close'].rolling(window=5).mean()
    data['SMA_10'] = data['close'].rolling(window=10).mean()
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_60'] = data['close'].rolling(window=60).mean()
    data['SMA_120'] = data['close'].rolling(window=120).mean()
    data['SMA_250'] = data['close'].rolling(window=250).mean()
    data['RSI'] = ta.rsi(data['close'])
    bb = ta.bbands(data['close'])
    data['BB_Upper'], data['BB_Middle'], data['BB_Lower'] = bb.iloc[:, 0], bb.iloc[:, 1], bb.iloc[:, 2]
    macd = ta.macd(data['close'])
    data['MACD'] = macd.iloc[:, 0]
    data['Stochastic'] = ta.stoch(data['high'], data['low'], data['close']).iloc[:, 0]
    return data.dropna()

####################################
# 1-2. 추가 feature 계산 (가격 차이)
####################################
def calculate_price_differences(data):
    data['close_open'] = data['close'] - data['open']
    data['high_low'] = data['high'] - data['low']
    data['high_open'] = data['high'] - data['open']
    data['high_close'] = data['high'] - data['close']
    data['open_low'] = data['open'] - data['low']
    data['close_low'] = data['close'] - data['low']
    return data

####################################
# 2. Datetime Feature One-Hot Encoding (각 feature 128차원)
####################################
def encode_datetime_features_onehot(data, dim=128):
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    
    data['hour_of_day'] = data['datetime'].dt.hour
    data['day_of_week'] = data['datetime'].dt.dayofweek
    data['week_of_month'] = (data['datetime'].dt.day - 1) // 7 + 1
    data['month'] = data['datetime'].dt.month

    def onehot_with_fixed_dim(series, prefix, dim):
        dummies = pd.get_dummies(series, prefix=prefix)
        expected_cols = [f"{prefix}_{i}" for i in range(dim)]
        dummies = dummies.reindex(columns=expected_cols, fill_value=0)
        return dummies

    hour_one_hot = onehot_with_fixed_dim(data['hour_of_day'], 'Hour', dim)
    day_one_hot = onehot_with_fixed_dim(data['day_of_week'], 'Day', dim)
    week_one_hot = onehot_with_fixed_dim(data['week_of_month'], 'Week', dim)
    month_one_hot = onehot_with_fixed_dim(data['month'], 'Month', dim)
    
    data = pd.concat([data, hour_one_hot, day_one_hot, week_one_hot, month_one_hot], axis=1)
    return data

####################################
# 3. Rolling MinMax Scaling (분모 0 방지)
####################################
def rolling_minmax_scale(series, window=24):
    roll_min = series.rolling(window=window, min_periods=window).min()
    roll_max = series.rolling(window=window, min_periods=window).max()
    scaled = (series - roll_min) / ((roll_max - roll_min) + 1e-8)
    return scaled

####################################
# 4. Binning 후 One-Hot 인코딩 (각 feature를 128차원으로)
####################################
def bin_and_encode(data, features, bins=128, drop_original=True):
    for feature in features:
        data[f'{feature}_Bin'] = pd.cut(data[feature], bins=bins, labels=False)
        one_hot = pd.get_dummies(data[f'{feature}_Bin'], prefix=f'{feature}_Bin')
        expected_columns = [f'{feature}_Bin_{i}' for i in range(bins)]
        one_hot = one_hot.reindex(columns=expected_columns, fill_value=0)
        data = pd.concat([data, one_hot], axis=1)
        if drop_original:
            data.drop(columns=[f'{feature}_Bin'], inplace=True)
    data = data.astype(np.float32)
    return data

####################################
# 5. 데이터 로드 및 전처리
####################################
data = pd.read_csv("BTC_upbit_KRW_min60.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)
data = calculate_indicators(data)
data = calculate_price_differences(data)   # 추가 feature 계산
data = encode_datetime_features_onehot(data, dim=128)

# 기존 feature와 추가한 가격 차이 feature들을 사용
features_to_bin = ['open', 'high', 'low', 'volume', 'value', 'William_R',
                   'ATR', 'OBV', 'Z_Score', 'Entropy', 'SMA_5', 'SMA_10', 
                   'SMA_20', 'SMA_60', 'SMA_120', 'SMA_250', 'RSI', 'BB_Upper', 'BB_Middle', 
                   'BB_Lower', 'MACD', 'Stochastic',
                   'close_open', 'high_low', 'high_open', 'high_close', 'open_low', 'close_low']

# datetime one-hot 컬럼: 이미 128차원씩 각 4개 (총 512차원)
datetime_onehot_features = [col for col in data.columns if col.startswith('Hour_') or 
                              col.startswith('Day_') or col.startswith('Week_') or 
                              col.startswith('Month_')]

# 타깃으로 사용할 close 값을 보존 (continuous)
data['close_target'] = data['close']

# 최종 데이터 선택 (여기서 close_target은 타깃으로만 사용)
data = data[features_to_bin + ['close_target'] + datetime_onehot_features].dropna()

# 각 기술적 및 가격 차이 feature에 대해 rolling scaling 적용 (window=24)
for feature in features_to_bin:
    data[feature] = rolling_minmax_scale(data[feature], window=24)
data = data.dropna()

# 기술적 및 가격 차이 feature들을 128차원의 one-hot 벡터로 변환
data = bin_and_encode(data, features_to_bin, bins=128, drop_original=True)
# close_target에 대해서도 one-hot 인코딩 (128차원) 보조 feature로 사용 가능하지만,
# 모델 입력에서는 close_target (continuous)은 제외할 예정
data['close_for_binning'] = data['close_target']
data = bin_and_encode(data, ['close_for_binning'], bins=128, drop_original=False)
data.drop(columns=['close_for_binning'], inplace=True)

# 최종 입력 데이터: 오직 one-hot 인코딩된 feature만 사용 (continuous close_target은 타깃으로만 사용)
final_input_columns = []
for feature in features_to_bin:
    final_input_columns.extend([f'{feature}_Bin_{i}' for i in range(128)])
final_input_columns.extend(datetime_onehot_features)

# 타깃 데이터: continuous close_target
final_target_column = ['close_target']

data_input = data[final_input_columns]
data_target = data[final_target_column]

####################################
# 6-2. Dataset 정의 (입력과 타깃을 별도로 사용)
####################################
class TimeSeriesDataset(Dataset):
    def __init__(self, input_data, target_data, lookback=24):
        self.input_data = input_data.values
        self.target_data = target_data.values  # shape: (N, 1)
        self.lookback = lookback

    def __len__(self):
        return len(self.input_data) - self.lookback

    def __getitem__(self, idx):
        x = self.input_data[idx: idx + self.lookback, :]
        y = self.target_data[idx + self.lookback, 0]
        y_prev = self.target_data[idx + self.lookback - 1, 0]
        y_target = 1 if y > y_prev else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

####################################
# 7. Transformer Encoder 직접 구현
####################################
# 7-1. Multi-Head Self-Attention
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == embed_dim, "embed_dim은 num_heads로 나누어떨어져야 합니다."
        
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key   = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.out   = nn.Linear(embed_dim, embed_dim)
        
    def forward(self, x):
        batch_size, seq_len, embed_dim = x.shape
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        
        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        
        scores = torch.matmul(Q, K.transpose(-2, -1)) / sqrt(self.head_dim)
        attn = torch.softmax(scores, dim=-1)
        out = torch.matmul(attn, V)
        
        out = out.transpose(1, 2).contiguous().view(batch_size, seq_len, embed_dim)
        out = self.out(out)
        return out

# 7-2. Feed-Forward Network
class FeedForward(nn.Module):
    def __init__(self, embed_dim, ffn_dim):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(embed_dim, ffn_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(ffn_dim, embed_dim)
        
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

# 7-3. Transformer Encoder Layer (Self-Attention + FFN + Residual + LayerNorm)
class TransformerEncoderLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, ffn_dim):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.ffn = FeedForward(embed_dim, ffn_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        
    def forward(self, x):
        attn_out = self.self_attn(x)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)
        x = self.norm2(x + ffn_out)
        return x

# 7-4. Encoder-Only Transformer 직접 구현 (lookback=24이므로, max_seq_len=24)
class EncoderOnlyTransformerCustom(nn.Module):
    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=24):
        super(EncoderOnlyTransformerCustom, self).__init__()
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_seq_len, embedding_dim)
        self.layers = nn.ModuleList([
            TransformerEncoderLayer(embedding_dim, num_heads, ffn_dim)
            for _ in range(num_layers)
        ])
        self.fc = nn.Linear(embedding_dim, num_classes)
        
    def forward(self, x):
        batch_size, seq_len, _ = x.shape
        x = self.token_embedding(x)
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0).expand(batch_size, seq_len)
        x = x + self.position_embedding(positions)
        for layer in self.layers:
            x = layer(x)
        x = x[:, -1, :]
        return self.fc(x)

####################################
# 8. 학습 및 평가 루프 (Fine-tuning 및 Validation Accuracy 출력)
####################################
def evaluate_model(model, data_loader, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    return total_loss / len(data_loader), correct / total

def train_model(model, train_loader, val_loader, num_epochs, lr, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
    
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
        train_loss = total_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        scheduler.step(val_loss)
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def train_and_evaluate(data, num_experiments=16, lookback=24, num_epochs=10):
    # 모델 입력은 one-hot 인코딩된 feature들만, 타깃은 continuous close_target
    input_cols = []
    for feature in features_to_bin:
        input_cols.extend([f'{feature}_Bin_{i}' for i in range(128)])
    input_cols.extend(datetime_onehot_features)
    target_cols = ['close_target']
    
    data_input = data[input_cols]
    data_target = data[target_cols]
    
    step_size = 2500  # 이동 단위
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    val_acc_list = []
    test_acc_list = []
    
    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8
        val_end = train_end + step_size
        test_end = val_end + step_size
        if test_end > len(data_input):
            break
        print(data)
        
        train_input = data_input.iloc[train_start:train_end]
        train_target = data_target.iloc[train_start:train_end]
        val_input = data_input.iloc[train_end:val_end]
        val_target = data_target.iloc[train_end:val_end]
        test_input = data_input.iloc[val_end:test_end]
        test_target = data_target.iloc[val_end:test_end]
        
        # Dataset 생성: 입력과 타깃을 결합하여 사용
        train_dataset = TimeSeriesDataset(train_input, train_target, lookback=lookback)
        val_dataset = TimeSeriesDataset(val_input, val_target, lookback=lookback)
        test_dataset = TimeSeriesDataset(test_input, test_target, lookback=lookback)

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        
        # Fine-tuning: 이전 구간의 모델 파라미터를 그대로 로드하여 미세 조정
        lr = 1e-4
        input_dim = data_input.shape[1]
        model = EncoderOnlyTransformerCustom(input_dim=input_dim, embedding_dim=512, num_heads=8, 
                                               num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=lookback).to(device)
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                model.load_state_dict(torch.load(f"model_experiment_{exp - 1}.pth"))
                print(f"Loaded model from experiment {exp - 1} for fine-tuning.")
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
        
        print(f"Experiment {exp}: Training with lr={lr} (Fine-Tuning)")
        model = train_model(model, train_loader, val_loader, num_epochs, lr, device)
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        val_acc_list.append(val_acc)
        print(f"Experiment {exp}: Final Validation Accuracy: {val_acc:.4f}")
        
        test_loss, test_acc = evaluate_model(model, test_loader, device)
        test_acc_list.append(test_acc)
        print(f"Experiment {exp}: Test Accuracy: {test_acc:.4f}")
    
    avg_val_acc = sum(val_acc_list) / len(val_acc_list)
    avg_test_acc = sum(test_acc_list) / len(test_acc_list)
    print(f"\nFinal Average Validation Accuracy: {avg_val_acc:.4f}")
    print(f"Final Average Test Accuracy: {avg_test_acc:.4f}")

train_and_evaluate(data)


                         open      high       low    volume     value  \
2017-10-07 02:00:00  0.679612  0.662338  0.338346  0.003346  0.003359   
2017-10-07 03:00:00  0.020202  0.298701  0.285714  0.002740  0.002707   
2017-10-07 04:00:00  0.070707  0.298701  0.240602  0.003067  0.003032   
2017-10-07 05:00:00  0.464646  0.519481  0.368421  0.003391  0.003412   
2017-10-07 06:00:00  0.505050  0.636364  0.375940  0.002723  0.002738   
...                       ...       ...       ...       ...       ...   
2024-12-18 12:00:00  0.000000  0.000000  0.000000  0.442018  0.435797   
2024-12-18 13:00:00  0.000000  0.000000  0.000000  1.000000  1.000000   
2024-12-18 14:00:00  0.000000  0.000000  0.199575  0.298076  0.297948   
2024-12-18 15:00:00  0.000000  0.000000  0.226983  0.109934  0.109102   
2024-12-18 16:00:00  0.053565  0.041933  0.318664  0.000000  0.000000   

                     William_R       ATR       OBV   Z_Score   Entropy  ...  \
2017-10-07 02:00:00   0.000000  1.000000  0.



Epoch 1/10 | Train Loss: 0.7224, Train Acc: 0.5010 | Val Loss: 0.6944, Val Acc: 0.4903
Epoch 2/10 | Train Loss: 0.6966, Train Acc: 0.5125 | Val Loss: 0.6933, Val Acc: 0.5036
Epoch 3/10 | Train Loss: 0.6833, Train Acc: 0.5683 | Val Loss: 0.6928, Val Acc: 0.5404
Epoch 4/10 | Train Loss: 0.6631, Train Acc: 0.6071 | Val Loss: 0.7076, Val Acc: 0.5412
Epoch 5/10 | Train Loss: 0.6474, Train Acc: 0.6342 | Val Loss: 0.7254, Val Acc: 0.5347
Epoch 6/10 | Train Loss: 0.6216, Train Acc: 0.6696 | Val Loss: 0.7528, Val Acc: 0.5295
Epoch 7/10 | Train Loss: 0.5756, Train Acc: 0.7173 | Val Loss: 0.7682, Val Acc: 0.5307
Epoch 8/10 | Train Loss: 0.5556, Train Acc: 0.7397 | Val Loss: 0.7567, Val Acc: 0.5234
Epoch 9/10 | Train Loss: 0.5400, Train Acc: 0.7550 | Val Loss: 0.8329, Val Acc: 0.5234
Epoch 10/10 | Train Loss: 0.5090, Train Acc: 0.7835 | Val Loss: 0.8741, Val Acc: 0.5254
Saved model for experiment 0.
Experiment 0: Final Validation Accuracy: 0.5254
Experiment 0: Test Accuracy: 0.5222
               



Epoch 1/10 | Train Loss: 0.5754, Train Acc: 0.7303 | Val Loss: 0.7941, Val Acc: 0.5263
Epoch 2/10 | Train Loss: 0.5566, Train Acc: 0.7459 | Val Loss: 0.7947, Val Acc: 0.5287
Epoch 3/10 | Train Loss: 0.5342, Train Acc: 0.7652 | Val Loss: 0.8581, Val Acc: 0.5194
Epoch 4/10 | Train Loss: 0.5203, Train Acc: 0.7756 | Val Loss: 0.8982, Val Acc: 0.5214
Epoch 5/10 | Train Loss: 0.4843, Train Acc: 0.8014 | Val Loss: 0.9614, Val Acc: 0.5190
Epoch 6/10 | Train Loss: 0.4560, Train Acc: 0.8194 | Val Loss: 0.9568, Val Acc: 0.5263
Epoch 7/10 | Train Loss: 0.4385, Train Acc: 0.8271 | Val Loss: 1.0476, Val Acc: 0.5174
Epoch 8/10 | Train Loss: 0.4083, Train Acc: 0.8390 | Val Loss: 1.1346, Val Acc: 0.5198
Epoch 9/10 | Train Loss: 0.3903, Train Acc: 0.8454 | Val Loss: 1.1559, Val Acc: 0.5162
Epoch 10/10 | Train Loss: 0.3755, Train Acc: 0.8485 | Val Loss: 1.2063, Val Acc: 0.5153
Saved model for experiment 1.
Experiment 1: Final Validation Accuracy: 0.5153
Experiment 1: Test Accuracy: 0.5044
               



Epoch 1/10 | Train Loss: 0.5278, Train Acc: 0.7666 | Val Loss: 0.9014, Val Acc: 0.4968
Epoch 2/10 | Train Loss: 0.5236, Train Acc: 0.7686 | Val Loss: 0.8965, Val Acc: 0.5093
Epoch 3/10 | Train Loss: 0.5031, Train Acc: 0.7800 | Val Loss: 0.9531, Val Acc: 0.5012
Epoch 4/10 | Train Loss: 0.4798, Train Acc: 0.7895 | Val Loss: 0.9652, Val Acc: 0.5044
Epoch 5/10 | Train Loss: 0.4601, Train Acc: 0.7957 | Val Loss: 0.9552, Val Acc: 0.5053
Epoch 6/10 | Train Loss: 0.4055, Train Acc: 0.8173 | Val Loss: 1.0844, Val Acc: 0.5089
Epoch 7/10 | Train Loss: 0.3812, Train Acc: 0.8210 | Val Loss: 1.2327, Val Acc: 0.4915
Epoch 8/10 | Train Loss: 0.3518, Train Acc: 0.8276 | Val Loss: 1.4380, Val Acc: 0.5036
Epoch 9/10 | Train Loss: 0.3143, Train Acc: 0.8517 | Val Loss: 1.4276, Val Acc: 0.5032
Epoch 10/10 | Train Loss: 0.2953, Train Acc: 0.8714 | Val Loss: 1.5693, Val Acc: 0.4939
Saved model for experiment 2.
Experiment 2: Final Validation Accuracy: 0.4939
Experiment 2: Test Accuracy: 0.5069
               



Epoch 1/10 | Train Loss: 0.4760, Train Acc: 0.7736 | Val Loss: 0.9590, Val Acc: 0.5097
Epoch 2/10 | Train Loss: 0.4690, Train Acc: 0.7700 | Val Loss: 0.9982, Val Acc: 0.5065
Epoch 3/10 | Train Loss: 0.4444, Train Acc: 0.7838 | Val Loss: 1.1429, Val Acc: 0.5206
Epoch 4/10 | Train Loss: 0.4286, Train Acc: 0.7949 | Val Loss: 1.0646, Val Acc: 0.5295
Epoch 5/10 | Train Loss: 0.3566, Train Acc: 0.8409 | Val Loss: 1.1771, Val Acc: 0.5105
Epoch 6/10 | Train Loss: 0.3191, Train Acc: 0.8665 | Val Loss: 1.3187, Val Acc: 0.5295
Epoch 7/10 | Train Loss: 0.2894, Train Acc: 0.8850 | Val Loss: 1.4266, Val Acc: 0.5271
Epoch 8/10 | Train Loss: 0.2432, Train Acc: 0.9128 | Val Loss: 1.5240, Val Acc: 0.5182
Epoch 9/10 | Train Loss: 0.2198, Train Acc: 0.9249 | Val Loss: 1.7316, Val Acc: 0.5109
Epoch 10/10 | Train Loss: 0.2062, Train Acc: 0.9350 | Val Loss: 1.6919, Val Acc: 0.5053
Saved model for experiment 3.
Experiment 3: Final Validation Accuracy: 0.5053
Experiment 3: Test Accuracy: 0.5230
               



Epoch 1/10 | Train Loss: 0.4363, Train Acc: 0.8186 | Val Loss: 1.0417, Val Acc: 0.5133
Epoch 2/10 | Train Loss: 0.4308, Train Acc: 0.8141 | Val Loss: 1.1547, Val Acc: 0.5194
Epoch 3/10 | Train Loss: 0.3978, Train Acc: 0.8332 | Val Loss: 1.0804, Val Acc: 0.5125
Epoch 4/10 | Train Loss: 0.3737, Train Acc: 0.8458 | Val Loss: 1.1595, Val Acc: 0.5210
Epoch 5/10 | Train Loss: 0.3050, Train Acc: 0.8868 | Val Loss: 1.2464, Val Acc: 0.5267
Epoch 6/10 | Train Loss: 0.2667, Train Acc: 0.9080 | Val Loss: 1.4190, Val Acc: 0.5198
Epoch 7/10 | Train Loss: 0.2436, Train Acc: 0.9186 | Val Loss: 1.5727, Val Acc: 0.5238
Epoch 8/10 | Train Loss: 0.1930, Train Acc: 0.9410 | Val Loss: 1.7185, Val Acc: 0.5170
Epoch 9/10 | Train Loss: 0.1694, Train Acc: 0.9512 | Val Loss: 1.8136, Val Acc: 0.5137
Epoch 10/10 | Train Loss: 0.1644, Train Acc: 0.9526 | Val Loss: 1.7334, Val Acc: 0.5149
Saved model for experiment 4.
Experiment 4: Final Validation Accuracy: 0.5149
Experiment 4: Test Accuracy: 0.4996
               



Epoch 1/10 | Train Loss: 0.4103, Train Acc: 0.8402 | Val Loss: 1.0017, Val Acc: 0.4891
Epoch 2/10 | Train Loss: 0.3947, Train Acc: 0.8457 | Val Loss: 1.1460, Val Acc: 0.4915
Epoch 3/10 | Train Loss: 0.3686, Train Acc: 0.8582 | Val Loss: 1.0916, Val Acc: 0.4867
Epoch 4/10 | Train Loss: 0.3551, Train Acc: 0.8656 | Val Loss: 1.2580, Val Acc: 0.4939
Epoch 5/10 | Train Loss: 0.2707, Train Acc: 0.9048 | Val Loss: 1.4486, Val Acc: 0.4891
Epoch 6/10 | Train Loss: 0.2352, Train Acc: 0.9222 | Val Loss: 1.4487, Val Acc: 0.4927
Epoch 7/10 | Train Loss: 0.2130, Train Acc: 0.9337 | Val Loss: 1.5890, Val Acc: 0.4988
Epoch 8/10 | Train Loss: 0.1708, Train Acc: 0.9497 | Val Loss: 1.7244, Val Acc: 0.4972
Epoch 9/10 | Train Loss: 0.1569, Train Acc: 0.9544 | Val Loss: 1.7989, Val Acc: 0.5020
Epoch 10/10 | Train Loss: 0.1431, Train Acc: 0.9612 | Val Loss: 1.9112, Val Acc: 0.4980
Saved model for experiment 5.
Experiment 5: Final Validation Accuracy: 0.4980
Experiment 5: Test Accuracy: 0.5202
               



Epoch 1/10 | Train Loss: 0.4044, Train Acc: 0.8463 | Val Loss: 0.9555, Val Acc: 0.5218
Epoch 2/10 | Train Loss: 0.3850, Train Acc: 0.8520 | Val Loss: 1.0466, Val Acc: 0.5077
Epoch 3/10 | Train Loss: 0.3680, Train Acc: 0.8582 | Val Loss: 1.0402, Val Acc: 0.5109
Epoch 4/10 | Train Loss: 0.3451, Train Acc: 0.8723 | Val Loss: 1.2409, Val Acc: 0.5214
Epoch 5/10 | Train Loss: 0.2625, Train Acc: 0.9113 | Val Loss: 1.2795, Val Acc: 0.5226
Epoch 6/10 | Train Loss: 0.2305, Train Acc: 0.9273 | Val Loss: 1.6100, Val Acc: 0.5158
Epoch 7/10 | Train Loss: 0.2070, Train Acc: 0.9400 | Val Loss: 1.6279, Val Acc: 0.5214
Epoch 8/10 | Train Loss: 0.1757, Train Acc: 0.9516 | Val Loss: 1.6947, Val Acc: 0.5202
Epoch 9/10 | Train Loss: 0.1609, Train Acc: 0.9560 | Val Loss: 1.8027, Val Acc: 0.5234
Epoch 10/10 | Train Loss: 0.1511, Train Acc: 0.9600 | Val Loss: 1.8041, Val Acc: 0.5182
Saved model for experiment 6.
Experiment 6: Final Validation Accuracy: 0.5182
Experiment 6: Test Accuracy: 0.5109
               



Epoch 1/10 | Train Loss: 0.3893, Train Acc: 0.8540 | Val Loss: 0.9618, Val Acc: 0.5105
Epoch 2/10 | Train Loss: 0.3761, Train Acc: 0.8554 | Val Loss: 1.1703, Val Acc: 0.5057
Epoch 3/10 | Train Loss: 0.3628, Train Acc: 0.8635 | Val Loss: 1.2393, Val Acc: 0.5109
Epoch 4/10 | Train Loss: 0.3371, Train Acc: 0.8764 | Val Loss: 1.1837, Val Acc: 0.5113
Epoch 5/10 | Train Loss: 0.2638, Train Acc: 0.9117 | Val Loss: 1.3133, Val Acc: 0.5186
Epoch 6/10 | Train Loss: 0.2212, Train Acc: 0.9314 | Val Loss: 1.4649, Val Acc: 0.5129
Epoch 7/10 | Train Loss: 0.2029, Train Acc: 0.9400 | Val Loss: 1.6694, Val Acc: 0.5162
Epoch 8/10 | Train Loss: 0.1679, Train Acc: 0.9529 | Val Loss: 1.9017, Val Acc: 0.5170
Epoch 9/10 | Train Loss: 0.1501, Train Acc: 0.9586 | Val Loss: 1.9689, Val Acc: 0.5117
Epoch 10/10 | Train Loss: 0.1364, Train Acc: 0.9634 | Val Loss: 2.0087, Val Acc: 0.5153
Saved model for experiment 7.
Experiment 7: Final Validation Accuracy: 0.5153
Experiment 7: Test Accuracy: 0.5190
               



Epoch 1/10 | Train Loss: 0.3826, Train Acc: 0.8589 | Val Loss: 1.1273, Val Acc: 0.5149
Epoch 2/10 | Train Loss: 0.3548, Train Acc: 0.8667 | Val Loss: 1.1635, Val Acc: 0.5089
Epoch 3/10 | Train Loss: 0.3274, Train Acc: 0.8803 | Val Loss: 1.2588, Val Acc: 0.5182
Epoch 4/10 | Train Loss: 0.3107, Train Acc: 0.8883 | Val Loss: 1.3460, Val Acc: 0.5093
Epoch 5/10 | Train Loss: 0.2372, Train Acc: 0.9227 | Val Loss: 1.4152, Val Acc: 0.5073
Epoch 6/10 | Train Loss: 0.1991, Train Acc: 0.9367 | Val Loss: 1.7036, Val Acc: 0.5081
Epoch 7/10 | Train Loss: 0.1803, Train Acc: 0.9433 | Val Loss: 1.7992, Val Acc: 0.5170
Epoch 8/10 | Train Loss: 0.1363, Train Acc: 0.9608 | Val Loss: 1.9922, Val Acc: 0.5153
Epoch 9/10 | Train Loss: 0.1188, Train Acc: 0.9679 | Val Loss: 2.2417, Val Acc: 0.5113
Epoch 10/10 | Train Loss: 0.1004, Train Acc: 0.9756 | Val Loss: 2.1705, Val Acc: 0.5230
Saved model for experiment 8.
Experiment 8: Final Validation Accuracy: 0.5230
Experiment 8: Test Accuracy: 0.5121
               



Epoch 1/10 | Train Loss: 0.3802, Train Acc: 0.8600 | Val Loss: 1.0891, Val Acc: 0.5117
Epoch 2/10 | Train Loss: 0.3378, Train Acc: 0.8751 | Val Loss: 1.1574, Val Acc: 0.5137
Epoch 3/10 | Train Loss: 0.3078, Train Acc: 0.8871 | Val Loss: 1.1232, Val Acc: 0.5109
Epoch 4/10 | Train Loss: 0.2827, Train Acc: 0.8990 | Val Loss: 1.1012, Val Acc: 0.5121
Epoch 5/10 | Train Loss: 0.2093, Train Acc: 0.9321 | Val Loss: 1.5246, Val Acc: 0.5109
Epoch 6/10 | Train Loss: 0.1656, Train Acc: 0.9517 | Val Loss: 1.8756, Val Acc: 0.5040
Epoch 7/10 | Train Loss: 0.1461, Train Acc: 0.9595 | Val Loss: 1.9350, Val Acc: 0.5166
Epoch 8/10 | Train Loss: 0.1071, Train Acc: 0.9733 | Val Loss: 2.0833, Val Acc: 0.5097
Epoch 9/10 | Train Loss: 0.0851, Train Acc: 0.9800 | Val Loss: 2.3906, Val Acc: 0.5077
Epoch 10/10 | Train Loss: 0.0727, Train Acc: 0.9837 | Val Loss: 2.6977, Val Acc: 0.5073
Saved model for experiment 9.
Experiment 9: Final Validation Accuracy: 0.5073
Experiment 9: Test Accuracy: 0.5190
               



Epoch 1/10 | Train Loss: 0.3614, Train Acc: 0.8715 | Val Loss: 1.1259, Val Acc: 0.5178
Epoch 2/10 | Train Loss: 0.3210, Train Acc: 0.8801 | Val Loss: 1.2485, Val Acc: 0.5331
Epoch 3/10 | Train Loss: 0.2745, Train Acc: 0.9000 | Val Loss: 1.3879, Val Acc: 0.5149
Epoch 4/10 | Train Loss: 0.2398, Train Acc: 0.9137 | Val Loss: 1.4582, Val Acc: 0.5210
Epoch 5/10 | Train Loss: 0.1564, Train Acc: 0.9509 | Val Loss: 1.9607, Val Acc: 0.5254
Epoch 6/10 | Train Loss: 0.1271, Train Acc: 0.9640 | Val Loss: 2.1262, Val Acc: 0.5174
Epoch 7/10 | Train Loss: 0.1053, Train Acc: 0.9706 | Val Loss: 2.1387, Val Acc: 0.5206
Epoch 8/10 | Train Loss: 0.0642, Train Acc: 0.9857 | Val Loss: 2.2666, Val Acc: 0.5190
Epoch 9/10 | Train Loss: 0.0484, Train Acc: 0.9894 | Val Loss: 2.5266, Val Acc: 0.5113
Epoch 10/10 | Train Loss: 0.0378, Train Acc: 0.9923 | Val Loss: 2.8108, Val Acc: 0.5141
Saved model for experiment 10.
Experiment 10: Final Validation Accuracy: 0.5141
Experiment 10: Test Accuracy: 0.4980
            



Epoch 1/10 | Train Loss: 0.3285, Train Acc: 0.8880 | Val Loss: 1.2327, Val Acc: 0.4952
Epoch 2/10 | Train Loss: 0.2846, Train Acc: 0.8979 | Val Loss: 1.3009, Val Acc: 0.5057
Epoch 3/10 | Train Loss: 0.2464, Train Acc: 0.9129 | Val Loss: 1.4502, Val Acc: 0.5234
Epoch 4/10 | Train Loss: 0.2207, Train Acc: 0.9245 | Val Loss: 1.7092, Val Acc: 0.5186
Epoch 5/10 | Train Loss: 0.1438, Train Acc: 0.9582 | Val Loss: 2.1437, Val Acc: 0.5036
Epoch 6/10 | Train Loss: 0.1139, Train Acc: 0.9691 | Val Loss: 2.0846, Val Acc: 0.5008
Epoch 7/10 | Train Loss: 0.0918, Train Acc: 0.9764 | Val Loss: 2.3092, Val Acc: 0.5081
Epoch 8/10 | Train Loss: 0.0551, Train Acc: 0.9872 | Val Loss: 2.8065, Val Acc: 0.5028
Epoch 9/10 | Train Loss: 0.0398, Train Acc: 0.9916 | Val Loss: 2.9964, Val Acc: 0.4992
Epoch 10/10 | Train Loss: 0.0318, Train Acc: 0.9935 | Val Loss: 2.9744, Val Acc: 0.5020
Saved model for experiment 11.
Experiment 11: Final Validation Accuracy: 0.5020
Experiment 11: Test Accuracy: 0.5153
            



Epoch 1/10 | Train Loss: 0.3221, Train Acc: 0.8940 | Val Loss: 1.1983, Val Acc: 0.5190
Epoch 2/10 | Train Loss: 0.2960, Train Acc: 0.8969 | Val Loss: 1.2618, Val Acc: 0.5202
Epoch 3/10 | Train Loss: 0.2522, Train Acc: 0.9151 | Val Loss: 1.3488, Val Acc: 0.5101
Epoch 4/10 | Train Loss: 0.2231, Train Acc: 0.9253 | Val Loss: 1.4722, Val Acc: 0.5182
Epoch 5/10 | Train Loss: 0.1528, Train Acc: 0.9561 | Val Loss: 1.8469, Val Acc: 0.5174
Epoch 6/10 | Train Loss: 0.1144, Train Acc: 0.9714 | Val Loss: 2.0313, Val Acc: 0.5254
Epoch 7/10 | Train Loss: 0.1023, Train Acc: 0.9756 | Val Loss: 2.0910, Val Acc: 0.5202
Epoch 8/10 | Train Loss: 0.0682, Train Acc: 0.9852 | Val Loss: 2.3741, Val Acc: 0.5287
Epoch 9/10 | Train Loss: 0.0587, Train Acc: 0.9874 | Val Loss: 2.5497, Val Acc: 0.5210
Epoch 10/10 | Train Loss: 0.0478, Train Acc: 0.9902 | Val Loss: 2.4268, Val Acc: 0.5242
Saved model for experiment 12.
Experiment 12: Final Validation Accuracy: 0.5242
Experiment 12: Test Accuracy: 0.5129
            



Epoch 1/10 | Train Loss: 0.3155, Train Acc: 0.8928 | Val Loss: 1.2431, Val Acc: 0.5113
Epoch 2/10 | Train Loss: 0.2770, Train Acc: 0.9067 | Val Loss: 1.2524, Val Acc: 0.5186
Epoch 3/10 | Train Loss: 0.2495, Train Acc: 0.9164 | Val Loss: 1.4211, Val Acc: 0.5194
Epoch 4/10 | Train Loss: 0.2215, Train Acc: 0.9298 | Val Loss: 1.4440, Val Acc: 0.5210
Epoch 5/10 | Train Loss: 0.1561, Train Acc: 0.9561 | Val Loss: 1.7271, Val Acc: 0.5153
Epoch 6/10 | Train Loss: 0.1226, Train Acc: 0.9696 | Val Loss: 1.8357, Val Acc: 0.5077
Epoch 7/10 | Train Loss: 0.1039, Train Acc: 0.9758 | Val Loss: 2.1976, Val Acc: 0.5081
Epoch 8/10 | Train Loss: 0.0781, Train Acc: 0.9832 | Val Loss: 2.0871, Val Acc: 0.5048
Epoch 9/10 | Train Loss: 0.0663, Train Acc: 0.9865 | Val Loss: 2.3884, Val Acc: 0.5101
Epoch 10/10 | Train Loss: 0.0589, Train Acc: 0.9878 | Val Loss: 2.5393, Val Acc: 0.4992
Saved model for experiment 13.
Experiment 13: Final Validation Accuracy: 0.4992
Experiment 13: Test Accuracy: 0.4988
            



Epoch 1/10 | Train Loss: 0.3363, Train Acc: 0.8871 | Val Loss: 1.2097, Val Acc: 0.4960
Epoch 2/10 | Train Loss: 0.2674, Train Acc: 0.9109 | Val Loss: 1.2480, Val Acc: 0.5044
Epoch 3/10 | Train Loss: 0.2505, Train Acc: 0.9167 | Val Loss: 1.3850, Val Acc: 0.5077
Epoch 4/10 | Train Loss: 0.2287, Train Acc: 0.9255 | Val Loss: 1.5407, Val Acc: 0.5032
Epoch 5/10 | Train Loss: 0.1594, Train Acc: 0.9553 | Val Loss: 1.7460, Val Acc: 0.4931
Epoch 6/10 | Train Loss: 0.1257, Train Acc: 0.9676 | Val Loss: 1.9391, Val Acc: 0.4907
Epoch 7/10 | Train Loss: 0.1126, Train Acc: 0.9710 | Val Loss: 2.1010, Val Acc: 0.4952
Epoch 8/10 | Train Loss: 0.0796, Train Acc: 0.9822 | Val Loss: 2.2414, Val Acc: 0.4935
Epoch 9/10 | Train Loss: 0.0651, Train Acc: 0.9864 | Val Loss: 2.4148, Val Acc: 0.4972
Epoch 10/10 | Train Loss: 0.0567, Train Acc: 0.9885 | Val Loss: 2.5369, Val Acc: 0.4980
Saved model for experiment 14.
Experiment 14: Final Validation Accuracy: 0.4980
Experiment 14: Test Accuracy: 0.5174
            



Epoch 1/10 | Train Loss: 0.3188, Train Acc: 0.8929 | Val Loss: 1.1550, Val Acc: 0.5283
Epoch 2/10 | Train Loss: 0.2688, Train Acc: 0.9069 | Val Loss: 1.4073, Val Acc: 0.5214
Epoch 3/10 | Train Loss: 0.2309, Train Acc: 0.9227 | Val Loss: 1.4230, Val Acc: 0.5182
Epoch 4/10 | Train Loss: 0.2059, Train Acc: 0.9305 | Val Loss: 1.6834, Val Acc: 0.5194
Epoch 5/10 | Train Loss: 0.1364, Train Acc: 0.9595 | Val Loss: 1.9137, Val Acc: 0.5295
Epoch 6/10 | Train Loss: 0.1024, Train Acc: 0.9727 | Val Loss: 2.1473, Val Acc: 0.5230
Epoch 7/10 | Train Loss: 0.0788, Train Acc: 0.9815 | Val Loss: 2.2744, Val Acc: 0.5170
Epoch 8/10 | Train Loss: 0.0488, Train Acc: 0.9893 | Val Loss: 2.5319, Val Acc: 0.5198
Epoch 9/10 | Train Loss: 0.0361, Train Acc: 0.9928 | Val Loss: 2.8635, Val Acc: 0.5214
Epoch 10/10 | Train Loss: 0.0287, Train Acc: 0.9946 | Val Loss: 2.9897, Val Acc: 0.5174
Saved model for experiment 15.
Experiment 15: Final Validation Accuracy: 0.5174
Experiment 15: Test Accuracy: 0.5109

Final Avera

In [6]:
import pandas as pd
import numpy as np
import pandas_ta as ta
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from math import sqrt

####################################
# 1. 기술적 지표 계산 함수 (변경 없음)
####################################
def calculate_indicators(data):
    data['William_R'] = ta.willr(data['high'], data['low'], data['close'])
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'])
    data['OBV'] = ta.obv(data['close'], data['volume'])
    data['Z_Score'] = (data['close'] - data['close'].rolling(window=20).mean()) / data['close'].rolling(window=20).std()
    data['Entropy'] = ta.entropy(data['close'], length=14)
    data['SMA_5'] = data['close'].rolling(window=5).mean()
    data['SMA_10'] = data['close'].rolling(window=10).mean()
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_60'] = data['close'].rolling(window=60).mean()
    data['SMA_120'] = data['close'].rolling(window=120).mean()
    data['SMA_250'] = data['close'].rolling(window=250).mean()
    data['RSI'] = ta.rsi(data['close'])
    bb = ta.bbands(data['close'])
    data['BB_Upper'], data['BB_Middle'], data['BB_Lower'] = bb.iloc[:, 0], bb.iloc[:, 1], bb.iloc[:, 2]
    macd = ta.macd(data['close'])
    data['MACD'] = macd.iloc[:, 0]
    data['Stochastic'] = ta.stoch(data['high'], data['low'], data['close']).iloc[:, 0]
    return data.dropna()

####################################
# 1-2. 추가 feature 계산 (가격 차이)
####################################
def calculate_price_differences(data):
    data['close_open'] = data['close'] - data['open']
    data['high_low'] = data['high'] - data['low']
    data['high_open'] = data['high'] - data['open']
    data['high_close'] = data['high'] - data['close']
    data['open_low'] = data['open'] - data['low']
    data['close_low'] = data['close'] - data['low']
    return data

####################################
# 2. Datetime Feature One-Hot Encoding (각 feature 128차원)
####################################
def encode_datetime_features_onehot(data, dim=128):
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    
    data['hour_of_day'] = data['datetime'].dt.hour
    data['day_of_week'] = data['datetime'].dt.dayofweek
    data['week_of_month'] = (data['datetime'].dt.day - 1) // 7 + 1
    data['month'] = data['datetime'].dt.month

    def onehot_with_fixed_dim(series, prefix, dim):
        dummies = pd.get_dummies(series, prefix=prefix)
        expected_cols = [f"{prefix}_{i}" for i in range(dim)]
        dummies = dummies.reindex(columns=expected_cols, fill_value=0)
        return dummies

    hour_one_hot = onehot_with_fixed_dim(data['hour_of_day'], 'Hour', dim)
    day_one_hot = onehot_with_fixed_dim(data['day_of_week'], 'Day', dim)
    week_one_hot = onehot_with_fixed_dim(data['week_of_month'], 'Week', dim)
    month_one_hot = onehot_with_fixed_dim(data['month'], 'Month', dim)
    
    data = pd.concat([data, hour_one_hot, day_one_hot, week_one_hot, month_one_hot], axis=1)
    return data

####################################
# 3. Rolling MinMax Scaling (분모 0 방지)
####################################
def rolling_minmax_scale(series, window=24):
    roll_min = series.rolling(window=window, min_periods=window).min()
    roll_max = series.rolling(window=window, min_periods=window).max()
    scaled = (series - roll_min) / ((roll_max - roll_min) + 1e-8)
    return scaled

####################################
# 4. Binning 후 One-Hot 인코딩 (각 feature를 128차원으로)
####################################
def bin_and_encode(data, features, bins=128, drop_original=True):
    for feature in features:
        data[f'{feature}_Bin'] = pd.cut(data[feature], bins=bins, labels=False)
        one_hot = pd.get_dummies(data[f'{feature}_Bin'], prefix=f'{feature}_Bin')
        expected_columns = [f'{feature}_Bin_{i}' for i in range(bins)]
        one_hot = one_hot.reindex(columns=expected_columns, fill_value=0)
        data = pd.concat([data, one_hot], axis=1)
        if drop_original:
            data.drop(columns=[f'{feature}_Bin'], inplace=True)
    data = data.astype(np.float32)
    return data

####################################
# 5. 데이터 로드 및 전처리
####################################
data = pd.read_csv("BTC_upbit_KRW_min5.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)

data = calculate_indicators(data)
data = calculate_price_differences(data)   # 추가 feature 계산
data = encode_datetime_features_onehot(data, dim=128)

# 원본 feature와 추가한 가격 차이 feature들을 사용
features_to_bin = ['open', 'high', 'low', 'volume', 'value', 'William_R',
                   'ATR', 'OBV', 'Z_Score', 'Entropy', 'SMA_5', 'SMA_10', 
                   'SMA_20', 'SMA_60', 'SMA_120', 'SMA_250', 'RSI', 'BB_Upper', 'BB_Middle', 
                   'BB_Lower', 'MACD', 'Stochastic',
                   'close_open', 'high_low', 'high_open', 'high_close', 'open_low', 'close_low']

# datetime one-hot 컬럼: 이미 각각 128차원씩 4개 (총 512차원)
datetime_onehot_features = [col for col in data.columns if col.startswith('Hour_') or 
                              col.startswith('Day_') or col.startswith('Week_') or 
                              col.startswith('Month_')]

# 타깃으로 사용할 값들은 모델 타깃으로만 사용하기 위해 보존
data['close_target'] = data['close']
data['open_target'] = data['open']

# 최종 데이터 선택 (여기서 close_target, open_target은 타깃으로만 사용)
data = data[features_to_bin + ['close_target', 'open_target'] + datetime_onehot_features].dropna()

# 각 기술적 및 가격 차이 feature에 대해 rolling scaling 적용 (window=24)
for feature in features_to_bin:
    data[feature] = rolling_minmax_scale(data[feature], window=24)
data = data.dropna()

# 기술적 및 가격 차이 feature들을 128차원의 one-hot 벡터로 변환
data = bin_and_encode(data, features_to_bin, bins=128, drop_original=True)

# 보조로 사용할 close_target에 대해서도 one-hot 인코딩 (128차원)이 필요하면 진행할 수 있으나,
# 여기서는 타깃은 continuous 값으로만 사용합니다.
# 최종 입력 데이터에는 타깃 컬럼은 포함하지 않습니다.

####################################
# 최종 입력/타깃 데이터 구성
####################################
# 입력: 기술적/가격 차이 feature의 one-hot 인코딩 결과와 datetime one-hot 인코딩 결과만
final_input_columns = []
for feature in features_to_bin:
    final_input_columns.extend([f'{feature}_Bin_{i}' for i in range(128)])
final_input_columns.extend(datetime_onehot_features)

# 타깃: open_target과 close_target (continuous)
final_target_columns = ['open_target', 'close_target']

data_input = data[final_input_columns]
data_target = data[final_target_columns]

####################################
# 6-2. Dataset 정의 (입력과 타깃을 별도로 사용)
####################################
class TimeSeriesDataset(Dataset):
    def __init__(self, input_data, target_data, lookback=24):
        self.input_data = input_data.values
        self.target_data = target_data.values  # shape: (N, 2)
        self.lookback = lookback

    def __len__(self):
        return len(self.input_data) - self.lookback

    def __getitem__(self, idx):
        x = self.input_data[idx: idx + self.lookback, :]
        # 타깃은 lookback 이후 시점의 open_target와 close_target 값을 가져와서,
        # candle이 양봉이면 1, 음봉이면 0으로 설정 (양봉: close > open)
        open_val = self.target_data[idx + self.lookback, 0]
        close_val = self.target_data[idx + self.lookback, 1]
        y_target = 1 if close_val > open_val else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

####################################
# 7. Transformer Encoder 직접 구현
####################################
# 7-1. Multi-Head Self-Attention
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == embed_dim, "embed_dim은 num_heads로 나누어떨어져야 합니다."
        
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key   = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.out   = nn.Linear(embed_dim, embed_dim)
        
    def forward(self, x):
        batch_size, seq_len, embed_dim = x.shape
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        
        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        
        scores = torch.matmul(Q, K.transpose(-2, -1)) / sqrt(self.head_dim)
        attn = torch.softmax(scores, dim=-1)
        out = torch.matmul(attn, V)
        
        out = out.transpose(1, 2).contiguous().view(batch_size, seq_len, embed_dim)
        out = self.out(out)
        return out

# 7-2. Feed-Forward Network
class FeedForward(nn.Module):
    def __init__(self, embed_dim, ffn_dim):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(embed_dim, ffn_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(ffn_dim, embed_dim)
        
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

# 7-3. Transformer Encoder Layer (Self-Attention + FFN + Residual + LayerNorm)
class TransformerEncoderLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, ffn_dim):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.ffn = FeedForward(embed_dim, ffn_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        
    def forward(self, x):
        attn_out = self.self_attn(x)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)
        x = self.norm2(x + ffn_out)
        return x

# 7-4. Encoder-Only Transformer 직접 구현 (lookback=24이므로, max_seq_len=24)
class EncoderOnlyTransformerCustom(nn.Module):
    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=24):
        super(EncoderOnlyTransformerCustom, self).__init__()
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_seq_len, embedding_dim)
        self.layers = nn.ModuleList([
            TransformerEncoderLayer(embedding_dim, num_heads, ffn_dim)
            for _ in range(num_layers)
        ])
        self.fc = nn.Linear(embedding_dim, num_classes)
        
    def forward(self, x):
        batch_size, seq_len, _ = x.shape
        x = self.token_embedding(x)
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0).expand(batch_size, seq_len)
        x = x + self.position_embedding(positions)
        for layer in self.layers:
            x = layer(x)
        x = x[:, -1, :]
        return self.fc(x)

####################################
# 8. 학습 및 평가 루프 (Fine-Tuning 및 Validation Accuracy 출력)
####################################
def evaluate_model(model, data_loader, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    return total_loss / len(data_loader), correct / total

def train_model(model, train_loader, val_loader, num_epochs, lr, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
    
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
        train_loss = total_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        scheduler.step(val_loss)
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def train_and_evaluate(data, num_experiments=16, lookback=24, num_epochs=10):
    # 모델 입력은 one-hot 인코딩된 feature들만, 타깃은 continuous open_target과 close_target
    input_cols = []
    for feature in features_to_bin:
        input_cols.extend([f'{feature}_Bin_{i}' for i in range(128)])
    input_cols.extend(datetime_onehot_features)
    target_cols = ['open_target', 'close_target']
    
    # 타깃 컬럼 추가: 원본 open, close 값을 보존 (continuous)
    data['open_target'] = data['open']
    data['close_target'] = data['close']
    
    data_input = data[input_cols]
    data_target = data[target_cols]
    
    step_size = 30000  # 이동 단위
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    val_acc_list = []
    test_acc_list = []
    
    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8
        val_end = train_end + step_size
        test_end = val_end + step_size
        if test_end > len(data_input):
            break
        print(data)
        
        train_input = data_input.iloc[train_start:train_end]
        train_target = data_target.iloc[train_start:train_end]
        val_input = data_input.iloc[train_end:val_end]
        val_target = data_target.iloc[train_end:val_end]
        test_input = data_input.iloc[val_end:test_end]
        test_target = data_target.iloc[val_end:test_end]
        
        # Dataset 생성: 입력과 타깃을 따로 전달
        train_dataset = TimeSeriesDataset(train_input, train_target, lookback=lookback)
        val_dataset = TimeSeriesDataset(val_input, val_target, lookback=lookback)
        test_dataset = TimeSeriesDataset(test_input, test_target, lookback=lookback)
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        
        # Fine-tuning: exp==0일 때는 새로 학습, 이후 구간에서는 이전 모델 파라미터를 그대로 로드하여 미세 조정
        lr = 1e-4
        input_dim = data_input.shape[1]
        model = EncoderOnlyTransformerCustom(input_dim=input_dim, embedding_dim=512, num_heads=8, 
                                               num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=lookback).to(device)
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                model.load_state_dict(torch.load(f"model_experiment_{exp - 1}.pth"))
                print(f"Loaded model from experiment {exp - 1} for fine-tuning.")
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
        
        print(f"Experiment {exp}: Training with lr={lr} (Fine-Tuning)")
        model = train_model(model, train_loader, val_loader, num_epochs, lr, device)
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        val_acc_list.append(val_acc)
        print(f"Experiment {exp}: Final Validation Accuracy: {val_acc:.4f}")
        
        test_loss, test_acc = evaluate_model(model, test_loader, device)
        test_acc_list.append(test_acc)
        print(f"Experiment {exp}: Test Accuracy: {test_acc:.4f}")
    
    avg_val_acc = sum(val_acc_list) / len(val_acc_list)
    avg_test_acc = sum(test_acc_list) / len(test_acc_list)
    print(f"\nFinal Average Validation Accuracy: {avg_val_acc:.4f}")
    print(f"Final Average Test Accuracy: {avg_test_acc:.4f}")

train_and_evaluate(data)


KeyError: 'close'

In [1]:
import pandas as pd
import numpy as np
import pandas_ta as ta
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from math import sqrt

####################################
# 1. 기술적 지표 계산 함수 (변경 없음)
####################################
def calculate_indicators(data):
    data['William_R'] = ta.willr(data['high'], data['low'], data['close'])
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'])
    data['OBV'] = ta.obv(data['close'], data['volume'])
    data['Z_Score'] = (data['close'] - data['close'].rolling(window=20).mean()) / data['close'].rolling(window=20).std()
    data['Entropy'] = ta.entropy(data['close'], length=14)
    data['SMA_5'] = data['close'].rolling(window=5).mean()
    data['SMA_10'] = data['close'].rolling(window=10).mean()
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_60'] = data['close'].rolling(window=60).mean()
    data['SMA_120'] = data['close'].rolling(window=120).mean()
    data['SMA_250'] = data['close'].rolling(window=250).mean()
    data['RSI'] = ta.rsi(data['close'])
    bb = ta.bbands(data['close'])
    data['BB_Upper'], data['BB_Middle'], data['BB_Lower'] = bb.iloc[:, 0], bb.iloc[:, 1], bb.iloc[:, 2]
    macd = ta.macd(data['close'])
    data['MACD'] = macd.iloc[:, 0]
    data['Stochastic'] = ta.stoch(data['high'], data['low'], data['close']).iloc[:, 0]
    return data.dropna()

####################################
# 1-2. 추가 feature 계산 (가격 차이)
####################################
def calculate_price_differences(data):
    data['close_open'] = data['close'] - data['open']
    data['high_low'] = data['high'] - data['low']
    data['high_open'] = data['high'] - data['open']
    data['high_close'] = data['high'] - data['close']
    data['open_low'] = data['open'] - data['low']
    data['close_low'] = data['close'] - data['low']
    return data

####################################
# 2. Datetime Feature One-Hot Encoding (각 feature 128차원)
####################################
def encode_datetime_features_onehot(data, dim=128):
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    
    data['hour_of_day'] = data['datetime'].dt.hour
    data['day_of_week'] = data['datetime'].dt.dayofweek
    data['week_of_month'] = (data['datetime'].dt.day - 1) // 7 + 1
    data['month'] = data['datetime'].dt.month

    def onehot_with_fixed_dim(series, prefix, dim):
        dummies = pd.get_dummies(series, prefix=prefix)
        expected_cols = [f"{prefix}_{i}" for i in range(dim)]
        dummies = dummies.reindex(columns=expected_cols, fill_value=0)
        return dummies

    hour_one_hot = onehot_with_fixed_dim(data['hour_of_day'], 'Hour', dim)
    day_one_hot = onehot_with_fixed_dim(data['day_of_week'], 'Day', dim)
    week_one_hot = onehot_with_fixed_dim(data['week_of_month'], 'Week', dim)
    month_one_hot = onehot_with_fixed_dim(data['month'], 'Month', dim)
    
    data = pd.concat([data, hour_one_hot, day_one_hot, week_one_hot, month_one_hot], axis=1)
    return data

####################################
# 3. Rolling MinMax Scaling (분모 0 방지)
####################################
def rolling_minmax_scale(series, window=24):
    roll_min = series.rolling(window=window, min_periods=window).min()
    roll_max = series.rolling(window=window, min_periods=window).max()
    scaled = (series - roll_min) / ((roll_max - roll_min) + 1e-8)
    return scaled

####################################
# 4. Binning 후 One-Hot 인코딩 (각 feature를 128차원으로)
####################################
def bin_and_encode(data, features, bins=128, drop_original=True):
    for feature in features:
        data[f'{feature}_Bin'] = pd.cut(data[feature], bins=bins, labels=False)
        one_hot = pd.get_dummies(data[f'{feature}_Bin'], prefix=f'{feature}_Bin')
        expected_columns = [f'{feature}_Bin_{i}' for i in range(bins)]
        one_hot = one_hot.reindex(columns=expected_columns, fill_value=0)
        data = pd.concat([data, one_hot], axis=1)
        if drop_original:
            data.drop(columns=[f'{feature}_Bin'], inplace=True)
    data = data.astype(np.float32)
    return data

####################################
# 5. 데이터 로드 및 전처리
####################################
data = pd.read_csv("BTC_upbit_KRW_min60.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)
data = calculate_indicators(data)
data = calculate_price_differences(data)
data = encode_datetime_features_onehot(data, dim=128)

# 원본 feature와 추가한 가격 차이 feature들을 사용
features_to_bin = ['open', 'high', 'low', 'volume', 'value', 'William_R',
                   'ATR', 'OBV', 'Z_Score', 'Entropy', 'SMA_5', 'SMA_10', 
                   'SMA_20', 'SMA_60', 'SMA_120', 'SMA_250', 'RSI', 'BB_Upper', 'BB_Middle', 
                   'BB_Lower', 'MACD', 'Stochastic',
                   'close_open', 'high_low', 'high_open', 'high_close', 'open_low', 'close_low']

# datetime one-hot 컬럼: 이미 각각 128차원씩 4개 (총 512차원)
datetime_onehot_features = [col for col in data.columns if col.startswith('Hour_') or 
                              col.startswith('Day_') or col.startswith('Week_') or 
                              col.startswith('Month_')]

# 타깃으로 사용할 continuous 값들은 원본에서 보존 (아직 제거하지 않음)
# 여기서는 이후에 타깃 데이터로만 사용될 예정
data_targets = data[['open', 'close']].copy()

# 최종 입력 데이터: 타깃 컬럼은 제외하고, 오직 features_to_bin와 datetime one-hot 인코딩된 값만 사용
data_input = data[features_to_bin + datetime_onehot_features].dropna()

# 각 기술적 및 가격 차이 feature에 대해 rolling scaling 적용 (window=24)
for feature in features_to_bin:
    data_input[feature] = rolling_minmax_scale(data_input[feature], window=24)
data_input = data_input.dropna()

# 기술적 및 가격 차이 feature들을 128차원의 one-hot 벡터로 변환
data_input = bin_and_encode(data_input, features_to_bin, bins=128, drop_original=True)
final_input_columns = []
for feature in features_to_bin:
    final_input_columns.extend([f'{feature}_Bin_{i}' for i in range(128)])
final_input_columns.extend(datetime_onehot_features)
data_input = data_input[final_input_columns]

# 타깃 데이터: continuous 값, open과 close를 각각 사용
data_target = data_targets.rename(columns={'open': 'open_target', 'close': 'close_target'})
# data_target의 인덱스를 data_input과 맞춥니다.
common_index = data_input.index.intersection(data_target.index)
data_input = data_input.loc[common_index]
data_target = data_target.loc[common_index]

####################################
# 6-2. Dataset 정의 (입력과 타깃을 별도로 사용)
####################################
class TimeSeriesDataset(Dataset):
    def __init__(self, input_data, target_data, lookback=24):
        self.input_data = input_data.values
        self.target_data = target_data.values  # shape: (N, 2)
        self.lookback = lookback

    def __len__(self):
        return len(self.input_data) - self.lookback

    def __getitem__(self, idx):
        x = self.input_data[idx: idx + self.lookback, :]
        # 타깃: lookback 이후 시점의 open_target과 close_target
        open_val = self.target_data[idx + self.lookback, 0]
        close_val = self.target_data[idx + self.lookback, 1]
        # 캔들 유형: 양봉이면 1, 음봉이면 0 (양봉: close > open)
        y_target = 1 if close_val > open_val else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

####################################
# 7. Transformer Encoder-Decoder 직접 구현
####################################
# 7-1. Multi-Head Self-Attention (동일)
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == embed_dim, "embed_dim은 num_heads로 나누어떨어져야 합니다."
        
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key   = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.out   = nn.Linear(embed_dim, embed_dim)
        
    def forward(self, x, mask=None):
        batch_size, seq_len, embed_dim = x.shape
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        
        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        
        scores = torch.matmul(Q, K.transpose(-2, -1)) / sqrt(self.head_dim)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        attn = torch.softmax(scores, dim=-1)
        out = torch.matmul(attn, V)
        
        out = out.transpose(1, 2).contiguous().view(batch_size, seq_len, embed_dim)
        out = self.out(out)
        return out

# 7-2. Feed-Forward Network (동일)
class FeedForward(nn.Module):
    def __init__(self, embed_dim, ffn_dim):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(embed_dim, ffn_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(ffn_dim, embed_dim)
        
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

# 7-3. Transformer Encoder Layer (동일)
class TransformerEncoderLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, ffn_dim):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.ffn = FeedForward(embed_dim, ffn_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        
    def forward(self, x, mask=None):
        attn_out = self.self_attn(x, mask)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)
        x = self.norm2(x + ffn_out)
        return x

# 7-4. Transformer Decoder Layer
class TransformerDecoderLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, ffn_dim):
        super(TransformerDecoderLayer, self).__init__()
        # Masked self-attention for decoder
        self.self_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        # Cross attention: query from decoder, key/value from encoder output
        self.cross_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.ffn = FeedForward(embed_dim, ffn_dim)
        self.norm3 = nn.LayerNorm(embed_dim)
        
    def forward(self, x, encoder_output, tgt_mask=None):
        # x: decoder input, encoder_output: encoder output
        self_attn_out = self.self_attn(x, mask=tgt_mask)
        x = self.norm1(x + self_attn_out)
        # For cross attention, we use x as query and encoder_output as key/value.
        cross_attn_out = self.cross_attn(x)
        x = self.norm2(x + cross_attn_out)
        ffn_out = self.ffn(x)
        x = self.norm3(x + ffn_out)
        return x

# 7-5. Encoder-Decoder Transformer 직접 구현
class EncoderDecoderTransformerCustom(nn.Module):
    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_encoder_layers=6, 
                 num_decoder_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=24):
        super(EncoderDecoderTransformerCustom, self).__init__()
        # Encoder
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_seq_len, embedding_dim)
        self.encoder_layers = nn.ModuleList([
            TransformerEncoderLayer(embedding_dim, num_heads, ffn_dim)
            for _ in range(num_encoder_layers)
        ])
        # Decoder: we'll use a learnable start token as decoder input (length 1)
        self.decoder_start_token = nn.Parameter(torch.randn(1, 1, embedding_dim))
        self.decoder_layers = nn.ModuleList([
            TransformerDecoderLayer(embedding_dim, num_heads, ffn_dim)
            for _ in range(num_decoder_layers)
        ])
        self.fc = nn.Linear(embedding_dim, num_classes)
        
    def forward(self, encoder_input):
        # encoder_input: (batch_size, seq_len, input_dim)
        batch_size, seq_len, _ = encoder_input.shape
        # Encoder
        x = self.token_embedding(encoder_input)
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0).expand(batch_size, seq_len)
        x = x + self.position_embedding(positions)
        for layer in self.encoder_layers:
            x = layer(x)
        encoder_output = x  # (batch_size, seq_len, embedding_dim)
        
        # Decoder: use a fixed start token repeated for batch
        dec_input = self.decoder_start_token.expand(batch_size, -1, -1)  # (batch_size, 1, embedding_dim)
        # Since decoder input length=1, no mask needed.
        for layer in self.decoder_layers:
            dec_input = layer(dec_input, encoder_output)
        # dec_input: (batch_size, 1, embedding_dim)
        out = dec_input[:, -1, :]  # (batch_size, embedding_dim)
        logits = self.fc(out)
        return logits

####################################
# 8. 학습 및 평가 루프 (Fine-Tuning 및 Validation Accuracy 출력)
####################################
def evaluate_model(model, data_loader, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    return total_loss / len(data_loader), correct / total

def train_model(model, train_loader, val_loader, num_epochs, lr, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
    
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
        train_loss = total_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        scheduler.step(val_loss)
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def train_and_evaluate(data, num_experiments=16, lookback=24, num_epochs=10):
    # 모델 입력은 one-hot 인코딩된 feature들만, 타깃은 continuous open_target과 close_target
    input_cols = []
    for feature in features_to_bin:
        input_cols.extend([f'{feature}_Bin_{i}' for i in range(128)])
    input_cols.extend(datetime_onehot_features)
    target_cols = ['open_target', 'close_target']
    
    # 타깃 컬럼 추가: 원본 'open', 'close' 값을 보존 (데이터 읽기 전 원본을 다시 읽어 사용)
    data_targets = pd.read_csv("BTC_upbit_KRW_min5.csv", index_col=0)
    data_targets.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
    data_targets.index = pd.to_datetime(data_targets.index)
    data_targets = data_targets[['open', 'close']].dropna()
    # data_input과 동일한 인덱스 사용: 교집합 사용
    common_index = data_input.index.intersection(data_targets.index)
    data_input = data_input.loc[common_index]
    data_target = data_targets.loc[common_index].rename(columns={'open': 'open_target', 'close': 'close_target'})
    
    step_size = 30000  # 이동 단위
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    val_acc_list = []
    test_acc_list = []
    
    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8
        val_end = train_end + step_size
        test_end = val_end + step_size
        if test_end > len(data_input):
            break
        print(data)
        
        train_input = data_input.iloc[train_start:train_end]
        train_target = data_target.iloc[train_start:train_end]
        val_input = data_input.iloc[train_end:val_end]
        val_target = data_target.iloc[train_end:val_end]
        test_input = data_input.iloc[val_end:test_end]
        test_target = data_target.iloc[val_end:test_end]
        
        train_dataset = TimeSeriesDataset(train_input, train_target, lookback=lookback)
        val_dataset = TimeSeriesDataset(val_input, val_target, lookback=lookback)
        test_dataset = TimeSeriesDataset(test_input, test_target, lookback=lookback)
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        
        # Fine-Tuning: 이전 구간의 모델 파라미터를 그대로 로드하여 미세 조정
        lr = 1e-4
        input_dim = data_input.shape[1]
        model = EncoderDecoderTransformerCustom(input_dim=input_dim, embedding_dim=512, num_heads=8, 
                                                 num_encoder_layers=6, num_decoder_layers=6, ffn_dim=2048, 
                                                 num_classes=2, max_seq_len=lookback).to(device)
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                model.load_state_dict(torch.load(f"model_experiment_{exp - 1}.pth"))
                print(f"Loaded model from experiment {exp - 1} for fine-tuning.")
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
        
        print(f"Experiment {exp}: Training with lr={lr} (Fine-Tuning with Encoder-Decoder)")
        model = train_model(model, train_loader, val_loader, num_epochs, lr, device)
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        val_acc_list.append(val_acc)
        print(f"Experiment {exp}: Final Validation Accuracy: {val_acc:.4f}")
        
        test_loss, test_acc = evaluate_model(model, test_loader, device)
        test_acc_list.append(test_acc)
        print(f"Experiment {exp}: Test Accuracy: {test_acc:.4f}")
    
    avg_val_acc = sum(val_acc_list) / len(val_acc_list)
    avg_test_acc = sum(test_acc_list) / len(test_acc_list)
    print(f"\nFinal Average Validation Accuracy: {avg_val_acc:.4f}")
    print(f"Final Average Test Accuracy: {avg_test_acc:.4f}")

train_and_evaluate(data)


UnboundLocalError: local variable 'data_input' referenced before assignment