In [1]:
import pandas as pd
import numpy as np
import pandas_ta as ta
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from math import sqrt

####################################
# 1. 기술적 지표 계산 함수 (변경 없음)
####################################
def calculate_indicators(data):
    data['William_R'] = ta.willr(data['high'], data['low'], data['close'])
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'])
    data['OBV'] = ta.obv(data['close'], data['volume'])
    data['Z_Score'] = (data['close'] - data['close'].rolling(window=20).mean()) / data['close'].rolling(window=20).std()
    data['Entropy'] = ta.entropy(data['close'], length=14)
    data['SMA_5'] = data['close'].rolling(window=5).mean()
    data['SMA_10'] = data['close'].rolling(window=10).mean()
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_60'] = data['close'].rolling(window=60).mean()
    data['SMA_120'] = data['close'].rolling(window=120).mean()
    data['SMA_250'] = data['close'].rolling(window=250).mean()
    data['RSI'] = ta.rsi(data['close'])
    bb = ta.bbands(data['close'])
    data['BB_Upper'], data['BB_Middle'], data['BB_Lower'] = bb.iloc[:, 0], bb.iloc[:, 1], bb.iloc[:, 2]
    macd = ta.macd(data['close'])
    data['MACD'] = macd.iloc[:, 0]
    data['Stochastic'] = ta.stoch(data['high'], data['low'], data['close']).iloc[:, 0]
    return data.dropna()

####################################
# 1-2. 추가 feature 계산 (가격 차이)
####################################
def calculate_price_differences(data):
    data['close_open'] = data['close'] - data['open']
    data['high_low'] = data['high'] - data['low']
    data['high_open'] = data['high'] - data['open']
    data['high_close'] = data['high'] - data['close']
    data['open_low'] = data['open'] - data['low']
    data['close_low'] = data['close'] - data['low']
    return data

####################################
# 2. Datetime Feature One-Hot Encoding (각 feature 128차원)
####################################
def encode_datetime_features_onehot(data, dim=128):
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    
    data['hour_of_day'] = data['datetime'].dt.hour
    data['day_of_week'] = data['datetime'].dt.dayofweek
    data['week_of_month'] = (data['datetime'].dt.day - 1) // 7 + 1
    data['month'] = data['datetime'].dt.month

    def onehot_with_fixed_dim(series, prefix, dim):
        dummies = pd.get_dummies(series, prefix=prefix)
        expected_cols = [f"{prefix}_{i}" for i in range(dim)]
        dummies = dummies.reindex(columns=expected_cols, fill_value=0)
        return dummies

    hour_one_hot = onehot_with_fixed_dim(data['hour_of_day'], 'Hour', dim)
    day_one_hot = onehot_with_fixed_dim(data['day_of_week'], 'Day', dim)
    week_one_hot = onehot_with_fixed_dim(data['week_of_month'], 'Week', dim)
    month_one_hot = onehot_with_fixed_dim(data['month'], 'Month', dim)
    
    data = pd.concat([data, hour_one_hot, day_one_hot, week_one_hot, month_one_hot], axis=1)
    return data

####################################
# 3. Rolling MinMax Scaling (분모 0 방지)
####################################
def rolling_minmax_scale(series, window=24):
    roll_min = series.rolling(window=window, min_periods=window).min()
    roll_max = series.rolling(window=window, min_periods=window).max()
    # 기본 minmax scaling 계산 (분모에 아주 작은 epsilon 추가)
    scaled = (series - roll_min) / ((roll_max - roll_min) + 1e-8)
    # 무한대나 -무한대 값을 NaN으로 대체하고, NaN은 최대값 1.0으로 대체
    scaled = scaled.replace([np.inf, -np.inf], np.nan)
    scaled = scaled.fillna(1.0)
    # 혹시 1보다 큰 값이 있다면 최대값 1.0으로 클리핑
    scaled = scaled.clip(upper=1.0)
    return scaled


####################################
# 4. Binning 후 One-Hot 인코딩 (각 feature를 128차원으로)
####################################
def bin_and_encode(data, features, bins=128, drop_original=True):
    for feature in features:
        data[f'{feature}_Bin'] = pd.cut(data[feature], bins=bins, labels=False)
        one_hot = pd.get_dummies(data[f'{feature}_Bin'], prefix=f'{feature}_Bin')
        expected_columns = [f'{feature}_Bin_{i}' for i in range(bins)]
        one_hot = one_hot.reindex(columns=expected_columns, fill_value=0)
        data = pd.concat([data, one_hot], axis=1)
        if drop_original:
            data.drop(columns=[f'{feature}_Bin'], inplace=True)
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    data[numeric_cols] = data[numeric_cols].astype(np.float32)
    return data

####################################
# 5. 데이터 로드 및 전처리
####################################
data = pd.read_csv("BTC_upbit_KRW_min60.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)

# 기본 지표 계산 및 추가 feature 생성
data = calculate_indicators(data)
data = calculate_price_differences(data)
data = encode_datetime_features_onehot(data, dim=128)

# 타깃: close 값을 그대로 사용 (continuous)
data['close_target'] = data['close']

####################################
# [Bollinger Bands 관련 새로운 feature 계산]
####################################
data['BB_diff'] = data['BB_Upper'] - data['BB_Lower']
data['BB_close_upper'] = data['close'] - data['BB_Upper']
data['BB_close_lower'] = data['close'] - data['BB_Lower']

####################################
# [인코딩 대상 feature 목록 업데이트]
####################################
# 원래 feature들을 대상으로 전일 대비 상승률(= pct_change()*100)을 계산
features_to_bin = ['open', 'high', 'low', 'volume', 'value', 'William_R',
                   'ATR', 'OBV', 'Z_Score', 'Entropy', 'SMA_5', 'SMA_10', 
                   'SMA_20', 'SMA_60', 'SMA_120', 'SMA_250', 'RSI', 'MACD', 'Stochastic',
                   'close_open', 'high_low', 'high_open', 'high_close', 'open_low', 'close_low',
                   'BB_diff', 'BB_close_upper', 'BB_close_lower']

# 각 feature에 대해 전일 대비 상승률을 계산하고 rolling minmax scaling 적용
for feature in features_to_bin:
    col_pct = feature + '_pct'
    data[col_pct] = data[feature].pct_change() * 100
    data[col_pct] = rolling_minmax_scale(data[col_pct], window=24)
    
# NaN 제거 (pct_change 및 rolling scaling으로 인한)
data = data.dropna()

# pct_change 결과에 대해 binning과 one-hot 인코딩 수행
features_pct = [f + '_pct' for f in features_to_bin]
data = bin_and_encode(data, features_pct, bins=128, drop_original=True)

# datetime one-hot encoding된 컬럼 추출
datetime_onehot_features = [col for col in data.columns if col.startswith('Hour_') or 
                              col.startswith('Day_') or col.startswith('Week_') or 
                              col.startswith('Month_')]

# 최종 입력 데이터 구성: 각 지표의 상승률에 대한 one-hot 벡터와 datetime one-hot 벡터 결합
final_input_columns = []
for feature in features_pct:
    final_input_columns.extend([f'{feature}_Bin_{i}' for i in range(128)])
final_input_columns.extend(datetime_onehot_features)

final_target_column = ['close_target']

data_input = data[final_input_columns]
data_target = data[final_target_column]

####################################
# 6-2. Dataset 정의 (입력과 타깃을 별도로 사용)
####################################
class TimeSeriesDataset(Dataset):
    def __init__(self, input_data, target_data, lookback=24):
        self.input_data = input_data.values
        self.target_data = target_data.values  # shape: (N, 1)
        self.lookback = lookback

    def __len__(self):
        return len(self.input_data) - self.lookback

    def __getitem__(self, idx):
        x = self.input_data[idx: idx + self.lookback, :]
        y = self.target_data[idx + self.lookback, 0]
        y_prev = self.target_data[idx + self.lookback - 1, 0]
        y_target = 1 if y > y_prev else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

####################################
# 7. Transformer Encoder 직접 구현
####################################
# 7-1. Multi-Head Self-Attention
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == embed_dim, "embed_dim은 num_heads로 나누어떨어져야 합니다."
        
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key   = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.out   = nn.Linear(embed_dim, embed_dim)
        
    def forward(self, x):
        batch_size, seq_len, embed_dim = x.shape
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        scores = torch.matmul(Q, K.transpose(-2, -1)) / sqrt(self.head_dim)
        attn = torch.softmax(scores, dim=-1)
        out = torch.matmul(attn, V)
        out = out.transpose(1, 2).contiguous().view(batch_size, seq_len, embed_dim)
        out = self.out(out)
        return out

# 7-2. Feed-Forward Network
class FeedForward(nn.Module):
    def __init__(self, embed_dim, ffn_dim):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(embed_dim, ffn_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(ffn_dim, embed_dim)
        
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

# 7-3. Transformer Encoder Layer (Self-Attention + FFN + Residual + LayerNorm)
class TransformerEncoderLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, ffn_dim):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.ffn = FeedForward(embed_dim, ffn_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        
    def forward(self, x):
        attn_out = self.self_attn(x)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)
        x = self.norm2(x + ffn_out)
        return x

# 7-4. Encoder-Only Transformer 직접 구현 (lookback=24이므로, max_seq_len=24)
class EncoderOnlyTransformerCustom(nn.Module):
    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=24):
        super(EncoderOnlyTransformerCustom, self).__init__()
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_seq_len, embedding_dim)
        self.layers = nn.ModuleList([
            TransformerEncoderLayer(embedding_dim, num_heads, ffn_dim)
            for _ in range(num_layers)
        ])
        self.fc = nn.Linear(embedding_dim, num_classes)
        
    def forward(self, x):
        batch_size, seq_len, _ = x.shape
        x = self.token_embedding(x)
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0).expand(batch_size, seq_len)
        x = x + self.position_embedding(positions)
        for layer in self.layers:
            x = layer(x)
        x = x[:, -1, :]
        return self.fc(x)

####################################
# 8. 학습 및 평가 루프 (Fine-tuning 및 Validation Accuracy 출력)
####################################
def evaluate_model(model, data_loader, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    return total_loss / len(data_loader), correct / total

def train_model(model, train_loader, val_loader, num_epochs, lr, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
    
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
        train_loss = total_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        scheduler.step(val_loss)
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def train_and_evaluate(data, num_experiments=16, lookback=24, num_epochs=10):
    # 최종 입력은 각 지표의 상승률(one-hot 인코딩된)과 datetime one-hot 인코딩 결합
    input_cols = []
    for feature in features_pct:
        input_cols.extend([f'{feature}_Bin_{i}' for i in range(128)])
    input_cols.extend(datetime_onehot_features)
    target_cols = ['close_target']
    
    data_input = data[input_cols]
    data_target = data[target_cols]
    
    # 여기서 모든 컬럼을 numeric 타입으로 변환
    data_input = data_input.apply(pd.to_numeric)
    data_input = data_input.astype(np.float32)
    data_target = data_target.apply(pd.to_numeric)
    data_target = data_target.astype(np.float32)
    
    step_size = 2500  # 데이터 구간 이동 단위
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    val_acc_list = []
    test_acc_list = []
    data_input.to_csv("onehotenc.csv")
    
    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8
        val_end = train_end + step_size
        test_end = val_end + step_size
        if test_end > len(data_input):
            break
        print(f"Experiment {exp}: 데이터 구간 [{train_start}:{test_end}]")
        print(data)
        
        train_input = data_input.iloc[train_start:train_end]
        train_target = data_target.iloc[train_start:train_end]
        val_input = data_input.iloc[train_end:val_end]
        val_target = data_target.iloc[train_end:val_end]
        test_input = data_input.iloc[val_end:test_end]
        test_target = data_target.iloc[val_end:test_end]
        
        train_dataset = TimeSeriesDataset(train_input, train_target, lookback=lookback)
        val_dataset = TimeSeriesDataset(val_input, val_target, lookback=lookback)
        test_dataset = TimeSeriesDataset(test_input, test_target, lookback=lookback)
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        
        lr = 1e-4
        input_dim = data_input.shape[1]
        model = EncoderOnlyTransformerCustom(input_dim=input_dim, embedding_dim=512, num_heads=8, 
                                               num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=lookback).to(device)
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                model.load_state_dict(torch.load(f"model_experiment_{exp - 1}.pth"))
                print(f"Loaded model from experiment {exp - 1} for fine-tuning.")
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
        
        print(f"Experiment {exp}: Training with lr={lr} (Fine-Tuning)")
        model = train_model(model, train_loader, val_loader, num_epochs, lr, device)
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        val_acc_list.append(val_acc)
        print(f"Experiment {exp}: Final Validation Accuracy: {val_acc:.4f}")
        
        test_loss, test_acc = evaluate_model(model, test_loader, device)
        test_acc_list.append(test_acc)
        print(f"Experiment {exp}: Test Accuracy: {test_acc:.4f}")
    
    if len(val_acc_list) > 0:
        avg_val_acc = sum(val_acc_list) / len(val_acc_list)
        avg_test_acc = sum(test_acc_list) / len(test_acc_list)
        print(f"\nFinal Average Validation Accuracy: {avg_val_acc:.4f}")
        print(f"Final Average Test Accuracy: {avg_test_acc:.4f}")
    else:
        print("실험이 한 번도 실행되지 않았습니다.")


# # features_pct 리스트: 각 원본 feature에 대해 '_pct'가 붙은 컬럼명
# features_pct = [f + '_pct' for f in features_to_bin]

train_and_evaluate(data)


Experiment 0: 데이터 구간 [0:25000]
                            open         high          low        close  \
2017-10-06 03:00:00    4923000.0    4985000.0    4923000.0    4971000.0   
2017-10-06 04:00:00    4972000.0    4989000.0    4941000.0    4951000.0   
2017-10-06 05:00:00    4953000.0    4990000.0    4917000.0    4949000.0   
2017-10-06 06:00:00    4948000.0    4975000.0    4928000.0    4964000.0   
2017-10-06 07:00:00    4965000.0    4985000.0    4926000.0    4984000.0   
...                          ...          ...          ...          ...   
2024-12-18 12:00:00  153867008.0  153870000.0  153000000.0  153452000.0   
2024-12-18 13:00:00  153451008.0  153452000.0  150348000.0  152200000.0   
2024-12-18 14:00:00  152200000.0  152800000.0  151382000.0  151994000.0   
2024-12-18 15:00:00  151904992.0  152500000.0  151524000.0  152184992.0   
2024-12-18 16:00:00  152186000.0  152696992.0  151999008.0  152559008.0   

                          volume         value  William_R           



Epoch 1/10 | Train Loss: 0.7260, Train Acc: 0.5015 | Val Loss: 0.6938, Val Acc: 0.4891
Epoch 2/10 | Train Loss: 0.6965, Train Acc: 0.5092 | Val Loss: 0.6960, Val Acc: 0.5162
Epoch 3/10 | Train Loss: 0.6837, Train Acc: 0.5634 | Val Loss: 0.6925, Val Acc: 0.5121
Epoch 4/10 | Train Loss: 0.6666, Train Acc: 0.5987 | Val Loss: 0.6966, Val Acc: 0.5170
Epoch 5/10 | Train Loss: 0.6469, Train Acc: 0.6303 | Val Loss: 0.7146, Val Acc: 0.5311
Epoch 6/10 | Train Loss: 0.6390, Train Acc: 0.6435 | Val Loss: 0.7567, Val Acc: 0.5170
Epoch 7/10 | Train Loss: 0.6093, Train Acc: 0.6771 | Val Loss: 0.7968, Val Acc: 0.5323
Epoch 8/10 | Train Loss: 0.5956, Train Acc: 0.6917 | Val Loss: 0.8065, Val Acc: 0.5166
Epoch 9/10 | Train Loss: 0.5836, Train Acc: 0.7034 | Val Loss: 0.8026, Val Acc: 0.4984
Epoch 10/10 | Train Loss: 0.5613, Train Acc: 0.7255 | Val Loss: 0.8600, Val Acc: 0.5133
Saved model for experiment 0.
Experiment 0: Final Validation Accuracy: 0.5133
Experiment 0: Test Accuracy: 0.5291
Experiment 1: 데



Epoch 1/10 | Train Loss: 0.6123, Train Acc: 0.6763 | Val Loss: 0.7642, Val Acc: 0.5323
Epoch 2/10 | Train Loss: 0.5959, Train Acc: 0.6944 | Val Loss: 0.7660, Val Acc: 0.5359
Epoch 3/10 | Train Loss: 0.5751, Train Acc: 0.7094 | Val Loss: 0.8217, Val Acc: 0.5214
Epoch 4/10 | Train Loss: 0.5512, Train Acc: 0.7298 | Val Loss: 0.8471, Val Acc: 0.5162
Epoch 5/10 | Train Loss: 0.4940, Train Acc: 0.7728 | Val Loss: 0.9979, Val Acc: 0.5125
Epoch 6/10 | Train Loss: 0.4627, Train Acc: 0.7938 | Val Loss: 0.8984, Val Acc: 0.5016
Epoch 7/10 | Train Loss: 0.4356, Train Acc: 0.8108 | Val Loss: 1.0866, Val Acc: 0.5299
Epoch 8/10 | Train Loss: 0.3887, Train Acc: 0.8395 | Val Loss: 1.2394, Val Acc: 0.5299
Epoch 9/10 | Train Loss: 0.3629, Train Acc: 0.8553 | Val Loss: 1.1947, Val Acc: 0.5097
Epoch 10/10 | Train Loss: 0.3427, Train Acc: 0.8658 | Val Loss: 1.2744, Val Acc: 0.5065
Saved model for experiment 1.
Experiment 1: Final Validation Accuracy: 0.5065
Experiment 1: Test Accuracy: 0.5061
Experiment 2: 데



Epoch 1/10 | Train Loss: 0.5120, Train Acc: 0.7684 | Val Loss: 0.8869, Val Acc: 0.5109
Epoch 2/10 | Train Loss: 0.4881, Train Acc: 0.7841 | Val Loss: 0.9159, Val Acc: 0.5166
Epoch 3/10 | Train Loss: 0.4512, Train Acc: 0.8042 | Val Loss: 0.9831, Val Acc: 0.4972
Epoch 4/10 | Train Loss: 0.4198, Train Acc: 0.8218 | Val Loss: 1.0614, Val Acc: 0.5081
Epoch 5/10 | Train Loss: 0.3410, Train Acc: 0.8668 | Val Loss: 1.1872, Val Acc: 0.5125
Epoch 6/10 | Train Loss: 0.2970, Train Acc: 0.8911 | Val Loss: 1.4321, Val Acc: 0.5032
Epoch 7/10 | Train Loss: 0.2728, Train Acc: 0.9017 | Val Loss: 1.5087, Val Acc: 0.5044
Epoch 8/10 | Train Loss: 0.2140, Train Acc: 0.9280 | Val Loss: 1.6033, Val Acc: 0.5093
Epoch 9/10 | Train Loss: 0.1930, Train Acc: 0.9345 | Val Loss: 1.8835, Val Acc: 0.5073
Epoch 10/10 | Train Loss: 0.1753, Train Acc: 0.9412 | Val Loss: 2.0673, Val Acc: 0.5008
Saved model for experiment 2.
Experiment 2: Final Validation Accuracy: 0.5008
Experiment 2: Test Accuracy: 0.5089
Experiment 3: 데



Epoch 1/10 | Train Loss: 0.4162, Train Acc: 0.8330 | Val Loss: 1.0067, Val Acc: 0.5069
Epoch 2/10 | Train Loss: 0.3833, Train Acc: 0.8491 | Val Loss: 1.1409, Val Acc: 0.5117
Epoch 3/10 | Train Loss: 0.3528, Train Acc: 0.8588 | Val Loss: 1.2301, Val Acc: 0.5093
Epoch 4/10 | Train Loss: 0.3084, Train Acc: 0.8806 | Val Loss: 1.5718, Val Acc: 0.4976
Epoch 5/10 | Train Loss: 0.2149, Train Acc: 0.9203 | Val Loss: 1.7805, Val Acc: 0.4960
Epoch 6/10 | Train Loss: 0.1688, Train Acc: 0.9418 | Val Loss: 1.9491, Val Acc: 0.5065
Epoch 7/10 | Train Loss: 0.1426, Train Acc: 0.9529 | Val Loss: 2.2023, Val Acc: 0.4976
Epoch 8/10 | Train Loss: 0.0838, Train Acc: 0.9740 | Val Loss: 3.0592, Val Acc: 0.5053
Epoch 9/10 | Train Loss: 0.0633, Train Acc: 0.9825 | Val Loss: 3.0569, Val Acc: 0.4984
Epoch 10/10 | Train Loss: 0.0495, Train Acc: 0.9869 | Val Loss: 3.3977, Val Acc: 0.4988
Saved model for experiment 3.
Experiment 3: Final Validation Accuracy: 0.4988
Experiment 3: Test Accuracy: 0.5129
Experiment 4: 데



Epoch 1/10 | Train Loss: 0.3482, Train Acc: 0.8701 | Val Loss: 1.0787, Val Acc: 0.5008
Epoch 2/10 | Train Loss: 0.2755, Train Acc: 0.8927 | Val Loss: 1.2517, Val Acc: 0.5081
Epoch 3/10 | Train Loss: 0.2262, Train Acc: 0.9120 | Val Loss: 1.4976, Val Acc: 0.5121
Epoch 4/10 | Train Loss: 0.2045, Train Acc: 0.9240 | Val Loss: 1.7117, Val Acc: 0.5089
Epoch 5/10 | Train Loss: 0.1233, Train Acc: 0.9574 | Val Loss: 1.6256, Val Acc: 0.5097
Epoch 6/10 | Train Loss: 0.0862, Train Acc: 0.9730 | Val Loss: 2.5272, Val Acc: 0.5129
Epoch 7/10 | Train Loss: 0.0671, Train Acc: 0.9793 | Val Loss: 2.9885, Val Acc: 0.5133
Epoch 8/10 | Train Loss: 0.0278, Train Acc: 0.9926 | Val Loss: 3.6468, Val Acc: 0.5141
Epoch 9/10 | Train Loss: 0.0133, Train Acc: 0.9967 | Val Loss: 4.1521, Val Acc: 0.5117
Epoch 10/10 | Train Loss: 0.0101, Train Acc: 0.9977 | Val Loss: 4.3364, Val Acc: 0.5182
Saved model for experiment 4.
Experiment 4: Final Validation Accuracy: 0.5182
Experiment 4: Test Accuracy: 0.5036
Experiment 5: 데



Epoch 1/10 | Train Loss: 0.2892, Train Acc: 0.8952 | Val Loss: 1.3493, Val Acc: 0.5065
Epoch 2/10 | Train Loss: 0.2303, Train Acc: 0.9093 | Val Loss: 1.4064, Val Acc: 0.5166
Epoch 3/10 | Train Loss: 0.1980, Train Acc: 0.9211 | Val Loss: 2.0359, Val Acc: 0.5162
Epoch 4/10 | Train Loss: 0.1765, Train Acc: 0.9327 | Val Loss: 1.8674, Val Acc: 0.4992
Epoch 5/10 | Train Loss: 0.1018, Train Acc: 0.9650 | Val Loss: 2.2574, Val Acc: 0.5117
Epoch 6/10 | Train Loss: 0.0720, Train Acc: 0.9779 | Val Loss: 3.2645, Val Acc: 0.5299
Epoch 7/10 | Train Loss: 0.0524, Train Acc: 0.9851 | Val Loss: 3.4498, Val Acc: 0.5238
Epoch 8/10 | Train Loss: 0.0212, Train Acc: 0.9943 | Val Loss: 3.8973, Val Acc: 0.5222
Epoch 9/10 | Train Loss: 0.0060, Train Acc: 0.9986 | Val Loss: 4.3413, Val Acc: 0.5145
Epoch 10/10 | Train Loss: 0.0057, Train Acc: 0.9989 | Val Loss: 4.4645, Val Acc: 0.5202
Saved model for experiment 5.
Experiment 5: Final Validation Accuracy: 0.5202
Experiment 5: Test Accuracy: 0.5016
Experiment 6: 데



Epoch 1/10 | Train Loss: 0.2919, Train Acc: 0.9008 | Val Loss: 1.3479, Val Acc: 0.5158
Epoch 2/10 | Train Loss: 0.2343, Train Acc: 0.9154 | Val Loss: 1.6928, Val Acc: 0.5137
Epoch 3/10 | Train Loss: 0.1838, Train Acc: 0.9370 | Val Loss: 1.8498, Val Acc: 0.5174
Epoch 4/10 | Train Loss: 0.1596, Train Acc: 0.9460 | Val Loss: 2.2218, Val Acc: 0.5170
Epoch 5/10 | Train Loss: 0.0944, Train Acc: 0.9715 | Val Loss: 2.6561, Val Acc: 0.5032
Epoch 6/10 | Train Loss: 0.0582, Train Acc: 0.9841 | Val Loss: 3.1527, Val Acc: 0.5000
Epoch 7/10 | Train Loss: 0.0413, Train Acc: 0.9892 | Val Loss: 3.3230, Val Acc: 0.5117
Epoch 8/10 | Train Loss: 0.0158, Train Acc: 0.9958 | Val Loss: 3.9409, Val Acc: 0.5113
Epoch 9/10 | Train Loss: 0.0063, Train Acc: 0.9985 | Val Loss: 4.3579, Val Acc: 0.5069
Epoch 10/10 | Train Loss: 0.0061, Train Acc: 0.9985 | Val Loss: 4.5114, Val Acc: 0.5032
Saved model for experiment 6.
Experiment 6: Final Validation Accuracy: 0.5032
Experiment 6: Test Accuracy: 0.4911
Experiment 7: 데



Epoch 1/10 | Train Loss: 0.2871, Train Acc: 0.9024 | Val Loss: 1.6744, Val Acc: 0.4855
Epoch 2/10 | Train Loss: 0.2119, Train Acc: 0.9211 | Val Loss: 1.9377, Val Acc: 0.5053
Epoch 3/10 | Train Loss: 0.1758, Train Acc: 0.9327 | Val Loss: 2.0805, Val Acc: 0.5048
Epoch 4/10 | Train Loss: 0.1573, Train Acc: 0.9426 | Val Loss: 2.2688, Val Acc: 0.4956
Epoch 5/10 | Train Loss: 0.0814, Train Acc: 0.9733 | Val Loss: 3.2215, Val Acc: 0.5012
Epoch 6/10 | Train Loss: 0.0502, Train Acc: 0.9849 | Val Loss: 3.7046, Val Acc: 0.5036
Epoch 7/10 | Train Loss: 0.0364, Train Acc: 0.9909 | Val Loss: 3.5704, Val Acc: 0.5065
Epoch 8/10 | Train Loss: 0.0112, Train Acc: 0.9974 | Val Loss: 4.0047, Val Acc: 0.4988
Epoch 9/10 | Train Loss: 0.0066, Train Acc: 0.9988 | Val Loss: 4.1931, Val Acc: 0.5016
Epoch 10/10 | Train Loss: 0.0036, Train Acc: 0.9994 | Val Loss: 4.2868, Val Acc: 0.5044
Saved model for experiment 7.
Experiment 7: Final Validation Accuracy: 0.5044
Experiment 7: Test Accuracy: 0.5040
Experiment 8: 데



Epoch 1/10 | Train Loss: 0.3017, Train Acc: 0.9003 | Val Loss: 1.7336, Val Acc: 0.5109
Epoch 2/10 | Train Loss: 0.2310, Train Acc: 0.9194 | Val Loss: 2.0691, Val Acc: 0.5020
Epoch 3/10 | Train Loss: 0.1815, Train Acc: 0.9356 | Val Loss: 1.8416, Val Acc: 0.5093
Epoch 4/10 | Train Loss: 0.1617, Train Acc: 0.9428 | Val Loss: 1.9279, Val Acc: 0.5089
Epoch 5/10 | Train Loss: 0.0973, Train Acc: 0.9697 | Val Loss: 2.9549, Val Acc: 0.5077
Epoch 6/10 | Train Loss: 0.0637, Train Acc: 0.9804 | Val Loss: 2.8901, Val Acc: 0.5210
Epoch 7/10 | Train Loss: 0.0491, Train Acc: 0.9862 | Val Loss: 3.4318, Val Acc: 0.5048
Epoch 8/10 | Train Loss: 0.0230, Train Acc: 0.9943 | Val Loss: 3.4291, Val Acc: 0.5137
Epoch 9/10 | Train Loss: 0.0114, Train Acc: 0.9973 | Val Loss: 3.7579, Val Acc: 0.5109
Epoch 10/10 | Train Loss: 0.0075, Train Acc: 0.9982 | Val Loss: 3.7917, Val Acc: 0.5149
Saved model for experiment 8.
Experiment 8: Final Validation Accuracy: 0.5149
Experiment 8: Test Accuracy: 0.4988
Experiment 9: 데



Epoch 1/10 | Train Loss: 0.2731, Train Acc: 0.9059 | Val Loss: 1.8086, Val Acc: 0.5162
Epoch 2/10 | Train Loss: 0.2181, Train Acc: 0.9224 | Val Loss: 1.7901, Val Acc: 0.5137
Epoch 3/10 | Train Loss: 0.1721, Train Acc: 0.9385 | Val Loss: 2.3084, Val Acc: 0.5024
Epoch 4/10 | Train Loss: 0.1567, Train Acc: 0.9469 | Val Loss: 1.7541, Val Acc: 0.5061
Epoch 5/10 | Train Loss: 0.1333, Train Acc: 0.9534 | Val Loss: 2.1015, Val Acc: 0.5048
Epoch 6/10 | Train Loss: 0.1180, Train Acc: 0.9621 | Val Loss: 2.3035, Val Acc: 0.5032
Epoch 7/10 | Train Loss: 0.1083, Train Acc: 0.9667 | Val Loss: 2.1382, Val Acc: 0.5081
Epoch 8/10 | Train Loss: 0.0514, Train Acc: 0.9851 | Val Loss: 3.0880, Val Acc: 0.5069
Epoch 9/10 | Train Loss: 0.0260, Train Acc: 0.9931 | Val Loss: 3.3027, Val Acc: 0.5097
Epoch 10/10 | Train Loss: 0.0161, Train Acc: 0.9965 | Val Loss: 3.7295, Val Acc: 0.5004
Saved model for experiment 9.
Experiment 9: Final Validation Accuracy: 0.5004
Experiment 9: Test Accuracy: 0.5073
Experiment 10: 



Epoch 1/10 | Train Loss: 0.2585, Train Acc: 0.9144 | Val Loss: 2.0238, Val Acc: 0.4964
Epoch 2/10 | Train Loss: 0.1842, Train Acc: 0.9337 | Val Loss: 1.8821, Val Acc: 0.5044
Epoch 3/10 | Train Loss: 0.1550, Train Acc: 0.9458 | Val Loss: 2.2241, Val Acc: 0.4980
Epoch 4/10 | Train Loss: 0.1369, Train Acc: 0.9547 | Val Loss: 2.7700, Val Acc: 0.5057
Epoch 5/10 | Train Loss: 0.1228, Train Acc: 0.9604 | Val Loss: 2.4993, Val Acc: 0.5028
Epoch 6/10 | Train Loss: 0.0580, Train Acc: 0.9831 | Val Loss: 3.0853, Val Acc: 0.4984
Epoch 7/10 | Train Loss: 0.0329, Train Acc: 0.9906 | Val Loss: 3.3720, Val Acc: 0.4984
Epoch 8/10 | Train Loss: 0.0224, Train Acc: 0.9949 | Val Loss: 3.3663, Val Acc: 0.4992
Epoch 9/10 | Train Loss: 0.0099, Train Acc: 0.9979 | Val Loss: 3.6284, Val Acc: 0.5016
Epoch 10/10 | Train Loss: 0.0059, Train Acc: 0.9990 | Val Loss: 3.7026, Val Acc: 0.5028
Saved model for experiment 10.
Experiment 10: Final Validation Accuracy: 0.5028
Experiment 10: Test Accuracy: 0.5004
Experiment 1



Epoch 1/10 | Train Loss: 0.2293, Train Acc: 0.9164 | Val Loss: 2.0988, Val Acc: 0.5081
Epoch 2/10 | Train Loss: 0.1773, Train Acc: 0.9285 | Val Loss: 2.2182, Val Acc: 0.4992
Epoch 3/10 | Train Loss: 0.1523, Train Acc: 0.9415 | Val Loss: 2.1355, Val Acc: 0.5036
Epoch 4/10 | Train Loss: 0.1331, Train Acc: 0.9517 | Val Loss: 2.3064, Val Acc: 0.5048
Epoch 5/10 | Train Loss: 0.0741, Train Acc: 0.9749 | Val Loss: 3.1970, Val Acc: 0.5044
Epoch 6/10 | Train Loss: 0.0491, Train Acc: 0.9857 | Val Loss: 3.1743, Val Acc: 0.5089
Epoch 7/10 | Train Loss: 0.0292, Train Acc: 0.9925 | Val Loss: 3.4707, Val Acc: 0.5012
Epoch 8/10 | Train Loss: 0.0121, Train Acc: 0.9972 | Val Loss: 3.8107, Val Acc: 0.5012
Epoch 9/10 | Train Loss: 0.0033, Train Acc: 0.9994 | Val Loss: 4.1986, Val Acc: 0.4952
Epoch 10/10 | Train Loss: 0.0041, Train Acc: 0.9993 | Val Loss: 4.2134, Val Acc: 0.4980
Saved model for experiment 11.
Experiment 11: Final Validation Accuracy: 0.4980
Experiment 11: Test Accuracy: 0.5073
Experiment 1



Epoch 1/10 | Train Loss: 0.2717, Train Acc: 0.9132 | Val Loss: 1.2281, Val Acc: 0.5174
Epoch 2/10 | Train Loss: 0.1970, Train Acc: 0.9348 | Val Loss: 2.1495, Val Acc: 0.5008
Epoch 3/10 | Train Loss: 0.1541, Train Acc: 0.9494 | Val Loss: 2.1157, Val Acc: 0.5105
Epoch 4/10 | Train Loss: 0.1277, Train Acc: 0.9607 | Val Loss: 2.0969, Val Acc: 0.5024
Epoch 5/10 | Train Loss: 0.0659, Train Acc: 0.9816 | Val Loss: 3.0103, Val Acc: 0.5028
Epoch 6/10 | Train Loss: 0.0367, Train Acc: 0.9918 | Val Loss: 3.3847, Val Acc: 0.5093
Epoch 7/10 | Train Loss: 0.0264, Train Acc: 0.9945 | Val Loss: 3.5159, Val Acc: 0.5040
Epoch 8/10 | Train Loss: 0.0098, Train Acc: 0.9979 | Val Loss: 3.9279, Val Acc: 0.5073
Epoch 9/10 | Train Loss: 0.0056, Train Acc: 0.9991 | Val Loss: 3.8916, Val Acc: 0.5113
Epoch 10/10 | Train Loss: 0.0070, Train Acc: 0.9989 | Val Loss: 3.5352, Val Acc: 0.5129
Saved model for experiment 12.
Experiment 12: Final Validation Accuracy: 0.5129
Experiment 12: Test Accuracy: 0.4891
Experiment 1



Epoch 1/10 | Train Loss: 0.2557, Train Acc: 0.9141 | Val Loss: 1.4121, Val Acc: 0.4923
Epoch 2/10 | Train Loss: 0.1870, Train Acc: 0.9374 | Val Loss: 2.0687, Val Acc: 0.4992
Epoch 3/10 | Train Loss: 0.1496, Train Acc: 0.9518 | Val Loss: 2.2977, Val Acc: 0.4935
Epoch 4/10 | Train Loss: 0.1222, Train Acc: 0.9629 | Val Loss: 1.7865, Val Acc: 0.4988
Epoch 5/10 | Train Loss: 0.0637, Train Acc: 0.9829 | Val Loss: 3.1081, Val Acc: 0.5020
Epoch 6/10 | Train Loss: 0.0353, Train Acc: 0.9914 | Val Loss: 3.4690, Val Acc: 0.4984
Epoch 7/10 | Train Loss: 0.0226, Train Acc: 0.9946 | Val Loss: 3.3609, Val Acc: 0.4875
Epoch 8/10 | Train Loss: 0.0086, Train Acc: 0.9981 | Val Loss: 3.8803, Val Acc: 0.4899
Epoch 9/10 | Train Loss: 0.0024, Train Acc: 0.9995 | Val Loss: 4.0033, Val Acc: 0.4826
Epoch 10/10 | Train Loss: 0.0017, Train Acc: 0.9996 | Val Loss: 4.3116, Val Acc: 0.4887
Saved model for experiment 13.
Experiment 13: Final Validation Accuracy: 0.4887
Experiment 13: Test Accuracy: 0.5117
Experiment 1



Epoch 1/10 | Train Loss: 0.2713, Train Acc: 0.9172 | Val Loss: 1.3128, Val Acc: 0.5198
Epoch 2/10 | Train Loss: 0.2061, Train Acc: 0.9368 | Val Loss: 1.5328, Val Acc: 0.5141
Epoch 3/10 | Train Loss: 0.1503, Train Acc: 0.9524 | Val Loss: 2.0918, Val Acc: 0.5097
Epoch 4/10 | Train Loss: 0.1228, Train Acc: 0.9632 | Val Loss: 1.9606, Val Acc: 0.5044
Epoch 5/10 | Train Loss: 0.0609, Train Acc: 0.9846 | Val Loss: 3.1661, Val Acc: 0.5089
Epoch 6/10 | Train Loss: 0.0465, Train Acc: 0.9895 | Val Loss: 2.6007, Val Acc: 0.5113
Epoch 7/10 | Train Loss: 0.0300, Train Acc: 0.9938 | Val Loss: 3.1066, Val Acc: 0.5048
Epoch 8/10 | Train Loss: 0.0123, Train Acc: 0.9979 | Val Loss: 3.4497, Val Acc: 0.5121
Epoch 9/10 | Train Loss: 0.0102, Train Acc: 0.9983 | Val Loss: 3.5151, Val Acc: 0.5057
Epoch 10/10 | Train Loss: 0.0080, Train Acc: 0.9988 | Val Loss: 3.6060, Val Acc: 0.5061
Saved model for experiment 14.
Experiment 14: Final Validation Accuracy: 0.5061
Experiment 14: Test Accuracy: 0.5162
Experiment 1



Epoch 1/10 | Train Loss: 0.2463, Train Acc: 0.9165 | Val Loss: 1.9224, Val Acc: 0.4903
Epoch 2/10 | Train Loss: 0.1736, Train Acc: 0.9381 | Val Loss: 2.4500, Val Acc: 0.4964
Epoch 3/10 | Train Loss: 0.1407, Train Acc: 0.9547 | Val Loss: 2.3631, Val Acc: 0.5105
Epoch 4/10 | Train Loss: 0.1120, Train Acc: 0.9675 | Val Loss: 2.3358, Val Acc: 0.5044
Epoch 5/10 | Train Loss: 0.0535, Train Acc: 0.9854 | Val Loss: 2.6492, Val Acc: 0.5089
Epoch 6/10 | Train Loss: 0.0288, Train Acc: 0.9938 | Val Loss: 2.8420, Val Acc: 0.5101
Epoch 7/10 | Train Loss: 0.0188, Train Acc: 0.9960 | Val Loss: 3.2584, Val Acc: 0.5081
Epoch 8/10 | Train Loss: 0.0113, Train Acc: 0.9978 | Val Loss: 3.6948, Val Acc: 0.5125
Epoch 9/10 | Train Loss: 0.0036, Train Acc: 0.9995 | Val Loss: 3.6955, Val Acc: 0.5145
Epoch 10/10 | Train Loss: 0.0026, Train Acc: 0.9997 | Val Loss: 3.7702, Val Acc: 0.5024
Saved model for experiment 15.
Experiment 15: Final Validation Accuracy: 0.5024
Experiment 15: Test Accuracy: 0.5109

Final Avera

In [2]:
import pandas as pd
import numpy as np
import pandas_ta as ta
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from math import sqrt

####################################
# 1. 기술적 지표 계산 함수 (변경 없음)
####################################
def calculate_indicators(data):
    data['William_R'] = ta.willr(data['high'], data['low'], data['close'])
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'])
    data['OBV'] = ta.obv(data['close'], data['volume'])
    data['Z_Score'] = (data['close'] - data['close'].rolling(window=20).mean()) / data['close'].rolling(window=20).std()
    data['Entropy'] = ta.entropy(data['close'], length=14)
    data['SMA_5'] = data['close'].rolling(window=5).mean()
    data['SMA_10'] = data['close'].rolling(window=10).mean()
    data['SMA_20'] = data['close'].rolling(window=20).mean()
    data['SMA_60'] = data['close'].rolling(window=60).mean()
    data['SMA_120'] = data['close'].rolling(window=120).mean()
    data['SMA_250'] = data['close'].rolling(window=250).mean()
    data['RSI'] = ta.rsi(data['close'])
    bb = ta.bbands(data['close'])
    data['BB_Upper'], data['BB_Middle'], data['BB_Lower'] = bb.iloc[:, 0], bb.iloc[:, 1], bb.iloc[:, 2]
    macd = ta.macd(data['close'])
    data['MACD'] = macd.iloc[:, 0]
    data['Stochastic'] = ta.stoch(data['high'], data['low'], data['close']).iloc[:, 0]
    return data.dropna()

####################################
# 1-2. 추가 feature 계산 (가격 차이)
####################################
def calculate_price_differences(data): # 포함 시 50.07%
    data['close_open'] = data['close'] - data['open']
    data['high_low'] = data['high'] - data['low']
    data['high_open'] = data['high'] - data['open']
    data['high_close'] = data['high'] - data['close']
    data['open_low'] = data['open'] - data['low']
    data['close_low'] = data['close'] - data['low']
    return data

####################################
# 2. Datetime Feature One-Hot Encoding (각 feature 128차원)
####################################
def encode_datetime_features_onehot(data, dim=128):
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    
    data['hour_of_day'] = data['datetime'].dt.hour
    data['day_of_week'] = data['datetime'].dt.dayofweek
    data['week_of_month'] = (data['datetime'].dt.day - 1) // 7 + 1
    data['month'] = data['datetime'].dt.month

    def onehot_with_fixed_dim(series, prefix, dim):
        dummies = pd.get_dummies(series, prefix=prefix)
        expected_cols = [f"{prefix}_{i}" for i in range(dim)]
        dummies = dummies.reindex(columns=expected_cols, fill_value=0)
        return dummies

    hour_one_hot = onehot_with_fixed_dim(data['hour_of_day'], 'Hour', dim)
    day_one_hot = onehot_with_fixed_dim(data['day_of_week'], 'Day', dim)
    week_one_hot = onehot_with_fixed_dim(data['week_of_month'], 'Week', dim)
    month_one_hot = onehot_with_fixed_dim(data['month'], 'Month', dim)
    
    data = pd.concat([data, hour_one_hot, day_one_hot, week_one_hot, month_one_hot], axis=1)
    return data

####################################
# 3. Rolling MinMax Scaling (분모 0 방지)
####################################
def rolling_minmax_scale(series, window=24):
    roll_min = series.rolling(window=window, min_periods=window).min()
    roll_max = series.rolling(window=window, min_periods=window).max()
    # 기본 minmax scaling 계산 (분모에 아주 작은 epsilon 추가)
    scaled = (series - roll_min) / ((roll_max - roll_min) + 1e-8)
    # 무한대나 -무한대 값을 NaN으로 대체하고, NaN은 최대값 1.0으로 대체
    scaled = scaled.replace([np.inf, -np.inf], np.nan)
    scaled = scaled.fillna(1.0)
    # 혹시 1보다 큰 값이 있다면 최대값 1.0으로 클리핑
    scaled = scaled.clip(upper=1.0)
    return scaled


####################################
# 4. Binning 후 One-Hot 인코딩 (각 feature를 128차원으로)
####################################
def bin_and_encode(data, features, bins=128, drop_original=True):
    for feature in features:
        data[f'{feature}_Bin'] = pd.cut(data[feature], bins=bins, labels=False)
        one_hot = pd.get_dummies(data[f'{feature}_Bin'], prefix=f'{feature}_Bin')
        expected_columns = [f'{feature}_Bin_{i}' for i in range(bins)]
        one_hot = one_hot.reindex(columns=expected_columns, fill_value=0)
        data = pd.concat([data, one_hot], axis=1)
        if drop_original:
            data.drop(columns=[f'{feature}_Bin'], inplace=True)
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    data[numeric_cols] = data[numeric_cols].astype(np.float32)
    return data

####################################
# 5. 데이터 로드 및 전처리
####################################
data = pd.read_csv("BTC_upbit_KRW_min5.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)

# 기본 지표 계산 및 추가 feature 생성
data = calculate_indicators(data)
# data = calculate_price_differences(data)
data = encode_datetime_features_onehot(data, dim=128)

# 타깃: close 값을 그대로 사용 (continuous)
data['close_target'] = data['close']

####################################
# [Bollinger Bands 관련 새로운 feature 계산]
####################################
data['BB_diff'] = data['BB_Upper'] - data['BB_Lower']
data['BB_close_upper'] = data['close'] - data['BB_Upper']
data['BB_close_lower'] = data['close'] - data['BB_Lower']

####################################
# [인코딩 대상 feature 목록 업데이트]
####################################
# 원래 feature들을 대상으로 전일 대비 상승률(= pct_change()*100)을 계산
features_to_bin = ['open', 'high', 'low', 'volume', 'value', 'William_R',
                   'ATR', 'OBV', 'Z_Score', 'Entropy', 'SMA_5', 'SMA_10', 
                   'SMA_20', 'SMA_60', 'SMA_120', 'SMA_250', 'RSI', 'MACD', 'Stochastic',
                   'BB_diff', 'BB_close_upper', 'BB_close_lower']

# 각 feature에 대해 전일 대비 상승률을 계산하고 rolling minmax scaling 적용
for feature in features_to_bin:
    col_pct = feature + '_pct'
    data[col_pct] = data[feature].pct_change() * 100
    data[col_pct] = rolling_minmax_scale(data[col_pct], window=24)
    
# NaN 제거 (pct_change 및 rolling scaling으로 인한)
data = data.dropna()

# pct_change 결과에 대해 binning과 one-hot 인코딩 수행
features_pct = [f + '_pct' for f in features_to_bin]
data = bin_and_encode(data, features_pct, bins=128, drop_original=True)

# datetime one-hot encoding된 컬럼 추출
datetime_onehot_features = [col for col in data.columns if col.startswith('Hour_') or 
                              col.startswith('Day_') or col.startswith('Week_') or 
                              col.startswith('Month_')]

# 최종 입력 데이터 구성: 각 지표의 상승률에 대한 one-hot 벡터와 datetime one-hot 벡터 결합
final_input_columns = []
for feature in features_pct:
    final_input_columns.extend([f'{feature}_Bin_{i}' for i in range(128)])
final_input_columns.extend(datetime_onehot_features)

final_target_column = ['close_target']

data_input = data[final_input_columns]
data_target = data[final_target_column]

####################################
# 6-2. Dataset 정의 (입력과 타깃을 별도로 사용)
####################################
class TimeSeriesDataset(Dataset):
    def __init__(self, input_data, target_data, lookback=24):
        self.input_data = input_data.values
        self.target_data = target_data.values  # shape: (N, 1)
        self.lookback = lookback

    def __len__(self):
        return len(self.input_data) - self.lookback

    def __getitem__(self, idx):
        x = self.input_data[idx: idx + self.lookback, :]
        y = self.target_data[idx + self.lookback, 0]
        y_prev = self.target_data[idx + self.lookback - 1, 0]
        y_target = 1 if y > y_prev else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

####################################
# 7. Transformer Encoder 직접 구현
####################################
# 7-1. Multi-Head Self-Attention
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == embed_dim, "embed_dim은 num_heads로 나누어떨어져야 합니다."
        
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key   = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.out   = nn.Linear(embed_dim, embed_dim)
        
    def forward(self, x):
        batch_size, seq_len, embed_dim = x.shape
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        scores = torch.matmul(Q, K.transpose(-2, -1)) / sqrt(self.head_dim)
        attn = torch.softmax(scores, dim=-1)
        out = torch.matmul(attn, V)
        out = out.transpose(1, 2).contiguous().view(batch_size, seq_len, embed_dim)
        out = self.out(out)
        return out

# 7-2. Feed-Forward Network
class FeedForward(nn.Module):
    def __init__(self, embed_dim, ffn_dim):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(embed_dim, ffn_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(ffn_dim, embed_dim)
        
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

# 7-3. Transformer Encoder Layer (Self-Attention + FFN + Residual + LayerNorm)
class TransformerEncoderLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, ffn_dim):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.ffn = FeedForward(embed_dim, ffn_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        
    def forward(self, x):
        attn_out = self.self_attn(x)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x)
        x = self.norm2(x + ffn_out)
        return x

# 7-4. Encoder-Only Transformer 직접 구현 (lookback=24이므로, max_seq_len=24)
class EncoderOnlyTransformerCustom(nn.Module):
    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=24):
        super(EncoderOnlyTransformerCustom, self).__init__()
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_seq_len, embedding_dim)
        self.layers = nn.ModuleList([
            TransformerEncoderLayer(embedding_dim, num_heads, ffn_dim)
            for _ in range(num_layers)
        ])
        self.fc = nn.Linear(embedding_dim, num_classes)
        
    def forward(self, x):
        batch_size, seq_len, _ = x.shape
        x = self.token_embedding(x)
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0).expand(batch_size, seq_len)
        x = x + self.position_embedding(positions)
        for layer in self.layers:
            x = layer(x)
        x = x[:, -1, :]
        return self.fc(x)

####################################
# 8. 학습 및 평가 루프 (Fine-tuning 및 Validation Accuracy 출력)
####################################
def evaluate_model(model, data_loader, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    return total_loss / len(data_loader), correct / total

def train_model(model, train_loader, val_loader, num_epochs, lr, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)
    
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
        train_loss = total_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        scheduler.step(val_loss)
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def train_and_evaluate(data, num_experiments=16, lookback=24, num_epochs=10):
    # 최종 입력은 각 지표의 상승률(one-hot 인코딩된)과 datetime one-hot 인코딩 결합
    input_cols = []
    for feature in features_pct:
        input_cols.extend([f'{feature}_Bin_{i}' for i in range(128)])
    input_cols.extend(datetime_onehot_features)
    target_cols = ['close_target']
    
    data_input = data[input_cols]
    data_target = data[target_cols]
    
    # 여기서 모든 컬럼을 numeric 타입으로 변환
    data_input = data_input.apply(pd.to_numeric)
    data_input = data_input.astype(np.float32)
    data_target = data_target.apply(pd.to_numeric)
    data_target = data_target.astype(np.float32)
    
    step_size = 2500  # 데이터 구간 이동 단위
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    val_acc_list = []
    test_acc_list = []
    data_input.to_csv("onehotenc.csv")
    
    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8
        val_end = train_end + step_size
        test_end = val_end + step_size
        if test_end > len(data_input):
            break
        print(f"Experiment {exp}: 데이터 구간 [{train_start}:{test_end}]")
        # print(data)
        
        train_input = data_input.iloc[train_start:train_end]
        train_target = data_target.iloc[train_start:train_end]
        val_input = data_input.iloc[train_end:val_end]
        val_target = data_target.iloc[train_end:val_end]
        test_input = data_input.iloc[val_end:test_end]
        test_target = data_target.iloc[val_end:test_end]
        
        train_dataset = TimeSeriesDataset(train_input, train_target, lookback=lookback)
        val_dataset = TimeSeriesDataset(val_input, val_target, lookback=lookback)
        test_dataset = TimeSeriesDataset(test_input, test_target, lookback=lookback)
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        
        lr = 1e-4
        input_dim = data_input.shape[1]
        model = EncoderOnlyTransformerCustom(input_dim=input_dim, embedding_dim=512, num_heads=8, 
                                               num_layers=6, ffn_dim=2048, num_classes=2, max_seq_len=lookback).to(device)
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                model.load_state_dict(torch.load(f"model_experiment_{exp - 1}.pth"))
                print(f"Loaded model from experiment {exp - 1} for fine-tuning.")
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
        
        print(f"Experiment {exp}: Training with lr={lr} (Fine-Tuning)")
        model = train_model(model, train_loader, val_loader, num_epochs, lr, device)
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")
        
        val_loss, val_acc = evaluate_model(model, val_loader, device)
        val_acc_list.append(val_acc)
        print(f"Experiment {exp}: Final Validation Accuracy: {val_acc:.4f}")
        
        test_loss, test_acc = evaluate_model(model, test_loader, device)
        test_acc_list.append(test_acc)
        print(f"Experiment {exp}: Test Accuracy: {test_acc:.4f}")
    
    if len(val_acc_list) > 0:
        avg_val_acc = sum(val_acc_list) / len(val_acc_list)
        avg_test_acc = sum(test_acc_list) / len(test_acc_list)
        print(f"\nFinal Average Validation Accuracy: {avg_val_acc:.4f}")
        print(f"Final Average Test Accuracy: {avg_test_acc:.4f}")
    else:
        print("실험이 한 번도 실행되지 않았습니다.")


# # features_pct 리스트: 각 원본 feature에 대해 '_pct'가 붙은 컬럼명
# features_pct = [f + '_pct' for f in features_to_bin]

train_and_evaluate(data)


Experiment 0: 데이터 구간 [0:25000]
Experiment 0: Training with lr=0.0001 (Fine-Tuning)




Epoch 1/10 | Train Loss: 0.7201, Train Acc: 0.5113 | Val Loss: 0.6965, Val Acc: 0.5032
Epoch 2/10 | Train Loss: 0.6936, Train Acc: 0.5292 | Val Loss: 0.7069, Val Acc: 0.4972
Epoch 3/10 | Train Loss: 0.6721, Train Acc: 0.5922 | Val Loss: 0.7048, Val Acc: 0.5065
Epoch 4/10 | Train Loss: 0.6554, Train Acc: 0.6141 | Val Loss: 0.7429, Val Acc: 0.5028
Epoch 5/10 | Train Loss: 0.6323, Train Acc: 0.6503 | Val Loss: 0.7491, Val Acc: 0.4919
Epoch 6/10 | Train Loss: 0.6159, Train Acc: 0.6690 | Val Loss: 0.8047, Val Acc: 0.4968
Epoch 7/10 | Train Loss: 0.6073, Train Acc: 0.6763 | Val Loss: 0.7616, Val Acc: 0.5016
Epoch 8/10 | Train Loss: 0.5880, Train Acc: 0.6982 | Val Loss: 0.7961, Val Acc: 0.4879
Epoch 9/10 | Train Loss: 0.5811, Train Acc: 0.7051 | Val Loss: 0.8206, Val Acc: 0.5016
Epoch 10/10 | Train Loss: 0.5767, Train Acc: 0.7109 | Val Loss: 0.7620, Val Acc: 0.4952
Saved model for experiment 0.
Experiment 0: Final Validation Accuracy: 0.4952
Experiment 0: Test Accuracy: 0.5186
Experiment 1: 데