# In [None]:
import pandas as pd
import numpy as np
import pandas_ta as ta
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Technical indicator computation
def calculate_indicators(data):
    """Return a copy of ``data`` extended with technical-indicator columns.

    Args:
        data: DataFrame holding ``high``, ``low``, ``close`` and ``volume``
            columns.

    Returns:
        A new DataFrame with the input columns plus ``William_R``, ``ATR``,
        ``OBV``, ``Z_Score``, ``Entropy``, ``SMA_5/10/20/60/120``, ``RSI``,
        ``BB_Upper``, ``BB_Middle``, ``BB_Lower``, ``MACD`` and ``Stochastic``.
        Rows containing a NaN in any column (e.g. indicator warm-up rows) are
        dropped. The caller's frame is left untouched.
    """
    # Work on a copy so the indicator columns do not leak into the caller's frame.
    data = data.copy()
    high, low, close = data['high'], data['low'], data['close']

    data['William_R'] = ta.willr(high, low, close)
    data['ATR'] = ta.atr(high, low, close)
    data['OBV'] = ta.obv(close, data['volume'])

    # One 20-bar rolling window serves the z-score and SMA_20.
    rolling_20 = close.rolling(window=20)
    mean_20 = rolling_20.mean()
    data['Z_Score'] = (close - mean_20) / rolling_20.std()
    data['Entropy'] = ta.entropy(close, length=14)

    data['SMA_5'] = close.rolling(window=5).mean()
    data['SMA_10'] = close.rolling(window=10).mean()
    data['SMA_20'] = mean_20
    data['SMA_60'] = close.rolling(window=60).mean()
    data['SMA_120'] = close.rolling(window=120).mean()
    data['RSI'] = ta.rsi(close)

    # pandas_ta.bbands orders its columns lower, middle, upper (BBL, BBM, BBU),
    # so the upper band is column 2 and the lower band is column 0.
    bb = ta.bbands(close)
    data['BB_Upper'] = bb.iloc[:, 2]
    data['BB_Middle'] = bb.iloc[:, 1]
    data['BB_Lower'] = bb.iloc[:, 0]

    # First column of each multi-column result is kept
    # (presumably the MACD line and stochastic %K — confirm against pandas_ta).
    data['MACD'] = ta.macd(close).iloc[:, 0]
    data['Stochastic'] = ta.stoch(high, low, close).iloc[:, 0]
    return data.dropna()

# Datetime Feature One-Hot Encoding
def encode_datetime_features(data):
    """Return a copy of ``data`` with calendar columns and their one-hot encodings.

    Args:
        data: DataFrame with either a ``datetime`` column or an index that
            ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame with the input columns plus ``datetime`` (if it was
        missing), the integer columns ``hour_of_day``, ``day_of_week``,
        ``week_of_month`` and ``month``, and one indicator column per observed
        value named ``Hour_<h>``, ``Day_<d>``, ``Week_<w>`` and ``Month_<m>``.
        Only values present in ``data`` get an indicator column. The caller's
        frame is left untouched.
    """
    # Work on a copy so helper columns are not written into the caller's frame.
    data = data.copy()
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    else:
        # Accept a pre-existing column stored as strings/objects; this is a
        # no-op when the column is already datetime-typed.
        data['datetime'] = pd.to_datetime(data['datetime'])

    stamps = data['datetime'].dt
    data['hour_of_day'] = stamps.hour
    data['day_of_week'] = stamps.dayofweek
    # Days 1-7 -> week 1, 8-14 -> week 2, ..., 29-31 -> week 5.
    data['week_of_month'] = (stamps.day - 1) // 7 + 1
    data['month'] = stamps.month

    encodings = (
        ('hour_of_day', 'Hour'),
        ('day_of_week', 'Day'),
        ('week_of_month', 'Week'),
        ('month', 'Month'),
    )
    one_hot_frames = [pd.get_dummies(data[column], prefix=prefix) for column, prefix in encodings]
    return pd.concat([data, *one_hot_frames], axis=1)

# Load the raw candles, then derive indicator and calendar features
data = pd.read_csv("BTC_upbit_KRW_min5.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)
data = encode_datetime_features(calculate_indicators(data))

# Feature selection and normalisation: raw OHLCV columns, the indicators,
# then every one-hot calendar column that was generated for this data set.
features = [
    'open', 'high', 'low', 'close', 'volume', 'value',
    'William_R', 'ATR', 'OBV', 'Z_Score', 'Entropy',
    'SMA_5', 'SMA_10', 'SMA_20', 'SMA_60', 'SMA_120',
    'RSI', 'BB_Upper', 'BB_Middle', 'BB_Lower', 'MACD', 'Stochastic',
] + [
    column
    for pattern in ('Hour_', 'Day_', 'Week_', 'Month_')
    for column in data.filter(regex=pattern).columns
]

# Keep only the model inputs and discard any row that still has a gap.
data = data[features].dropna()

# NOTE(review): the scaler is fitted on every row, including rows that
# train_and_evaluate later slices off as validation/test folds.
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])

# Dataset definition
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset labelled with the direction of the next step.

    Sample ``i`` is the window ``data[i : i + lookback]`` together with a
    binary label: 1 when the target column at row ``i + lookback`` is strictly
    greater than at row ``i + lookback - 1``, otherwise 0.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        lookback: Number of consecutive rows in each input window.
        target_idx: Column index whose next-step movement defines the label.
    """

    def __init__(self, data, lookback=15, target_idx=-1):
        self.data = data
        self.lookback = lookback
        self.target_idx = target_idx

    def __len__(self):
        # Clamp at zero: a series shorter than the window is an empty dataset,
        # not a negative length (which makes len() raise).
        return max(0, len(self.data) - self.lookback)

    def __getitem__(self, idx):
        """Return ``(window, label)`` as a float32 tensor and a long scalar tensor.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Support sequence-style negative indexing instead of silently
            # slicing a malformed window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"index out of range for dataset of length {n_samples}")

        window_end = idx + self.lookback
        x = self.data[idx:window_end, :]
        next_value = self.data[window_end, self.target_idx]
        last_value = self.data[window_end - 1, self.target_idx]
        y_target = 1 if next_value > last_value else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

# Transformer model definition
class EncoderOnlyTransformer(nn.Module):
    """Transformer encoder that classifies a sequence from its last position.

    Each time step is projected to ``embedding_dim`` with a linear layer, a
    learned position embedding is added, the sequence passes through a stack of
    ``nn.TransformerEncoderLayer`` blocks, and the final position's output is
    mapped to ``num_classes`` logits.

    Args:
        input_dim: Number of features per time step.
        embedding_dim: Width of the token/position embeddings (``d_model``).
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        ffn_dim: Hidden width of each layer's feed-forward network.
        num_classes: Number of output logits.
        max_len: Longest sequence the position table can encode.
    """

    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_layers=4, ffn_dim=1024, num_classes=2,
                 max_len=512):
        super().__init__()
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim, nhead=num_heads, dim_feedforward=ffn_dim
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, num_classes) logits.

        Raises:
            ValueError: If ``x`` is not 3-D or ``seq_len`` exceeds the position table.
        """
        _, seq_len, _ = x.shape  # unpacking also rejects inputs that are not 3-D
        max_len = self.position_embedding.num_embeddings
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the maximum supported length {max_len}"
            )

        x = self.token_embedding(x)
        # Build the indices directly on the input's device; the resulting
        # (seq_len, embedding_dim) table broadcasts across the batch dimension.
        positions = torch.arange(seq_len, device=x.device)
        x = x + self.position_embedding(positions)
        x = x.permute(1, 0, 2)  # encoder layers were built with the default (seq, batch, dim) layout
        x = self.encoder(x)
        return self.fc(x[-1])  # classify from the last time step's representation

# Training and evaluation loop
def _evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over ``loader`` with gradients disabled."""
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            # Weight by batch size so a short final batch does not skew the mean.
            loss_sum += criterion(outputs, y).item() * y.size(0)
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    if total == 0:
        return float('nan'), float('nan')
    return loss_sum / total, correct / total


def _train_model(model, train_loader, val_loader, device, num_epochs=10, lr=1e-4):
    """Train ``model`` with Adam and cross-entropy, printing train/validation metrics per epoch."""
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()  # _evaluate() leaves the model in eval mode
        loss_sum, correct, total = 0.0, 0, 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item() * y.size(0)
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)

        seen = max(total, 1)  # avoid dividing by zero on an empty loader
        val_loss, val_acc = _evaluate(model, val_loader, criterion, device)
        print(
            f"Epoch {epoch + 1}/{num_epochs}, "
            f"Train Loss: {loss_sum / seen:.4f}, Train Accuracy: {correct / seen:.4f}, "
            f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}"
        )


def train_and_evaluate(data, num_experiments=21, lookback=15, num_epochs=10, step_size=25000):
    """Run rolling-window train/validate/test experiments and return the test accuracies.

    Experiment ``exp`` trains on rows ``[exp*step_size, exp*step_size + 8*step_size)``,
    validates on the next ``step_size`` rows and tests on the ``step_size`` rows
    after that. The loop stops at the first experiment whose test fold would run
    past the end of ``data``. From the second experiment on, the weights saved by
    the previous experiment are loaded before training. Each experiment writes
    ``model_experiment_<exp>.pth`` to the current working directory.

    Args:
        data: DataFrame of model inputs; must contain a ``close`` column, which
            defines the up/down label.
        num_experiments: Maximum number of rolling windows to run.
        lookback: Window length passed to ``TimeSeriesDataset``.
        num_epochs: Training epochs per experiment.
        step_size: Rows by which the window advances; also the validation and
            test fold size (the training fold is eight times this).

    Returns:
        List with one test accuracy per completed experiment.

    Raises:
        ValueError: If ``step_size`` is not larger than ``lookback``, which would
            leave the validation/test folds without a single sample.
    """
    if step_size <= lookback:
        raise ValueError("step_size must be larger than lookback")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Derive width and target position from the frame actually passed in, so
    # they always agree with ``data.values``.
    input_dim = data.shape[1]
    target_idx = list(data.columns).index('close')
    test_accuracies = []

    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8  # training fold: 8 steps
        val_end = train_end + step_size  # validation fold: 1 step
        test_end = val_end + step_size  # test fold: 1 step

        if test_end > len(data):
            break

        folds = (
            data.iloc[train_start:train_end],
            data.iloc[train_end:val_end],
            data.iloc[val_end:test_end],
        )
        train_dataset, val_dataset, test_dataset = (
            TimeSeriesDataset(fold.values, lookback=lookback, target_idx=target_idx) for fold in folds
        )

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

        # Model initialisation
        model = EncoderOnlyTransformer(input_dim=input_dim).to(device)

        # Fine-tuning: start from the previous experiment's weights when exp > 0
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                # map_location lets a checkpoint saved on GPU load on a CPU-only host.
                state = torch.load(f"model_experiment_{exp - 1}.pth", map_location=device)
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
            else:
                model.load_state_dict(state)
                print(f"Loaded model from experiment {exp - 1}.")

        # Train
        _train_model(model, train_loader, val_loader, device, num_epochs)

        # Save
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")

        # Test
        _, test_accuracy = _evaluate(model, test_loader, nn.CrossEntropyLoss(), device)
        print(f"Experiment {exp + 1}, Test Accuracy: {test_accuracy:.4f}")
        test_accuracies.append(test_accuracy)

    return test_accuracies

# Run the rolling-window experiments on the preprocessed, scaled frame.
train_and_evaluate(data)


# In [None]:
import pandas as pd
import numpy as np
import pandas_ta as ta
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Technical indicator computation
def calculate_indicators(data):
    """Return a copy of ``data`` extended with technical-indicator columns.

    Args:
        data: DataFrame holding ``high``, ``low``, ``close`` and ``volume``
            columns.

    Returns:
        A new DataFrame with the input columns plus ``William_R``, ``ATR``,
        ``OBV``, ``Z_Score``, ``Entropy``, ``SMA_5/10/20/60/120``, ``RSI``,
        ``BB_Upper``, ``BB_Middle``, ``BB_Lower``, ``MACD`` and ``Stochastic``.
        Rows containing a NaN in any column (e.g. indicator warm-up rows) are
        dropped. The caller's frame is left untouched.
    """
    # Work on a copy so the indicator columns do not leak into the caller's frame.
    data = data.copy()
    high, low, close = data['high'], data['low'], data['close']

    data['William_R'] = ta.willr(high, low, close)
    data['ATR'] = ta.atr(high, low, close)
    data['OBV'] = ta.obv(close, data['volume'])

    # One 20-bar rolling window serves the z-score and SMA_20.
    rolling_20 = close.rolling(window=20)
    mean_20 = rolling_20.mean()
    data['Z_Score'] = (close - mean_20) / rolling_20.std()
    data['Entropy'] = ta.entropy(close, length=14)

    data['SMA_5'] = close.rolling(window=5).mean()
    data['SMA_10'] = close.rolling(window=10).mean()
    data['SMA_20'] = mean_20
    data['SMA_60'] = close.rolling(window=60).mean()
    data['SMA_120'] = close.rolling(window=120).mean()
    data['RSI'] = ta.rsi(close)

    # pandas_ta.bbands orders its columns lower, middle, upper (BBL, BBM, BBU),
    # so the upper band is column 2 and the lower band is column 0.
    bb = ta.bbands(close)
    data['BB_Upper'] = bb.iloc[:, 2]
    data['BB_Middle'] = bb.iloc[:, 1]
    data['BB_Lower'] = bb.iloc[:, 0]

    # First column of each multi-column result is kept
    # (presumably the MACD line and stochastic %K — confirm against pandas_ta).
    data['MACD'] = ta.macd(close).iloc[:, 0]
    data['Stochastic'] = ta.stoch(high, low, close).iloc[:, 0]
    return data.dropna()

# Datetime Feature One-Hot Encoding
def encode_datetime_features(data):
    """Return a copy of ``data`` with calendar columns and their one-hot encodings.

    This variant produces no hour-of-day feature.

    Args:
        data: DataFrame with either a ``datetime`` column or an index that
            ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame with the input columns plus ``datetime`` (if it was
        missing), the integer columns ``day_of_week``, ``week_of_month`` and
        ``month``, and one indicator column per observed value named
        ``Day_<d>``, ``Week_<w>`` and ``Month_<m>``. Only values present in
        ``data`` get an indicator column. The caller's frame is left untouched.
    """
    # Work on a copy so helper columns are not written into the caller's frame.
    data = data.copy()
    if 'datetime' not in data.columns:
        data['datetime'] = pd.to_datetime(data.index)
    else:
        # Accept a pre-existing column stored as strings/objects; this is a
        # no-op when the column is already datetime-typed.
        data['datetime'] = pd.to_datetime(data['datetime'])

    stamps = data['datetime'].dt
    data['day_of_week'] = stamps.dayofweek
    # Days 1-7 -> week 1, 8-14 -> week 2, ..., 29-31 -> week 5.
    data['week_of_month'] = (stamps.day - 1) // 7 + 1
    data['month'] = stamps.month

    encodings = (
        ('day_of_week', 'Day'),
        ('week_of_month', 'Week'),
        ('month', 'Month'),
    )
    one_hot_frames = [pd.get_dummies(data[column], prefix=prefix) for column, prefix in encodings]
    return pd.concat([data, *one_hot_frames], axis=1)

# Load the raw price series, then derive indicator and calendar features
data = pd.read_csv("se_price_daily.csv", index_col=0)
data.columns = ['open', 'high', 'low', 'close', 'volume', 'value']
data.index = pd.to_datetime(data.index)
data = encode_datetime_features(calculate_indicators(data))

# Feature selection and normalisation: raw OHLCV columns, the indicators,
# then every one-hot calendar column that was generated for this data set.
features = [
    'open', 'high', 'low', 'close', 'volume', 'value',
    'William_R', 'ATR', 'OBV', 'Z_Score', 'Entropy',
    'SMA_5', 'SMA_10', 'SMA_20', 'SMA_60', 'SMA_120',
    'RSI', 'BB_Upper', 'BB_Middle', 'BB_Lower', 'MACD', 'Stochastic',
] + [
    column
    for pattern in ('Day_', 'Week_', 'Month_')
    for column in data.filter(regex=pattern).columns
]

# Keep only the model inputs and discard any row that still has a gap.
data = data[features].dropna()

# NOTE(review): the scaler is fitted on every row, including rows that
# train_and_evaluate later slices off as validation/test folds.
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])

# Dataset definition
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset labelled with the direction of the next step.

    Sample ``i`` is the window ``data[i : i + lookback]`` together with a
    binary label: 1 when the target column at row ``i + lookback`` is strictly
    greater than at row ``i + lookback - 1``, otherwise 0.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        lookback: Number of consecutive rows in each input window.
        target_idx: Column index whose next-step movement defines the label.
    """

    def __init__(self, data, lookback=15, target_idx=-1):
        self.data = data
        self.lookback = lookback
        self.target_idx = target_idx

    def __len__(self):
        # Clamp at zero: a series shorter than the window is an empty dataset,
        # not a negative length (which makes len() raise).
        return max(0, len(self.data) - self.lookback)

    def __getitem__(self, idx):
        """Return ``(window, label)`` as a float32 tensor and a long scalar tensor.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Support sequence-style negative indexing instead of silently
            # slicing a malformed window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"index out of range for dataset of length {n_samples}")

        window_end = idx + self.lookback
        x = self.data[idx:window_end, :]
        next_value = self.data[window_end, self.target_idx]
        last_value = self.data[window_end - 1, self.target_idx]
        y_target = 1 if next_value > last_value else 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y_target, dtype=torch.long)

# Transformer model definition
class EncoderOnlyTransformer(nn.Module):
    """Transformer encoder that classifies a sequence from its last position.

    Each time step is projected to ``embedding_dim`` with a linear layer, a
    learned position embedding is added, the sequence passes through a stack of
    ``nn.TransformerEncoderLayer`` blocks, and the final position's output is
    mapped to ``num_classes`` logits.

    Args:
        input_dim: Number of features per time step.
        embedding_dim: Width of the token/position embeddings (``d_model``).
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        ffn_dim: Hidden width of each layer's feed-forward network.
        num_classes: Number of output logits.
        max_len: Longest sequence the position table can encode.
    """

    def __init__(self, input_dim, embedding_dim=512, num_heads=8, num_layers=4, ffn_dim=1024, num_classes=2,
                 max_len=512):
        super().__init__()
        self.token_embedding = nn.Linear(input_dim, embedding_dim)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim, nhead=num_heads, dim_feedforward=ffn_dim
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, num_classes) logits.

        Raises:
            ValueError: If ``x`` is not 3-D or ``seq_len`` exceeds the position table.
        """
        _, seq_len, _ = x.shape  # unpacking also rejects inputs that are not 3-D
        max_len = self.position_embedding.num_embeddings
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the maximum supported length {max_len}"
            )

        x = self.token_embedding(x)
        # Build the indices directly on the input's device; the resulting
        # (seq_len, embedding_dim) table broadcasts across the batch dimension.
        positions = torch.arange(seq_len, device=x.device)
        x = x + self.position_embedding(positions)
        x = x.permute(1, 0, 2)  # encoder layers were built with the default (seq, batch, dim) layout
        x = self.encoder(x)
        return self.fc(x[-1])  # classify from the last time step's representation

# Training and evaluation loop
def _evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over ``loader`` with gradients disabled."""
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            # Weight by batch size so a short final batch does not skew the mean.
            loss_sum += criterion(outputs, y).item() * y.size(0)
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)
    if total == 0:
        return float('nan'), float('nan')
    return loss_sum / total, correct / total


def _train_model(model, train_loader, val_loader, device, num_epochs=10, lr=1e-4):
    """Train ``model`` with Adam and cross-entropy, printing train/validation metrics per epoch."""
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()  # _evaluate() leaves the model in eval mode
        loss_sum, correct, total = 0.0, 0, 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item() * y.size(0)
            correct += (outputs.argmax(1) == y).sum().item()
            total += y.size(0)

        seen = max(total, 1)  # avoid dividing by zero on an empty loader
        val_loss, val_acc = _evaluate(model, val_loader, criterion, device)
        print(
            f"Epoch {epoch + 1}/{num_epochs}, "
            f"Train Loss: {loss_sum / seen:.4f}, Train Accuracy: {correct / seen:.4f}, "
            f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}"
        )


def train_and_evaluate(data, num_experiments=21, lookback=15, num_epochs=10, step_size=250):
    """Run rolling-window train/validate/test experiments and return the test accuracies.

    Experiment ``exp`` trains on rows ``[exp*step_size, exp*step_size + 8*step_size)``,
    validates on the next ``step_size`` rows and tests on the ``step_size`` rows
    after that. The loop stops at the first experiment whose test fold would run
    past the end of ``data``. From the second experiment on, the weights saved by
    the previous experiment are loaded before training. Each experiment writes
    ``model_experiment_<exp>.pth`` to the current working directory.

    Args:
        data: DataFrame of model inputs; must contain a ``close`` column, which
            defines the up/down label.
        num_experiments: Maximum number of rolling windows to run.
        lookback: Window length passed to ``TimeSeriesDataset``.
        num_epochs: Training epochs per experiment.
        step_size: Rows by which the window advances; also the validation and
            test fold size (the training fold is eight times this).

    Returns:
        List with one test accuracy per completed experiment.

    Raises:
        ValueError: If ``step_size`` is not larger than ``lookback``, which would
            leave the validation/test folds without a single sample.
    """
    if step_size <= lookback:
        raise ValueError("step_size must be larger than lookback")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Derive width and target position from the frame actually passed in, so
    # they always agree with ``data.values``.
    input_dim = data.shape[1]
    target_idx = list(data.columns).index('close')
    test_accuracies = []

    for exp in range(num_experiments):
        train_start = exp * step_size
        train_end = train_start + step_size * 8  # training fold: 8 steps
        val_end = train_end + step_size  # validation fold: 1 step
        test_end = val_end + step_size  # test fold: 1 step

        if test_end > len(data):
            break

        folds = (
            data.iloc[train_start:train_end],
            data.iloc[train_end:val_end],
            data.iloc[val_end:test_end],
        )
        train_dataset, val_dataset, test_dataset = (
            TimeSeriesDataset(fold.values, lookback=lookback, target_idx=target_idx) for fold in folds
        )

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

        # Model initialisation
        model = EncoderOnlyTransformer(input_dim=input_dim).to(device)

        # Fine-tuning: start from the previous experiment's weights when exp > 0
        model_path = f"model_experiment_{exp}.pth"
        if exp > 0:
            try:
                # map_location lets a checkpoint saved on GPU load on a CPU-only host.
                state = torch.load(f"model_experiment_{exp - 1}.pth", map_location=device)
            except FileNotFoundError:
                print(f"Model file for experiment {exp - 1} not found. Starting fresh training.")
            else:
                model.load_state_dict(state)
                print(f"Loaded model from experiment {exp - 1}.")

        # Train
        _train_model(model, train_loader, val_loader, device, num_epochs)

        # Save
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for experiment {exp}.")

        # Test
        _, test_accuracy = _evaluate(model, test_loader, nn.CrossEntropyLoss(), device)
        print(f"Experiment {exp + 1}, Test Accuracy: {test_accuracy:.4f}")
        test_accuracies.append(test_accuracy)

    return test_accuracies

# Run the rolling-window experiments on the preprocessed, scaled frame.
train_and_evaluate(data)
