In [2]:
import pandas as pd
import glob
import os

In [3]:
os.chdir('/home1/gkrtod35/ISF/TimeGAN/Origin_data')

In [31]:
df = pd.read_csv('merged_data_processed_seoul.csv', low_memory=False)
df.head()

Unnamed: 0,Idx,date,time,solar generation,일시,기온(°C),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),...,일조(hr),일사(MJ/m2),적설(cm),전운량(10분위),중하층운량(10분위),지면온도(°C),5cm 지중온도(°C),10cm 지중온도(°C),20cm 지중온도(°C),30cm 지중온도(°C)
0,0,2014-01-01,0,0.0,2014-01-01 00:00,3.3,3.8,250.0,65.0,5.0,...,0.0,0.0,0.0,6.0,6.0,0.0,0.1,-0.2,0.0,1.5
1,0,2014-01-01,1,0.0,2014-01-01 01:00,2.6,2.3,250.0,66.0,4.9,...,0.0,0.0,0.0,0.0,0.0,-0.1,0.1,-0.2,0.1,1.5
2,0,2014-01-01,2,0.0,2014-01-01 02:00,1.7,1.7,250.0,67.0,4.6,...,0.0,0.0,0.0,0.0,0.0,-0.3,0.0,-0.2,0.0,1.5
3,0,2014-01-01,3,0.0,2014-01-01 03:00,1.4,1.4,250.0,60.0,4.1,...,0.0,0.0,0.0,0.0,0.0,-0.4,0.0,-0.2,0.1,1.5
4,0,2014-01-01,4,0.0,2014-01-01 04:00,0.9,2.8,270.0,59.0,3.8,...,0.0,0.0,0.0,0.0,0.0,-0.6,0.0,-0.2,0.0,1.5


In [21]:
df.dtypes

Idx                   int64
date                 object
time                  int64
solar generation     object
일시                   object
기온(°C)              float64
풍속(m/s)             float64
풍향(16방위)            float64
습도(%)               float64
증기압(hPa)            float64
이슬점온도(°C)           float64
현지기압(hPa)           float64
해면기압(hPa)           float64
일조(hr)              float64
일사(MJ/m2)           float64
적설(cm)              float64
전운량(10분위)           float64
중하층운량(10분위)         float64
지면온도(°C)            float64
5cm 지중온도(°C)        float64
10cm 지중온도(°C)       float64
20cm 지중온도(°C)       float64
30cm 지중온도(°C)       float64
dtype: object

In [32]:
# 
seq_len  = 8760   # 학습용: 1년치 (1h 단위 → 8760h)
pred_len =   24   # 테스트용: 1일치 (24h)

# 꼬리(tail)에서 잘라내기
#    – train_data: 마지막(pred_len)시간 바로 앞의 seq_len시간
#    – test_data : 마지막 pred_len시간
df = df[-(seq_len + pred_len) : ]  # shape (8760, C)

In [33]:
numeric_df = df.drop(columns=['Idx','date','time','일시'])
numeric_df = numeric_df.apply(pd.to_numeric, errors='coerce')

In [11]:
import torch
import numpy as np
from torch.utils.data import Dataset

In [12]:
class MultivariateTimeSeriesDataset(Dataset):
    def __init__(self, series: np.ndarray, seq_len: int,
                 horizon: int, target_idx: int):
        """
        series: (T, F) 형태의 다변량 시계열
        seq_len: 과거 스텝 수 (8760)
        horizon: 예측할 미래 스텝 수 (24)
        target_idx: series 열 기준, Solar Generation의 인덱스
        """
        self.series     = torch.tensor(series, dtype=torch.float)
        self.seq_len    = seq_len
        self.horizon    = horizon
        self.target_idx = target_idx

    def __len__(self):
        # 가능한 윈도우 개수 = T - seq_len - horizon + 1
        return len(self.series) - self.seq_len - self.horizon + 1

    def __getitem__(self, idx):
        x = self.series[
            idx : idx + self.seq_len
        ]                              # (seq_len, F)
        y = self.series[
            idx + self.seq_len :
            idx + self.seq_len + self.horizon,
            self.target_idx
        ]                              # (horizon,)
        return x, y

In [13]:
import math
import torch.nn as nn

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=10000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)               # (max_len, d_model)
        pos = torch.arange(max_len).unsqueeze(1).float() # (max_len,1)
        div = torch.exp(torch.arange(0, d_model, 2).float()
                        * -(math.log(10000.0)/d_model))
        pe[:, 0::2] = torch.sin(pos * div)
        pe[:, 1::2] = torch.cos(pos * div)
        self.register_buffer('pe', pe.unsqueeze(0))     # (1, max_len, d_model)

    def forward(self, x):
        # x: (B, seq_len, d_model)
        return x + self.pe[:, :x.size(1)]

In [14]:
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim, d_model=64, nhead=4,
                 num_layers=3, dim_feedforward=128,
                 dropout=0.1, horizon=24):
        super().__init__()
        # 1) 입력 투영: F → d_model
        self.input_proj = nn.Linear(input_dim, d_model)
        # 2) 위치 인코딩
        self.pos_encoder = PositionalEncoding(d_model)
        # 3) EncoderLayer 정의
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout
        )
        # 4) 여러 레이어 쌓기
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers
        )
        # 5) 출력층: d_model → horizon
        self.output_head = nn.Linear(d_model, horizon)
        self.horizon = horizon

    def forward(self, src):
        """
        src: (B, seq_len, F)
        returns: (B, horizon) — Solar Generation 24스텝 예측
        """
        # 1) 투영 & 위치 인코딩
        x = self.input_proj(src)           # (B, seq_len, d_model)
        x = self.pos_encoder(x)            # (B, seq_len, d_model)
        # 2) Transformer가 요구하는 순서로 변환
        x = x.permute(1, 0, 2)             # (seq_len, B, d_model)
        # 3) Encoder 통과
        x = self.transformer_encoder(x)    # (seq_len, B, d_model)
        # 4) 마지막 시점만 사용
        x = x[-1]                          # (B, d_model)
        # 5) 예측치 생성
        out = self.output_head(x)          # (B, horizon)
        return out

In [15]:
import torch.optim as optim

def train_model(model, dataloader, n_epochs=30,
                lr=1e-4, device='cuda'):
    model.to(device)
    opt = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(1, n_epochs+1):
        model.train()
        total_loss = 0.0
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)  # (B,8760,F), (B,24)
            opt.zero_grad()
            pred = model(x)                    # (B,24)
            loss = criterion(pred, y)
            loss.backward()
            opt.step()
            total_loss += loss.item() * x.size(0)
        avg = total_loss / len(dataloader.dataset)
        print(f'Epoch {epoch:02d}  Loss: {avg:.4f}')

In [36]:
if __name__ == '__main__':
    import numpy as np
    from torch.utils.data import DataLoader

    # 시계열 —
    T, F = 20000, 19            # F는 변수 갯수 지정
    data = numeric_df           # 데이터 갖고오기
    solar_idx = 0               # Solar Generation 행 지정

    # 데이터셋／로더
    seq_len = 720            # 8760
    horizon = 24

    series_np = data.to_numpy()    # DataFrame → NumPy 배열
    dataset  = MultivariateTimeSeriesDataset(
        series_np, seq_len, horizon, solar_idx
    )
    loader   = DataLoader(
        dataset, batch_size=8, shuffle=True
    )

    # 모델 & 학습
    model = TimeSeriesTransformer(
        input_dim=F,
        d_model=128,
        nhead=8,
        num_layers=4,
        dim_feedforward=256,
        dropout=0.1,
        horizon=horizon
    )
    train_model(model, loader, n_epochs=20,
                lr=5e-4, device='cuda')

Epoch 01  Loss: 13.7594
Epoch 02  Loss: 13.6928
Epoch 03  Loss: 13.6821
Epoch 04  Loss: 13.6738
Epoch 05  Loss: 13.6675
Epoch 06  Loss: 13.6727
Epoch 07  Loss: 13.6615
Epoch 08  Loss: 13.6671
Epoch 09  Loss: 13.6635
Epoch 10  Loss: 13.6597
Epoch 11  Loss: 13.6626
Epoch 12  Loss: 13.6638
Epoch 13  Loss: 13.6597
Epoch 14  Loss: 13.6575
Epoch 15  Loss: 13.6590
Epoch 16  Loss: 13.6561
Epoch 17  Loss: 13.6556
Epoch 18  Loss: 13.6526
Epoch 19  Loss: 13.6534
Epoch 20  Loss: 13.6540


In [37]:
# rmse 계산

model.eval()
all_rmse = 0.0
n_samples = 0

with torch.no_grad():
    for x, y in val_loader:        # x: (B,8760,F), y: (B,24)
        x, y = x.to(device), y.to(device)
        preds = model(x)           # preds: (B,24)

        # 배치 전체에 대한 MSE
        mse = torch.mean((preds - y) ** 2)
        rmse = torch.sqrt(mse)     # 스칼라

        all_rmse += rmse.item() * x.size(0)
        n_samples += x.size(0)

mean_rmse = all_rmse / n_samples
print(f'Validation RMSE (24-step): {mean_rmse:.4f}')

NameError: name 'val_loader' is not defined