In [14]:
import pandas as pd
import glob
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler

In [2]:
os.chdir('/home1/gkrtod35/ISF/TimeGAN/Origin_data')

In [16]:
df = pd.read_csv('merged_data_processed_seoul.csv', low_memory=False)
df.head()

Unnamed: 0,Idx,date,time,solar generation,일시,기온(°C),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),...,일조(hr),일사(MJ/m2),적설(cm),전운량(10분위),중하층운량(10분위),지면온도(°C),5cm 지중온도(°C),10cm 지중온도(°C),20cm 지중온도(°C),30cm 지중온도(°C)
0,0,2014-01-01,0,0.0,2014-01-01 00:00,3.3,3.8,250.0,65.0,5.0,...,0.0,0.0,0.0,6.0,6.0,0.0,0.1,-0.2,0.0,1.5
1,0,2014-01-01,1,0.0,2014-01-01 01:00,2.6,2.3,250.0,66.0,4.9,...,0.0,0.0,0.0,0.0,0.0,-0.1,0.1,-0.2,0.1,1.5
2,0,2014-01-01,2,0.0,2014-01-01 02:00,1.7,1.7,250.0,67.0,4.6,...,0.0,0.0,0.0,0.0,0.0,-0.3,0.0,-0.2,0.0,1.5
3,0,2014-01-01,3,0.0,2014-01-01 03:00,1.4,1.4,250.0,60.0,4.1,...,0.0,0.0,0.0,0.0,0.0,-0.4,0.0,-0.2,0.1,1.5
4,0,2014-01-01,4,0.0,2014-01-01 04:00,0.9,2.8,270.0,59.0,3.8,...,0.0,0.0,0.0,0.0,0.0,-0.6,0.0,-0.2,0.0,1.5


In [18]:
# 
seq_len  = 8760   # 학습용: 1년치 (1h 단위 → 8760h)
pred_len =   24   # 테스트용: 1일치 (24h)

# 꼬리(tail)에서 잘라내기
#    – train_data: 마지막(pred_len)시간 바로 앞의 seq_len시간
#    – test_data : 마지막 pred_len시간
df = df[-(seq_len + pred_len) : ]  # shape (8760, C)

In [19]:
numeric_df = df.drop(columns=['Idx','date','time','일시'])
numeric_df = numeric_df.apply(pd.to_numeric, errors='coerce')

In [20]:
class SlidingWindowDataset(Dataset):
    def __init__(self, data, seq_len=720, pred_len=24, stride=24):
        """
        data: array-like [T, C]
        seq_len: 입력 길이 (예: 720)
        pred_len: 예측 길이 (예: 24)
        stride: 윈도우 이동 간격 (예: 1)
        """
        import numpy as np
        data = np.asarray(data, dtype=np.float32)
        T, C  = data.shape
        self.X, self.Y = [], []
        for i in range(0, T - seq_len - pred_len + 1, stride):
            self.X.append(data[i : i+seq_len])                          # [seq_len, C]
            self.Y.append(data[i+seq_len : i+seq_len+pred_len, 0])      # [pred_len]
        self.X = torch.from_numpy(np.stack(self.X))  # [N, seq_len, C]
        self.Y = torch.from_numpy(np.stack(self.Y))  # [N, pred_len]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # [C, seq_len], [pred_len]
        return self.X[idx].transpose(0,1), self.Y[idx]

In [10]:
# 데이터셋 정의: sliding window로 (X, y) 생성
class TimeSeriesDataset(Dataset):
    def __init__(self, data, seq_len=8760, pred_len=24):
        """
        data: NumPy array of shape [T, C] (시간 T, 채널 C)
        seq_len: 입력 시퀀스 길이 (8760)
        pred_len: 예측할 시퀀스 길이 (24)
        """
        data = np.asarray(data, dtype=np.float32)
        
        self.X = []
        self.Y = []
        T, C = data.shape
        for i in range(T - seq_len - pred_len + 1):
            self.X.append(data[i:i+seq_len])
            self.Y.append(data[i+seq_len:i+seq_len+pred_len, 0])  # 채널0=solar 예측
        self.X = torch.tensor(self.X, dtype=torch.float32)      # [N, seq_len, C]
        self.Y = torch.tensor(self.Y, dtype=torch.float32)      # [N, pred_len]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # 모델에 넣기 편하게 채널 차원을 앞쪽으로 옮김: [C, seq_len]
        return self.X[idx].transpose(0,1), self.Y[idx]

In [6]:
# 모델 정의: CNN → LSTM → FC
class CNN_LSTM(nn.Module):
    def __init__(self, in_channels, cnn_channels=32, lstm_hidden=64, pred_len=24):
        super().__init__()
        # 1D-CNN: 채널별 시계열 특성 추출
        self.conv1 = nn.Conv1d(in_channels, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(16, cnn_channels, kernel_size=3, padding=1)
        self.relu  = nn.ReLU()
        self.pool  = nn.MaxPool1d(2)  # 길이 절반
        
        # LSTM: 시계열 의존성 학습
        self.lstm = nn.LSTM(
            input_size=cnn_channels,
            hidden_size=lstm_hidden,
            num_layers=2,
            batch_first=True
        )
        # 최종 24시간 예측용 FC
        self.fc = nn.Linear(lstm_hidden, pred_len)

    def forward(self, x):
        # x: [B, C, seq_len]
        x = self.relu(self.conv1(x))     # [B, 16, L]
        x = self.relu(self.conv2(x))     # [B, cnn_channels, L]
        x = self.pool(x)                 # [B, cnn_channels, L/2]
        
        # LSTM 입력: (batch, time, feat)
        x = x.transpose(1,2)             # [B, L/2, cnn_channels]
        out, _ = self.lstm(x)            # out: [B, L/2, lstm_hidden]
        
        # 마지막 시점만 사용해 예측
        out = out[:, -1, :]              # [B, lstm_hidden]
        y_hat = self.fc(out)             # [B, pred_len]
        return y_hat

In [7]:
# 학습/평가 루프
def train(model, loader, optim, criterion, device):
    model.train()
    total_loss = 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optim.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optim.step()
        total_loss += loss.item() * x.size(0)
    return total_loss / len(loader.dataset)

def evaluate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            total_loss += criterion(y_pred, y).item() * x.size(0)
    return total_loss / len(loader.dataset)

In [92]:
# 실행 예시
if __name__ == "__main__":
    import numpy as np

    # (1) 데이터 로딩—예: NumPy array of shape [T, C]
    data = numeric_df                             # 내 데이터
    # 2) 정규화
    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)
    
    seq_len, pred_len = 4096, 24
    ds = SlidingWindowDataset(data, seq_len, pred_len, stride=24)
    train_size = int(len(ds)*0.8)
    val_size   = len(ds) - train_size
    train_ds, val_ds = torch.utils.data.random_split(ds, [train_size, val_size])
    train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=8)

    # (2) 모델/최적화/손실함수
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNN_LSTM(in_channels=data.shape[1]).to(device)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # (3) 학습 루프
    epochs = 20
    for e in range(1, epochs+1):
        tr_loss = train(model, train_loader, optim, criterion, device)
        val_loss = evaluate(model, val_loader,   criterion, device)
        print(f"Epoch {e:02d} | Train Loss: {tr_loss:.4f} | Val Loss: {val_loss:.4f}")
    
    # (4) 예측
    #x_sample, _ = ds[0]
    #x_sample = x_sample.unsqueeze(0).to(device)  # [1, C, seq_len]
    #y_pred = model(x_sample)                     # [1, pred_len]
    #print("Next 24h forecast:", y_pred.cpu().numpy().flatten())

Epoch 01 | Train Loss: 0.0631 | Val Loss: 0.0188
Epoch 02 | Train Loss: 0.0199 | Val Loss: 0.0191
Epoch 03 | Train Loss: 0.0187 | Val Loss: 0.0190
Epoch 04 | Train Loss: 0.0190 | Val Loss: 0.0156
Epoch 05 | Train Loss: 0.0176 | Val Loss: 0.0158
Epoch 06 | Train Loss: 0.0173 | Val Loss: 0.0144
Epoch 07 | Train Loss: 0.0177 | Val Loss: 0.0147
Epoch 08 | Train Loss: 0.0169 | Val Loss: 0.0154
Epoch 09 | Train Loss: 0.0166 | Val Loss: 0.0165
Epoch 10 | Train Loss: 0.0166 | Val Loss: 0.0166
Epoch 11 | Train Loss: 0.0167 | Val Loss: 0.0149
Epoch 12 | Train Loss: 0.0161 | Val Loss: 0.0132
Epoch 13 | Train Loss: 0.0153 | Val Loss: 0.0157
Epoch 14 | Train Loss: 0.0162 | Val Loss: 0.0147
Epoch 15 | Train Loss: 0.0162 | Val Loss: 0.0132
Epoch 16 | Train Loss: 0.0152 | Val Loss: 0.0155
Epoch 17 | Train Loss: 0.0150 | Val Loss: 0.0137
Epoch 18 | Train Loss: 0.0151 | Val Loss: 0.0160
Epoch 19 | Train Loss: 0.0169 | Val Loss: 0.0158
Epoch 20 | Train Loss: 0.0153 | Val Loss: 0.0132


In [93]:
    # 6) 샘플 예측
    x_sample, y_true = ds[0]
    x_sample = x_sample.unsqueeze(0).to(device)  # [1, C, seq_len]
    y_pred_scaled = model(x_sample).cpu().detach().numpy().flatten()
    y_true_scaled = y_true.numpy()

In [85]:
# --- 역스케일링 시작 ---
import numpy as np
solar_idx = 0

# 전체 피처 개수
C = data.shape[1]

# 예측/실제 배열을 (pred_len, C) 모양으로 만들고 
pred_full = np.zeros((pred_len, C), dtype=np.float32)
true_full = np.zeros((pred_len, C), dtype=np.float32)

# solar generation 채널(solar_idx)에만 값 채우기
pred_full[:, solar_idx] = y_pred_scaled
true_full[:, solar_idx] = y_true_scaled

# MinMaxScaler.inverse_transform
pred_orig = scaler.inverse_transform(pred_full)[:, solar_idx]
true_orig = scaler.inverse_transform(true_full)[:, solar_idx]
# --- 역스케일링 끝 ---

print("True next-24h (원단위):", true_orig)
print("Pred next-24h (원단위):", pred_orig)

True next-24h (원단위): [ 6.0787883   3.021766    1.319169    0.14787701  0.05296     0.05288
  0.0448      0.          0.          0.          0.          0.
  0.          0.063022    1.079396    2.910719    5.464043    8.924349
 10.057772   10.173965    9.227369   10.90645    10.762956    9.49266   ]
Pred next-24h (원단위): [ 5.06035566e+00  2.56909680e+00  8.24429691e-01  2.96606302e-01
  1.40322119e-01  1.15152486e-01  7.24719018e-02  1.43034503e-01
  2.04211920e-02  1.78020857e-02  4.67847846e-02 -9.85416491e-03
 -5.07631488e-02 -6.12467295e-03  3.48015696e-01  1.92029810e+00
  4.39507151e+00  6.62896013e+00  8.01500511e+00  9.07592106e+00
  9.00781059e+00  8.66725254e+00  7.85349178e+00  6.30200291e+00]


In [91]:
# --- RMSE 계산 ---
import numpy as np

rmse = np.sqrt(np.mean((pred_orig - true_orig) ** 2))
print(f"RMSE: {rmse:.4f}")

RMSE: 1.2676


## 보름치 데이터
보름치 데이터로 하루 예측한 RMSE와
디퓨전 생성모형을 통해 보름치 데이터를 여러개 만들어 넣은 걸 합친 데이터로 하루 예측한 RMSE로 

두 가지 경우를 비교

In [88]:
# 
df = pd.read_csv('merged_data_processed_seoul.csv', low_memory=False)

seq_len  = 360   # 학습용: 보름치 (1h 단위 → 360h)
pred_len =  24   # 테스트용: 1일치 (24h)

# 꼬리(tail)에서 잘라내기
#    – train_data: 마지막(pred_len)시간 바로 앞의 seq_len시간
#    – test_data : 마지막 pred_len시간
df_short = df[-(seq_len + pred_len) : ]  # 
numeric_df_short = df_short.drop(columns=['Idx','date','time','일시'])
numeric_df_short = numeric_df_short.apply(pd.to_numeric, errors='coerce')

In [94]:
if __name__ == "__main__":
    import numpy as np

    #  데이터 로딩—예: NumPy array of shape [T, C]
    data = numeric_df_short                             # 내 데이터
    #  정규화
    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)
    
    seq_len, pred_len = 24, 24
    ds = SlidingWindowDataset(data, seq_len, pred_len, stride=24)
    train_size = int(len(ds)*0.8)
    val_size   = len(ds) - train_size
    train_ds, val_ds = torch.utils.data.random_split(ds, [train_size, val_size])
    train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=8)

    # 모델/최적화/손실함수
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNN_LSTM(in_channels=data.shape[1]).to(device)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # 학습 루프
    epochs = 20
    for e in range(1, epochs+1):
        tr_loss = train(model, train_loader, optim, criterion, device)
        val_loss = evaluate(model, val_loader,   criterion, device)
        print(f"Epoch {e:02d} | Train Loss: {tr_loss:.4f} | Val Loss: {val_loss:.4f}")
    
    # 샘플 예측
    x_sample, y_true = ds[0]
    x_sample = x_sample.unsqueeze(0).to(device)  # [1, C, seq_len]
    y_pred_scaled = model(x_sample).cpu().detach().numpy().flatten()
    y_true_scaled = y_true.numpy()

# --- 역스케일링 시작 ---
solar_idx = 0

# 전체 피처 개수
C = data.shape[1]

# 예측/실제 배열을 (pred_len, C) 모양으로 만들고 
pred_full = np.zeros((pred_len, C), dtype=np.float32)
true_full = np.zeros((pred_len, C), dtype=np.float32)

# solar generation 채널(solar_idx)에만 값 채우기
pred_full[:, solar_idx] = y_pred_scaled
true_full[:, solar_idx] = y_true_scaled

# MinMaxScaler.inverse_transform
pred_orig = scaler.inverse_transform(pred_full)[:, solar_idx]
true_orig = scaler.inverse_transform(true_full)[:, solar_idx]
# --- 역스케일링 끝 ---

# --- RMSE 계산 ---
import numpy as np

rmse = np.sqrt(np.mean((pred_orig - true_orig) ** 2))
print(f"RMSE: {rmse:.4f}")

Epoch 01 | Train Loss: 0.1191 | Val Loss: 0.0664
Epoch 02 | Train Loss: 0.1096 | Val Loss: 0.0611
Epoch 03 | Train Loss: 0.1004 | Val Loss: 0.0556
Epoch 04 | Train Loss: 0.0898 | Val Loss: 0.0494
Epoch 05 | Train Loss: 0.0769 | Val Loss: 0.0428
Epoch 06 | Train Loss: 0.0613 | Val Loss: 0.0385
Epoch 07 | Train Loss: 0.0398 | Val Loss: 0.0489
Epoch 08 | Train Loss: 0.0271 | Val Loss: 0.0827
Epoch 09 | Train Loss: 0.0266 | Val Loss: 0.0829
Epoch 10 | Train Loss: 0.0223 | Val Loss: 0.0680
Epoch 11 | Train Loss: 0.0152 | Val Loss: 0.0537
Epoch 12 | Train Loss: 0.0139 | Val Loss: 0.0468
Epoch 13 | Train Loss: 0.0159 | Val Loss: 0.0452
Epoch 14 | Train Loss: 0.0161 | Val Loss: 0.0475
Epoch 15 | Train Loss: 0.0152 | Val Loss: 0.0520
Epoch 16 | Train Loss: 0.0140 | Val Loss: 0.0577
Epoch 17 | Train Loss: 0.0143 | Val Loss: 0.0634
Epoch 18 | Train Loss: 0.0143 | Val Loss: 0.0639
Epoch 19 | Train Loss: 0.0141 | Val Loss: 0.0612
Epoch 20 | Train Loss: 0.0132 | Val Loss: 0.0552
RMSE: 3.4447


## 보름치 데이터
보름치 데이터로 하루 예측한 RMSE와

디퓨전 생성모형을 통해 보름치 데이터를 여러개 만들어 넣은 걸 합친 데이터로 하루 예측한 RMSE로 

두 가지 경우를 비교

In [96]:
# 
df = pd.read_csv('merged_data_processed_seoul.csv', low_memory=False)

seq_len  = 360   # 학습용: 보름치 (1h 단위 → 360h)
pred_len =  24   # 테스트용: 1일치 (24h)

# 꼬리(tail)에서 잘라내기
#    – train_data: 마지막(pred_len)시간 바로 앞의 seq_len시간
#    – test_data : 마지막 pred_len시간
df_short = df[-(seq_len + pred_len) : ]  # 
numeric_df_short = df_short.drop(columns=['Idx','date','time','일시'])
numeric_df_short = numeric_df_short.apply(pd.to_numeric, errors='coerce')

### 보름치 데이터로 하루 예측한 RMSE

In [97]:
if __name__ == "__main__":
    import numpy as np

    #  데이터 로딩—예: NumPy array of shape [T, C]
    data = numeric_df_short                             # 내 데이터
    #  정규화
    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)
    
    seq_len, pred_len = 24, 24
    ds = SlidingWindowDataset(data, seq_len, pred_len, stride=24)
    train_size = int(len(ds)*0.8)
    val_size   = len(ds) - train_size
    train_ds, val_ds = torch.utils.data.random_split(ds, [train_size, val_size])
    train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=8)

    # 모델/최적화/손실함수
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNN_LSTM(in_channels=data.shape[1]).to(device)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # 학습 루프
    epochs = 20
    for e in range(1, epochs+1):
        tr_loss = train(model, train_loader, optim, criterion, device)
        val_loss = evaluate(model, val_loader,   criterion, device)
        print(f"Epoch {e:02d} | Train Loss: {tr_loss:.4f} | Val Loss: {val_loss:.4f}")
    
    # 샘플 예측
    x_sample, y_true = ds[0]
    x_sample = x_sample.unsqueeze(0).to(device)  # [1, C, seq_len]
    y_pred_scaled = model(x_sample).cpu().detach().numpy().flatten()
    y_true_scaled = y_true.numpy()

# --- 역스케일링 시작 ---
solar_idx = 0

# 전체 피처 개수
C = data.shape[1]

# 예측/실제 배열을 (pred_len, C) 모양으로 만들고 
pred_full = np.zeros((pred_len, C), dtype=np.float32)
true_full = np.zeros((pred_len, C), dtype=np.float32)

# solar generation 채널(solar_idx)에만 값 채우기
pred_full[:, solar_idx] = y_pred_scaled
true_full[:, solar_idx] = y_true_scaled

# MinMaxScaler.inverse_transform
pred_orig = scaler.inverse_transform(pred_full)[:, solar_idx]
true_orig = scaler.inverse_transform(true_full)[:, solar_idx]
# --- 역스케일링 끝 ---

# --- RMSE 계산 ---
import numpy as np

rmse = np.sqrt(np.mean((pred_orig - true_orig) ** 2))
print(f"RMSE: {rmse:.4f}")

Epoch 01 | Train Loss: 0.0892 | Val Loss: 0.0895
Epoch 02 | Train Loss: 0.0824 | Val Loss: 0.0824
Epoch 03 | Train Loss: 0.0765 | Val Loss: 0.0749
Epoch 04 | Train Loss: 0.0694 | Val Loss: 0.0663
Epoch 05 | Train Loss: 0.0623 | Val Loss: 0.0557
Epoch 06 | Train Loss: 0.0526 | Val Loss: 0.0427
Epoch 07 | Train Loss: 0.0434 | Val Loss: 0.0293
Epoch 08 | Train Loss: 0.0355 | Val Loss: 0.0203
Epoch 09 | Train Loss: 0.0329 | Val Loss: 0.0142
Epoch 10 | Train Loss: 0.0280 | Val Loss: 0.0111
Epoch 11 | Train Loss: 0.0240 | Val Loss: 0.0100
Epoch 12 | Train Loss: 0.0232 | Val Loss: 0.0099
Epoch 13 | Train Loss: 0.0237 | Val Loss: 0.0098
Epoch 14 | Train Loss: 0.0240 | Val Loss: 0.0093
Epoch 15 | Train Loss: 0.0240 | Val Loss: 0.0087
Epoch 16 | Train Loss: 0.0235 | Val Loss: 0.0087
Epoch 17 | Train Loss: 0.0230 | Val Loss: 0.0091
Epoch 18 | Train Loss: 0.0224 | Val Loss: 0.0094
Epoch 19 | Train Loss: 0.0221 | Val Loss: 0.0099
Epoch 20 | Train Loss: 0.0220 | Val Loss: 0.0103
RMSE: 2.7849


### 디퓨전 생성모형을 통해 보름치 데이터를 여러개 만들어 넣은 걸 합친 데이터로 하루 예측한 RMSE

In [None]:
import pickle

# 'array.pkl'에 저장된 NumPy 배열을 불러오기
with open('/home1/gkrtod35/Diffusion-TS/array.pkl', 'rb') as f:
    merged = pickle.load(f)

In [None]:
# 합성 데이터에서 X_synth, Y_synth 뽑기
X_synth = merged[:, :seq_len, :]            # (N_synth, seq_len, C)
Y_synth = merged[:, seq_len:seq_len+pred_len, 0]  # (N_synth, pred_len)  # 채널0=타깃

In [None]:
# data.shape == (T, C)
ds_real = SlidingWindowDataset(data, seq_len=seq_len, pred_len=pred_len, stride=24)
# ds_real.X.shape == (N_real, seq_len, C)
# ds_real.Y.shape == (N_real, pred_len)

X_real = ds_real.X.numpy()
Y_real = ds_real.Y.numpy()

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# 1) 합치기
X_all = np.concatenate([X_real, X_synth], axis=0)  # (N_real+N_synth, seq_len, C)
Y_all = np.concatenate([Y_real, Y_synth], axis=0)  # (N_real+N_synth, pred_len)

# 2) 텐서로 변환하고, 채널 축을 앞쪽으로 옮기기: [N, C, seq_len]
X_all = torch.tensor(X_all, dtype=torch.float32).transpose(1,2)
Y_all = torch.tensor(Y_all, dtype=torch.float32)

# 3) Dataset & DataLoader
combined_ds = TensorDataset(X_all, Y_all)


In [None]:
from torch.utils.data import TensorDataset, DataLoader, random_split

# 5) train/val 
if __name__ == "__main__":
    n_train = int(len(combined_ds) * 0.8)
    n_val   = len(combined_ds) - n_train
    train_ds, val_ds = random_split(combined_ds, [n_train, n_val])

    train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=8)

    # 6) 모델·옵티마이저·손실함수 세팅
    device    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNN_LSTM(in_channels=data.shape[1]).to(device)
    optim     = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # 7) 학습 루프
    epochs = 20
    for epoch in range(1, epochs+1):
        tr_loss  = train_epoch(model, train_loader, optim, criterion, device)
        val_loss = eval_epoch( model, val_loader,   criterion, device)
        print(f"Epoch {epoch:02d} | Train: {tr_loss:.4f} | Val: {val_loss:.4f}")

    # 8) 샘플 예측 + 역스케일링 + RMSE 계산
    x_sample, y_true = combined_ds[0]
    x_sample = x_sample.unsqueeze(0).to(device)  # [1, C, seq_len]
    y_pred_scaled = model(x_sample).cpu().detach().numpy().flatten()
    y_true_scaled = y_true.numpy()

    # 역스케일링
    solar_idx = 0
    C = data.shape[1]
    pred_full = np.zeros((pred_len, C), dtype=np.float32)
    true_full = np.zeros((pred_len, C), dtype=np.float32)
    pred_full[:, solar_idx] = y_pred_scaled
    true_full[:, solar_idx] = y_true_scaled
    pred_orig = scaler.inverse_transform(pred_full)[:, solar_idx]
    true_orig = scaler.inverse_transform(true_full)[:, solar_idx]

    rmse = np.sqrt(np.mean((pred_orig - true_orig)**2))
    print(f"RMSE: {rmse:.4f}")