In [None]:

# 1) Import required libraries

import numpy as np
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader, random_split
from dataset.traffic_dataset import TrafficDataset
from dataset.dataset_config import edge_index, edge_attr
#from models.baselines import STGCN  # 튜닝 대상 모델
#from models.FreTSformer import FreTSformer
from models.STLinear import STLinear
from models.STLinear_deriven import STLinear_SPE
from utils.Trainer import Trainer    # 앞서 만든 Trainer
import optuna


# 2) collate_fn definition (same as before)
def collate_fn(batch_list):
    """Stack per-sample ``x`` and ``y`` tensors into batched tensors.

    Args:
        batch_list: sequence of samples, each exposing tensor attributes
            ``x`` and ``y``; all ``x`` (and all ``y``) must share one shape.

    Returns:
        A ``(xs, ys)`` tuple where each tensor gains a new leading batch axis.
    """
    inputs, targets = [], []
    for sample in batch_list:
        inputs.append(sample.x)
        targets.append(sample.y)
    # A new leading dimension indexes the samples of the batch.
    return torch.stack(inputs, dim=0), torch.stack(targets, dim=0)


In [None]:

# 3) Data preparation
# NOTE(review): allow_pickle=True executes pickled objects on load; only use it
# with dataset files you produced yourself.
dataset_np = np.load('dataset/traffic_dataset_13_smoothen.npy', allow_pickle=True)
dataset = TrafficDataset(dataset_np, window=12, randomize=False)

# 80 / 20 train-validation split.
train_size = int(len(dataset) * 0.8)
val_size = len(dataset) - train_size
# Seed the split so every kernel restart (and therefore every Optuna study)
# sees the same train/validation partition; otherwise results are not comparable.
# NOTE(review): if the samples are overlapping sliding windows, a random split can
# put near-duplicate windows on both sides — confirm a chronological split is not intended.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_ds, batch_size=512, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=512, shuffle=False, collate_fn=collate_fn)

# Pull a single batch to read off the tensor dimensions used when building the model.
x0, y0 = next(iter(train_loader))
B, T, E, C_in = x0.shape
_, n_pred, _, C_out = y0.shape

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Dataset shapes: x0={x0.shape}, y0={y0.shape}, device={device}")


In [None]:

# 4) Optuna objective
def objective(trial):
    """Train one STLinear_SPE candidate and return its best validation loss.

    Samples regularisation and architecture hyperparameters from ``trial``,
    builds the model on ``device``, trains it with ``Trainer`` on the
    notebook-level ``train_loader`` / ``val_loader`` and returns
    ``trainer.get_best_valid_loss()``.

    Args:
        trial: Optuna trial object used to draw the hyperparameter suggestions.

    Returns:
        Whatever ``Trainer.get_best_valid_loss()`` reports after ``fit()``.
    """
    # --- Hyperparameter suggestions ---
    # Regularisation. suggest_float replaces the deprecated
    # suggest_loguniform / suggest_uniform helpers with identical ranges.
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
    dropout = trial.suggest_float("dropout", 0.1, 0.3)

    # Architecture. A previously sampled "K" was never passed to the model, so it
    # is no longer part of the search space.
    kernel_size = trial.suggest_categorical("kernel_size", [17, 33, 65])
    num_layers = trial.suggest_int("num_layers", 2, 4)
    # Must be categorical: suggest_int("num_heads", 1, 2, 4) would read the 4 as
    # a step size over the range [1, 2] and never explore 2 or 4 heads.
    num_heads = trial.suggest_categorical("num_heads", [1, 2, 4])

    # Embedding sizes
    input_embedding_dim = trial.suggest_categorical("input_embedding_dim", [16, 32, 64])
    tod_embedding_dim = trial.suggest_categorical("tod_embedding_dim", [16, 32, 64])
    dow_embedding_dim = trial.suggest_categorical("dow_embedding_dim", [16, 32, 64])
    spatial_embedding_dim = trial.suggest_categorical("spatial_embedding_dim", [0, 16, 32, 64])
    adaptive_embedding_dim = trial.suggest_categorical("adaptive_embedding_dim", [0, 16, 32, 64])
    spe_dim = trial.suggest_categorical("spe_dim", [16, 32, 64])
    spe_out_dim = trial.suggest_categorical("spe_out_dim", [16, 32, 64])

    model = STLinear_SPE(
        num_nodes=E,
        kernel_size=kernel_size,  # odd number
        num_heads=num_heads,
        num_layers=num_layers,
        dropout=dropout,
        input_embedding_dim=input_embedding_dim,
        tod_embedding_dim=tod_embedding_dim,
        dow_embedding_dim=dow_embedding_dim,
        spatial_embedding_dim=spatial_embedding_dim,
        adaptive_embedding_dim=adaptive_embedding_dim,
        spe_dim=spe_dim,
        spe_out_dim=spe_out_dim,
    ).to(device)

    optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=weight_decay)
    criterion = torch.nn.L1Loss()

    # --- Run the Trainer (at most 50 epochs, early-stopping patience of 4) ---
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        valid_loader=val_loader,
        optimizer=optimizer,
        criterion=criterion,
        epochs=50,
        device=device,
        print_interval=0,    # keep console output quiet during the search
        plot_interval=0,     # no plots during the search
        early_stopping_patience=4,
    )
    trainer.fit()

    # The study optimises the best validation loss recorded by the trainer.
    valid_loss = trainer.get_best_valid_loss()
    return valid_loss


In [None]:

# 5) Create the Optuna study and run the search
# A fixed sampler seed keeps the sequence of suggested configurations repeatable.
tpe_sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(sampler=tpe_sampler, direction="minimize")
study.optimize(objective, n_trials=30)


# 6) Report the best trial
print("Best validation loss:", study.best_value)
print("Best hyperparameters:")
for k, v in study.best_params.items():
    print(f"  {k}: {v}")






In [None]:
# 7) Retrain with the best hyperparameters and plot the loss curves
best_params = study.best_params

# Tuned constructor arguments that are forwarded unchanged to STLinear_SPE.
tuned_model_args = (
    # architecture
    'kernel_size',  # odd number
    'num_heads',
    'num_layers',
    'dropout',
    # embedding dimensions
    'input_embedding_dim',
    'tod_embedding_dim',
    'dow_embedding_dim',
    'spatial_embedding_dim',
    'adaptive_embedding_dim',
    'spe_dim',
    'spe_out_dim',
)
best_model = STLinear_SPE(
    num_nodes=E,
    **{arg: best_params[arg] for arg in tuned_model_args},
).to(device)

# Same learning rate as during the search; only weight decay was tuned.
best_opt = AdamW(
    best_model.parameters(),
    lr=5e-5,
    weight_decay=best_params['weight_decay'],
)
final_run_config = dict(
    model=best_model,
    train_loader=train_loader,
    valid_loader=val_loader,
    optimizer=best_opt,
    criterion=torch.nn.L1Loss(),
    epochs=60,
    device=device,
    print_interval=0,
    plot_interval=2,
    auto_save=True,
    save_dir='./final_model',
)
trainer = Trainer(**final_run_config)
trainer.fit()
hist = trainer.get_history()

import matplotlib.pyplot as plt

# Training and validation loss curves from the trainer history.
for key, label in (('train_loss', 'Train Loss'), ('valid_loss', 'Val Loss')):
    plt.plot(hist[key], label=label)
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
from scipy.cluster.hierarchy import linkage, leaves_list

# Run one training sample through the retrained model and collect its attention maps.
x_batch, y_batch = next(iter(train_loader))   # batched inputs / targets from collate_fn
x_input = x_batch[0].unsqueeze(0).to(device)  # keep a batch axis of size 1

best_model.eval()

with torch.no_grad():
    output, attention_maps = best_model(x_input, return_attn=True)

# 1) Pick the attention map to inspect.
#    Index 3 is requested; it is clamped to the last available map so the cell still
#    runs when the model returns fewer maps, and the plot title reports the index
#    that was actually used.
layer_idx = min(3, len(attention_maps) - 1)
layer_attn = attention_maps[layer_idx]
# Presumably (batch, heads, time, nodes, nodes) — TODO confirm against the model.
# Deliberately NOT unpacked into B / T / E: those notebook-level names are read by
# `objective` and the retraining cell and must not be overwritten here.
_, attn_heads, attn_steps, attn_nodes, _ = layer_attn.shape

# 2) Aggregation:
#    a) average over heads  -> one map per time step
#    b) select a single time step (or average over time) -> one (node, node) map
attn_heads_avg = layer_attn[0].mean(dim=0)          # first batch element, head average
time_idx = 0
attn_agg = attn_heads_avg[time_idx].cpu().numpy()   # 2-D node-by-node map

# To average over time steps as well:
# attn_agg = attn_heads_avg.mean(dim=0).cpu().numpy()
# To inspect a single head instead of the head average:
# attn_agg = layer_attn[0, head_idx, time_idx].cpu().numpy()

# 3) Clustering: hierarchical clustering on each node's mean attention (row mean)
#    yields a node ordering that groups similar rows together.
sim = attn_agg.mean(axis=1)
Z = linkage(sim.reshape(-1, 1), method='average')
order = leaves_list(Z)                 # node indices in clustered order

# 4) Re-ordered map (rows and columns permuted identically)
attn_clustered = attn_agg[order][:, order]

# 5) Heatmap; tick labels carry the original node indices.
plt.figure(figsize=(6, 5))
sns.heatmap(attn_clustered, cmap='viridis',
            xticklabels=order, yticklabels=order)
plt.title(f'Layer{layer_idx} Head-Avg t={time_idx} (Clustered)')
plt.xlabel('Reordered Key Node')
plt.ylabel('Reordered Query Node')
plt.tight_layout()
plt.show()

# 6) Thresholded attention graph.
#    Built from `attn_agg` (not the re-ordered matrix) so that graph node labels are
#    the original node indices and match the heatmap tick labels.
attn_threshold = 0.03
G = nx.DiGraph()
for i, j in zip(*np.nonzero(attn_agg > attn_threshold)):
    G.add_edge(int(i), int(j), weight=float(attn_agg[i, j]))

# Fixed seed so the layout is the same on every run.
pos = nx.spring_layout(G, seed=42)

plt.figure(figsize=(8, 6))
nx.draw(G, pos, with_labels=True,
        width=[G[u][v]['weight'] * 5 for u, v in G.edges()])

plt.title("Spatial Attention Graph (thresholded)")
plt.show()