In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingLR
from torch_geometric.nn import GATConv
#from utils.create_split_masks import create_split_masks_regression
#from utils.earlyStopping import EarlyStopping
#from utils.train_GNN_model import train_GNN_model
#from utils.initialize_weights import initialize_weights
import argparse
import os
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
import matplotlib.pyplot as plt


In [2]:
def create_split_masks_regression(data, train_ratio=0.7, val_ratio=0.1, seed=42, normalize=True):
    """Randomly split the nodes of ``data`` into train / validation / test masks.

    Node indices are shuffled with ``torch.randperm`` and cut into three
    consecutive slices: the first ``int(n * train_ratio)`` indices form the
    training set, the next ``int(n * val_ratio)`` the validation set, and all
    remaining indices the test set.

    Side effects:
        * ``torch.manual_seed(seed)`` reseeds the *global* torch RNG, so every
          random draw made afterwards is affected too.
        * With ``normalize=True``, ``data.x`` is replaced by a standardised
          ``float32`` tensor. The scaler is fitted on the training rows only
          and then applied to every row, so no validation/test statistics
          leak into the scaling.

    Args:
        data: Object exposing ``num_nodes`` and, when ``normalize`` is true,
            a feature tensor ``x`` that can be indexed by node.
        train_ratio: Fraction of nodes assigned to the training split.
        val_ratio: Fraction of nodes assigned to the validation split.
        seed: Seed passed to ``torch.manual_seed``.
        normalize: Whether to standardise ``data.x`` in place (see above).

    Returns:
        Tuple ``(train_mask, val_mask, test_mask)`` of boolean tensors of
        length ``data.num_nodes``.

    Raises:
        ValueError: If a ratio lies outside ``[0, 1]`` or the two ratios sum
            to more than 1. Such values would otherwise produce overlapping
            or silently truncated splits.
    """
    if not (0.0 <= train_ratio <= 1.0 and 0.0 <= val_ratio <= 1.0):
        raise ValueError(
            f"train_ratio and val_ratio must lie in [0, 1], got {train_ratio} and {val_ratio}"
        )
    # Small tolerance so that e.g. 0.9 + 0.1 is not rejected because of rounding.
    if train_ratio + val_ratio > 1.0 + 1e-9:
        raise ValueError(
            f"train_ratio + val_ratio must not exceed 1, got {train_ratio + val_ratio}"
        )

    torch.manual_seed(seed)
    n = data.num_nodes
    indices = torch.randperm(n)

    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)

    train_idx = indices[:n_train]
    val_idx = indices[n_train:n_train + n_val]
    test_idx = indices[n_train + n_val:]

    if normalize:
        features = data.x
        # detach()/cpu() let GPU-resident or grad-tracking tensors be handed to NumPy.
        features_np = features.detach().cpu().numpy()
        scaler = StandardScaler()
        # Fit on training rows only, then transform the full feature matrix.
        scaler.fit(features_np[train_idx.numpy()])
        data.x = torch.tensor(
            scaler.transform(features_np),
            dtype=torch.float32,
            device=features.device,  # keep the features on their original device
        )

    train_mask = torch.zeros(n, dtype=torch.bool)
    val_mask = torch.zeros(n, dtype=torch.bool)
    test_mask = torch.zeros(n, dtype=torch.bool)

    train_mask[train_idx] = True
    val_mask[val_idx] = True
    test_mask[test_idx] = True

    return train_mask, val_mask, test_mask

In [4]:
#data_path = "C:/Users/Nutzer/Desktop/DLSS/final_project/DLSS-WAQP/2_GNN_Models/data/data_minmax_catagg.pt"
data_path = "data/data_quantile_Target_QC_aggcat.pt"
data_file = "data_quantile_Target_QC_aggcat.pt"


# NOTE(security): weights_only=False un-pickles arbitrary Python objects.
# Only load files that come from a trusted source.
data = torch.load(data_path, weights_only=False)
# Also standardises data.x in place (scaler fitted on the training nodes only).
train_mask, val_mask, test_mask = create_split_masks_regression(data)
# Flatten the targets to a 1-D int64 vector of class indices.
data.y = data.y.view(-1).long()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_dim = data.num_node_features
hidden_dim = 64
# number of attention heads for the first layer
heads = 8
# output_dim is the number of logits per node. Class indices must satisfy
# 0 <= y < output_dim, so size it from the largest label rather than from the
# count of distinct labels (which undercounts when a class value is absent).
output_dim = int(data.y.max().item()) + 1



class GAT(nn.Module):
    """Baseline graph attention network with a linear head-mixing stage.

    Forward pipeline:
        dropout -> multi-head ``GATConv`` -> ELU
        -> ``Linear(hid_dim * heads, hid_dim)`` -> ELU -> dropout
        -> single-head ``GATConv`` (``concat=False``) with ``out_dim`` outputs.

    Args:
        in_dim: Size of the input node features.
        hid_dim: Per-head hidden size of the first attention layer.
        out_dim: Size of the per-node output.
        heads: Number of attention heads in the first layer.
        dropout: Probability used for the feature dropout module and passed
            as ``dropout`` to both ``GATConv`` layers.
    """

    def __init__(self, in_dim, hid_dim, out_dim, heads=8, dropout=0.6):
        super().__init__()
        concat_width = hid_dim * heads  # width of the concatenated head outputs
        self.conv1 = GATConv(in_dim, hid_dim, heads=heads, dropout=dropout)
        self.lin = nn.Linear(concat_width, hid_dim)
        self.conv2 = GATConv(hid_dim, out_dim, heads=1, concat=False, dropout=dropout)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, data):
        """Return the per-node outputs for ``data`` (reads ``data.x`` and ``data.edge_index``)."""
        node_feats, edge_index = data.x, data.edge_index
        # Attention block on the (dropped-out) input features.
        hidden = self.elu(self.conv1(self.dropout(node_feats), edge_index))
        # Mix the concatenated heads back down to hid_dim.
        hidden = self.dropout(self.elu(self.lin(hidden)))
        # Output attention layer; no activation is applied to its result.
        return self.conv2(hidden, edge_index)

# Build the baseline GAT, move it to the selected device and show its layers.
model = GAT(in_dim=input_dim, hid_dim=hidden_dim, out_dim=output_dim, heads=heads)
model = model.to(device)
print(model)

# Attach the split masks to the graph object so the training loop can read
# them as data.train_mask / data.val_mask / data.test_mask.
data.train_mask, data.val_mask, data.test_mask = train_mask, val_mask, test_mask

# Initialize weights and compile
#initialize_weights(model)
#model = torch.compile(model)
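# torch.compile is left disabled because it raised:
#   InductorError: RuntimeError: Compiler: cl is not found.
# Original note (translated): this error is caused by the model code, so
# commenting the call out is all that is needed.
# NOTE(review): the message itself points at a missing `cl` compiler rather
# than at the model code — confirm the actual cause.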



GAT(
  (conv1): GATConv(39, 64, heads=8)
  (lin): Linear(in_features=512, out_features=64, bias=True)
  (conv2): GATConv(64, 3, heads=1)
  (elu): ELU(alpha=1.0)
  (dropout): Dropout(p=0.6, inplace=False)
)


In [None]:

# -------------------------------
# Residual GAT Model
# -------------------------------
class ResidualGAT(nn.Module):
    """Stack of ``GATConv`` layers with LayerNorm, GELU, dropout and skip connections.

    Every layer except the last is followed by LayerNorm -> GELU -> dropout.
    A skip connection is added only when the layer's input and output have the
    same shape. The last layer is a single-head ``GATConv`` with
    ``concat=False`` and has no normalisation, activation or skip connection.

    Args:
        in_dim: Size of the input node features.
        hid_dim: Per-head hidden size.
        out_dim: Size of the per-node output.
        heads: Number of attention heads in every layer except the last.
        num_layers: The stack holds one input layer, ``num_layers - 2``
            hidden layers and one output layer.
        dropout: Probability used for the functional dropout in ``forward``
            and passed as ``dropout`` to each ``GATConv``.
    """

    def __init__(self, in_dim, hid_dim, out_dim, heads=8, num_layers=3, dropout=0.4):
        super().__init__()
        self.num_layers = num_layers
        self.dropout = dropout

        width = hid_dim * heads  # feature width after concatenating the heads

        # Input layer, then the hidden layers, then the output layer.
        conv_stack = [GATConv(in_dim, hid_dim, heads=heads, dropout=dropout)]
        norm_stack = [nn.LayerNorm(width)]
        for _ in range(num_layers - 2):
            conv_stack.append(GATConv(width, hid_dim, heads=heads, dropout=dropout))
            norm_stack.append(nn.LayerNorm(width))
        conv_stack.append(GATConv(width, out_dim, heads=1, concat=False, dropout=dropout))

        self.convs = nn.ModuleList(conv_stack)
        self.norms = nn.ModuleList(norm_stack)
        self.gelu = nn.GELU()

    def forward(self, data):
        """Return the per-node outputs for ``data`` (reads ``data.x`` and ``data.edge_index``)."""
        hidden, edge_index = data.x, data.edge_index

        # `norms` has one entry fewer than `convs`, so zip() stops before the
        # output layer.
        for conv, norm in zip(self.convs, self.norms):
            skip = hidden
            hidden = self.gelu(norm(conv(hidden, edge_index)))
            hidden = F.dropout(hidden, p=self.dropout, training=self.training)
            # Add the skip connection only when the shapes line up.
            if skip.shape == hidden.shape:
                hidden = hidden + skip

        # Output layer: no residual, normalisation or activation.
        return self.convs[-1](hidden, edge_index)


# -------------------------------
# Training function
# -------------------------------
def train_GNN_model(epochs, model, optimizer, criterion, data, early_stopper, scheduler, device):
    """Full-batch training loop with per-epoch validation and early stopping.

    Each epoch performs one optimisation step on the nodes selected by
    ``data.train_mask``, steps the scheduler once, and then evaluates the loss
    on ``data.val_mask`` in eval mode without gradients.

    Args:
        epochs: Maximum number of epochs.
        model: Module called as ``model(data)``. It is not moved to ``device``
            here; the caller is expected to have done that.
        optimizer: Optimiser over the model parameters.
        criterion: Loss called as ``criterion(predictions, targets)``.
        data: Graph object providing ``to(device)``, ``y``, ``train_mask`` and
            ``val_mask``.
        early_stopper: Callable ``(val_loss, model) -> bool``; returning true
            stops training. ``None`` disables early stopping.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``
            to train with a fixed learning rate.
        device: Device that ``data`` is moved to.

    Returns:
        ``(train_losses, val_losses)``: two lists of Python floats with one
        entry per completed epoch.
    """
    train_losses, val_losses = [], []
    data = data.to(device)
    model.train()

    for epoch in range(epochs):
        # --- optimisation step on the training nodes ---
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        # Read the scalar once; each .item() call forces a device sync.
        train_loss = loss.item()

        # --- validation ---
        model.eval()
        with torch.no_grad():
            val_out = model(data)
            val_loss = criterion(val_out[data.val_mask], data.y[data.val_mask]).item()
        model.train()

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # --- early stopping ---
        if early_stopper is not None and early_stopper(val_loss, model):
            print(f"Early stopping at epoch {epoch+1}")
            break

        if (epoch + 1) % 20 == 0:
            print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    return train_losses, val_losses


# -------------------------------
# Early Stopping Utility
# -------------------------------
class EarlyStopping:
    """Track the validation loss and signal when it has stopped improving.

    The instance is called once per epoch as ``stopper(val_loss, model)``.
    A loss counts as an improvement when it is lower than the best loss seen
    so far by more than ``min_delta``. On each improvement (and on the very
    first call) the model's ``state_dict`` is saved to ``path``. After
    ``patience`` consecutive calls without improvement the call returns
    ``True`` and ``early_stop`` is set.
    """

    def __init__(self, patience=30, min_delta=0.0001, path='checkpoint.pt'):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0          # consecutive calls without improvement
        self.best_loss = None     # None until the first call
        self.early_stop = False
        self.path = path

    def __call__(self, val_loss, model):
        """Record ``val_loss``; return ``True`` when training should stop."""
        first_call = self.best_loss is None
        if first_call or val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.save_checkpoint(model)
            if not first_call:
                self.counter = 0
            return False

        # No sufficient improvement this time.
        self.counter += 1
        if self.counter < self.patience:
            return False
        self.early_stop = True
        return True

    def save_checkpoint(self, model):
        """Write the model's ``state_dict`` to ``self.path``."""
        weights = model.state_dict()
        torch.save(weights, self.path)


# -------------------------------
# Main Training Script
# -------------------------------
# `data`, `input_dim`, `hidden_dim` and `output_dim` are expected to have been
# defined by the earlier cells.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Shared hyper-parameters. The scheduler is stepped once per epoch, so its
# cycle length must match the number of training epochs; defining each value
# once keeps the optimiser, scheduler and training call in sync.
NUM_EPOCHS = 300
MAX_LR = 0.005

model = ResidualGAT(input_dim, hidden_dim, output_dim, heads=8, num_layers=3).to(device)

optimizer = AdamW(model.parameters(), lr=MAX_LR, weight_decay=1e-3)
scheduler = OneCycleLR(
    optimizer,
    max_lr=MAX_LR,
    steps_per_epoch=1,      # full-batch training: one optimiser step per epoch
    epochs=NUM_EPOCHS,
    pct_start=0.1,
    anneal_strategy='cos',
    div_factor=10,
    final_div_factor=1e4
)

labels = data.y
criterion = nn.CrossEntropyLoss()

# The best weights (lowest validation loss) are written to this checkpoint file.
early_stopper = EarlyStopping(patience=30, min_delta=0.0001, path="ResidualGAT_checkpoint.pt")

train_losses, val_losses = train_GNN_model(
    epochs=NUM_EPOCHS,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    data=data,
    early_stopper=early_stopper,
    scheduler=scheduler,
    device=device
)

# -------------------------------
# Plot Loss Curve
# -------------------------------
# Draw the training and validation loss histories on one set of axes.
fig, ax = plt.subplots()
ax.plot(train_losses, label='Training Loss')
ax.plot(val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
fig.tight_layout()
plt.show()
