In [None]:
# Load the Reddit dataset
from torch_geometric.datasets import Reddit

dataset = Reddit(root='./data/Reddit')
data = dataset[0]  # Take the first graph in the dataset

In [None]:
# Collect the node indices of each split directly from the boolean masks
# stored on `data`. Using tensor ops avoids a Python-level loop over every
# node in the graph.
train_nodes = set(data.train_mask.nonzero(as_tuple=False).view(-1).tolist())
test_nodes = set(data.test_mask.nonzero(as_tuple=False).view(-1).tolist())

# Check whether the test split contains nodes that never appear in training
unseen_nodes = test_nodes - train_nodes

if unseen_nodes:
    print(f"There are {len(unseen_nodes)} nodes in the test set that are not seen in the training set.")
else:
    print("All nodes in the test set are seen in the training set.")

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Reddit
from torch_geometric.data import NeighborSampler

# Load the dataset
dataset = Reddit(root='./data/Reddit')
data = dataset[0]

class GraphSAGE(torch.nn.Module):
    """Two-layer GraphSAGE classifier for neighbor-sampled mini-batches.

    Each layer is a mean-aggregating ``SAGEConv`` followed by batch
    normalization and ReLU; dropout (p=0.5) is applied between the layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        """Build the two convolutions and their batch-norm layers.

        Args:
            in_channels: Size of the input node features.
            hidden_channels: Size of the hidden representation.
            out_channels: Size of the output (number of classes).
        """
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels, aggr="mean")
        self.conv2 = SAGEConv(hidden_channels, out_channels, aggr="mean")
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(out_channels)

    def forward(self, x, adjs):
        """Run both layers over the sampled bipartite sub-graphs.

        Args:
            x: Features of all sampled nodes for this mini-batch.
            adjs: Per-hop adjacency entries; the first is used for layer 1
                and the last for layer 2. Each entry is unpacked as
                ``(edge_index, e_id, size)`` -- assumes the format yielded
                by ``NeighborSampler``; confirm against the installed
                torch_geometric version.

        Returns:
            One row of scores per target node of the last hop.
        """
        # Hop 1: target nodes are the first size[1] rows of the source
        # features, so the convolution is given a (source, target) pair.
        edge_index, _, size = adjs[0]
        x = self.conv1((x, x[:size[1]]), edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        # Hop 2: narrows the representation down to the seed nodes.
        edge_index, _, size = adjs[-1]
        x = self.conv2((x, x[:size[1]]), edge_index)
        x = self.bn2(x)
        # NOTE(review): ReLU on the final scores clamps negative logits to
        # zero before the loss; kept as in the original -- confirm intent.
        x = F.relu(x)
        return x

# Define the model and optimizer.
# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = GraphSAGE(
    in_channels=dataset.num_features,
    hidden_channels=128,
    out_channels=dataset.num_classes,
)
model = model.to(device)

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Multiply the learning rate by 0.7 every 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.7,
)

def train():
    """Run one pass over ``train_loader`` and return the mean batch loss."""
    model.train()
    batch_losses = []
    for batch_size, n_id, adjs in train_loader:
        optimizer.zero_grad()

        # Move the sampled structure, features and seed labels to the device.
        device_adjs = [adj.to(device) for adj in adjs]
        features = data.x[n_id].to(device)
        targets = data.y[n_id[:batch_size]].to(device)

        logits = model(features, device_adjs)
        loss = F.cross_entropy(logits, targets)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
    return sum(batch_losses) / len(train_loader)

# Evaluation function
@torch.no_grad()  # evaluation needs no autograd graph
def test():
    """Return the classification accuracy over the nodes in ``test_loader``.

    The denominator is the number of seed nodes actually evaluated, so the
    result stays within [0, 1] whichever node set the loader iterates over.
    Returns 0.0 when the loader yields no batches.
    """
    model.eval()
    correct = 0
    evaluated = 0
    for batch_size, n_id, adjs in test_loader:
        adjs = [adj.to(device) for adj in adjs]
        out = model(data.x[n_id].to(device), adjs)
        pred = out.argmax(dim=1)
        # The first `batch_size` entries of n_id are the seed nodes.
        correct += (pred == data.y[n_id[:batch_size]].to(device)).sum().item()
        evaluated += batch_size
    return correct / evaluated if evaluated else 0.0

# Run training and evaluation.
# Each loader is restricted to its own split via `node_idx`; without it the
# sampler iterates over every node, so the model would train on test nodes
# and the evaluation would cover more than the test split.
train_loader = NeighborSampler(data.edge_index, node_idx=data.train_mask, sizes=[10, 10], batch_size=1024, shuffle=True, num_nodes=data.num_nodes)
test_loader = NeighborSampler(data.edge_index, node_idx=data.test_mask, sizes=[10, 10], batch_size=1024, shuffle=False, num_nodes=data.num_nodes)

for epoch in range(1, 11):
    loss = train()
    test_acc = test()
    # Advance the LR schedule only after the epoch's optimizer steps.
    scheduler.step()
    print(f"Epoch: {epoch:02d}, Loss: {loss:.4f}, Test Accuracy: {test_acc:.4f}")