In [31]:
from torch_geometric.datasets import Planetoid
import pandas as pd
from torchmetrics.classification import Accuracy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj

In [None]:
# Load the Cora dataset through the Planetoid loader.
# NOTE(review): the root below is an absolute, machine-specific path; it is
# kept verbatim here so the dataset location does not change.
CORA_ROOT = 'E:/glass_git/ML-DL/Graph/data'
dataset = Planetoid(root=CORA_ROOT, name='Cora')

# Work with the first entry of the dataset for the rest of the notebook.
data = dataset[0]
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [None]:
'''
    [x]
    Node feature matrix: rows = nodes, columns = features.
    x has shape [2708, 1433], i.e. 2708 nodes with 1433 features per node.

    [y]
    Class label of every node; there are 7 classes in total.
'''
# Show the feature matrix first, then the label vector.
display(data.x, data.y)


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

tensor([3, 4, 4,  ..., 3, 3, 3])

In [27]:
class MLP(nn.Module):
    """Two-layer perceptron node classifier that uses node features only.

    Args:
        in_channels: Size of each input feature vector.
        hidden_channels: Width of the hidden layer.
        out_channels: Number of output classes; also used to size the
            accuracy metric.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.fc1 = nn.Linear(in_channels, hidden_channels)
        self.fc2 = nn.Linear(hidden_channels, out_channels)
        # Derive the class count from the output layer rather than hard-coding 7,
        # so the metric always matches the shape of the predictions.
        self.acc = Accuracy(task="multiclass", num_classes=out_channels)

    def forward(self, x):
        """Return per-row log-probabilities over the output classes."""
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def fit(self, data, epochs):
        """Train full-batch with Adam and log metrics every 10 epochs.

        Args:
            data: Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``.
            epochs: Number of optimisation steps to run.
        """
        # forward() already returns log-probabilities, so NLLLoss is the matching
        # criterion; CrossEntropyLoss would apply log-softmax a second time.
        criterion = nn.NLLLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=0.01, weight_decay=5e-4)

        self.train()
        for epoch in range(epochs):
            optimizer.zero_grad()
            out = self(data.x)  # self(...) dispatches to forward()
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc_score = self.acc(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if epoch % 10 == 0:
                # Validation numbers reuse this epoch's forward pass (weights
                # before the step); no autograd graph is needed for them.
                with torch.no_grad():
                    val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                    val_acc_score = self.acc(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch},Train Loss: {loss.item():.3f}, Val Loss: {val_loss.item():.3f}, Train Acc: {acc_score:.3f}, Val Acc: {val_acc_score:.3f}')

    def test(self, data):
        """Return accuracy on the rows selected by ``data.test_mask``.

        Switches the module to eval mode and runs the forward pass without
        gradient tracking.
        """
        self.eval()
        with torch.no_grad():
            out = self(data.x)
        acc_score = self.acc(out[data.test_mask].argmax(dim=1), data.y[data.test_mask])
        return acc_score

In [None]:
# Seed the RNG so weight initialisation, and therefore the logged metrics,
# are reproducible when the notebook is re-run from a fresh kernel.
torch.manual_seed(0)

# Baseline: classify nodes from their features alone (16 hidden units).
mlp=MLP(dataset.num_features, 16, dataset.num_classes)
mlp.fit(data,epochs=100)
acc_score=mlp.test(data)
print(f'MLP test accuracy: {acc_score*100:.2f}%')

Epoch 0,Train Loss: 1.960, Test Loss: 2.006, Train Acc: 0.143, Val Acc: 0.060
Epoch 10,Train Loss: 0.601, Test Loss: 1.566, Train Acc: 1.000, Val Acc: 0.540
Epoch 20,Train Loss: 0.091, Test Loss: 1.377, Train Acc: 1.000, Val Acc: 0.548
Epoch 30,Train Loss: 0.026, Test Loss: 1.391, Train Acc: 1.000, Val Acc: 0.544
Epoch 40,Train Loss: 0.011, Test Loss: 1.414, Train Acc: 1.000, Val Acc: 0.532
Epoch 50,Train Loss: 0.007, Test Loss: 1.409, Train Acc: 1.000, Val Acc: 0.536
Epoch 60,Train Loss: 0.006, Test Loss: 1.384, Train Acc: 1.000, Val Acc: 0.540
Epoch 70,Train Loss: 0.007, Test Loss: 1.355, Train Acc: 1.000, Val Acc: 0.544
Epoch 80,Train Loss: 0.007, Test Loss: 1.333, Train Acc: 1.000, Val Acc: 0.558
Epoch 90,Train Loss: 0.008, Test Loss: 1.324, Train Acc: 1.000, Val Acc: 0.564


In [38]:
class VanillaGNNLayer(nn.Module):
    """Graph layer: bias-free linear projection followed by adjacency mixing.

    Args:
        dim_in: Size of each input feature vector.
        dim_out: Size of each output feature vector.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.linear = nn.Linear(dim_in, dim_out, bias=False)

    def forward(self, x, adj):
        """Project ``x`` with the linear map, then left-multiply by ``adj``."""
        projected = self.linear(x)
        return torch.sparse.mm(adj, projected)
    
class VanillaGNN(nn.Module):
    """Two-layer graph network built from ``VanillaGNNLayer`` blocks.

    Args:
        dim_in: Size of each input feature vector.
        dim_h: Width of the hidden representation.
        dim_out: Number of output classes; also used to size the accuracy metric.
    """

    def __init__(self,dim_in,dim_h,dim_out):
        super().__init__()
        self.gnn1=VanillaGNNLayer(dim_in,dim_h)
        self.gnn2=VanillaGNNLayer(dim_h,dim_out)
        # Derive the class count from the output layer rather than hard-coding 7,
        # so the metric always matches the shape of the predictions.
        self.acc=Accuracy(task="multiclass", num_classes=dim_out)

    def forward(self,x,adj):
        """Return per-row log-probabilities given features ``x`` and ``adj``."""
        h=self.gnn1(x, adj)
        h=torch.relu(h)
        h=self.gnn2(h,adj)
        return F.log_softmax(h,dim=1)

    def fit(self,data,adj,epochs):
        """Train full-batch with Adam and log metrics every 20 epochs.

        Runs ``epochs + 1`` optimisation steps, so the final epoch index
        (``epochs``) is also logged when it is a multiple of 20.

        Args:
            data: Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``.
            adj: Adjacency matrix passed to every layer.
            epochs: Index of the last epoch to run.
        """
        # forward() already returns log-probabilities, so NLLLoss is the matching
        # criterion; CrossEntropyLoss would apply log-softmax a second time.
        criterion=nn.NLLLoss()
        optim=torch.optim.Adam(self.parameters(),lr=0.01, weight_decay=5e-4)
        self.train()
        for epoch in range(epochs+1):
            optim.zero_grad()
            out=self(data.x,adj)
            loss=criterion(out[data.train_mask],data.y[data.train_mask])
            train_acc=self.acc(out[data.train_mask].argmax(dim=1),data.y[data.train_mask])
            loss.backward()
            optim.step()
            if epoch % 20 ==0:
                # These numbers come from the validation mask, so they are
                # labelled "val"; no autograd graph is needed to compute them.
                with torch.no_grad():
                    val_loss=criterion(out[data.val_mask],data.y[data.val_mask])
                    val_acc=self.acc(out[data.val_mask].argmax(dim=1),data.y[data.val_mask])
                print(f'epoch: {epoch}, train loss: {loss.item():.3f}, val loss: {val_loss.item():.3f}, train acc: {train_acc*100:.3f}, val acc: {val_acc*100:.3f}')

    def test(self,data,adj):
        """Return accuracy on the rows selected by ``data.test_mask``.

        Switches the module to eval mode and runs the forward pass without
        gradient tracking.
        """
        self.eval()
        with torch.no_grad():
            out=self(data.x,adj)
        total_acc=self.acc(out.argmax(dim=1)[data.test_mask],data.y[data.test_mask])
        return total_acc


In [39]:
# Seed the RNG so weight initialisation, and therefore the logged metrics,
# are reproducible when the notebook is re-run from a fresh kernel.
torch.manual_seed(0)

# Dense adjacency with self-loops. max_num_nodes pins the matrix to one
# row/column per node even if the highest-index nodes have no edges, so it
# always lines up with data.x. A new tensor is built instead of mutating in place.
adj=to_dense_adj(data.edge_index, max_num_nodes=data.num_nodes)[0]
adj=adj+torch.eye(adj.size(0))

gnn=VanillaGNN(dataset.num_features, 16, dataset.num_classes)
gnn.fit(data,adj,epochs=100)
acc=gnn.test(data,adj)
print(f'GNN test accuracy: {acc*100:.3f}')

epoch: 0, train loss: 2.147, test loss: 2.174, train acc: 11.429, test acc: 5.200
epoch: 20, train loss: 0.020, test loss: 1.987, train acc: 100.000, test acc: 72.800
epoch: 40, train loss: 0.002, test loss: 2.372, train acc: 100.000, test acc: 74.600
epoch: 60, train loss: 0.001, test loss: 2.464, train acc: 100.000, test acc: 74.000
epoch: 80, train loss: 0.001, test loss: 2.419, train acc: 100.000, test acc: 75.200
epoch: 100, train loss: 0.001, test loss: 2.344, train acc: 100.000, test acc: 75.600
GNN test accuracy: 77.000
