# Задание 1. Найти графовый набор данных для решения задачи предсказания (классификация вершин, обнаружение сообществ и т.д.).

In [1]:
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]

In [3]:
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

# Задание 2. Использовать несколько слоев GCNConv из библиотеки PyG для построения GCN модели.

In [4]:
class GCNModel(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [5]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

model = GCNModel(in_channels=dataset.num_node_features, hidden_channels=16, out_channels=dataset.num_classes).to(device)

In [6]:
device

device(type='cpu')

In [7]:
model

GCNModel(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)

# Задание 3. Обучить полученную модель, подобрать гиперпараметры (например, learning rate) на валидационной выборке, и оценить качество предсказания на тестовой выборке.

In [8]:
data = data.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-2)

for epoch in range(200):
    model.train()
    optimizer.zero_grad()

    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    train_loss = loss.item()

    if epoch % 30 == 0:
        model.eval()
        with torch.no_grad():
            pred = model(data).argmax(dim=1)
        correct = pred[data.train_mask] == data.y[data.train_mask]
        train_acc = int(correct.sum()) / data.train_mask.sum().item()
        correct = pred[data.val_mask] == data.y[data.val_mask]
        val_acc = int(correct.sum()) / data.val_mask.sum().item()
        print(f'Epoch {epoch}: Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f}, Val acc: {val_acc:.4f}')

model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
    correct = pred[data.test_mask] == data.y[data.test_mask]
    test_acc = int(correct.sum()) / data.test_mask.sum().item()
print(f'Test accuracy: {test_acc:.4f}')

Epoch 0: Train loss: 1.9437, Train acc: 0.6357, Val acc: 0.3760
Epoch 30: Train loss: 0.0865, Train acc: 1.0000, Val acc: 0.7600
Epoch 60: Train loss: 0.0323, Train acc: 1.0000, Val acc: 0.7600
Epoch 90: Train loss: 0.0195, Train acc: 1.0000, Val acc: 0.7620
Epoch 120: Train loss: 0.0306, Train acc: 1.0000, Val acc: 0.7640
Epoch 150: Train loss: 0.0086, Train acc: 1.0000, Val acc: 0.7620
Epoch 180: Train loss: 0.0198, Train acc: 1.0000, Val acc: 0.7640
Test accuracy: 0.7700


# Задание 4. Также представить самостоятельную реализацию слоя GCNConv, используя матричные операции. Повторить обучение с собственными слоями и сравнить результаты.

In [9]:
class CustomGCNConv(nn.Module):
    def __init__(self, in_params, out_params):
        super().__init__()
        self.W = np.random.randn(in_params, out_params)
        self.b = np.zeros((1, out_params))
        self.A, self.X = None, None

    def forward(self, X, A):
        self.A = A
        self.X = X
        #подсчет лапласиана
        D = np.diag(np.sum(A, axis = 1))
        D_sqrt = np.sqrt(D)
        D_inv = np.linalg.pinv(D_sqrt)
        I = np.identity(len(A))
        L = I - np.dot(D_inv, np.dot(A, D_inv))
        return np.dot(np.dot(L, self.X), self.W) + self.b

    def backward(self, er, lr):
        D = np.diag(np.sum(self.A, axis = 1))
        D_sqrt = np.sqrt(D)
        D_inv = np.linalg.pinv(D_sqrt)
        I = np.identity(len(self.A))
        L = I - np.dot(D_inv, np.dot(self.A, D_inv))
        out_data = np.dot(self.X.T, L)
        gradW = np.dot(out_data, er)
        gradb = np.sum(er, axis = 0, keepdims = True)
        self.W -= lr * gradW
        self.b -= lr * gradb
        return np.dot(L.T, np.dot(er, self.W.T))

In [10]:
class CustomGCNModel(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = CustomGCNConv(in_channels, hidden_channels)
        self.conv2 = CustomGCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x * (x > 0)
        x = self.conv2(x, edge_index)
        exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

    def backward(self, y, y_p, alpha = 0.01):
        er = (y_p - y) / y.shape[0]
        grad2 = self.conv2.backward(er, alpha)
        grad1 = self.conv1.backward(grad2, alpha)
        return grad1, grad2

In [11]:
model = CustomGCNModel(in_channels=dataset.num_node_features, hidden_channels=16, out_channels=dataset.num_classes).to(device)

In [12]:
A = np.zeros((data.num_nodes, data.num_nodes))
edge_index = data.edge_index.cpu().numpy()
A[edge_index[0], edge_index[1]] = 1
A[edge_index[1], edge_index[0]] = 1

In [13]:
y = data.y.cpu().numpy()
y = np.eye(len(np.unique(data.y.cpu().numpy())))[y]

In [14]:
def cross_entropy(y, p):
  return - np.mean(np.sum(y * np.log(p + 1e-8), axis = 1))

In [None]:
for epoch in range(1, 10):
    out = model.forward(data.x, A)
    loss = cross_entropy(y[data.train_mask], out[data.train_mask])
    if epoch % 1 == 0:
      print(f'epoch: {epoch}, loss = {loss}')
    model.backward(y, out, alpha = 0.1)

epoch: 1, loss = 12.370691550266892
epoch: 2, loss = 12.263503769796005


In [None]:
Вывод: обучение идет дольше из-за отсутствия перегона в numpy, сильных различий по качеству нет - оба метода показывают наличие обучения