In [1]:
import sys
# Put the sibling '../stellargraph' directory first on the module search path so
# that `import stellargraph` in the following cells resolves to that copy.
# The path is relative, so it depends on the kernel's working directory.
sys.path.insert(0, '../stellargraph')

In [2]:
import os

import stellargraph as sg
from stellargraph.mapper import FullBatchNodeGenerator  # Supplies the graph to the model in batches (full-batch)
from stellargraph.layer import GCN  # Graph convolutional network

from IPython.display import display, HTML
from tensorflow.keras import layers, optimizers, losses, metrics, Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn import preprocessing, model_selection

### 1. Подготовка данных

In [3]:
# More information about the dataset: https://graphsandnetworks.com/the-cora-dataset/
dataset = sg.datasets.Cora()
# Render the dataset's own description as HTML in the cell output.
display(HTML(dataset.description))
# load() returns the graph and the per-node subject labels
# (presumably a pandas Series indexed by node id — confirm).
G, node_subjects = dataset.load()

In [4]:
# Print a text summary of the graph: node/edge counts, node and edge types, feature sizes.
print(G.info())

StellarGraph: Undirected multigraph
 Nodes: 2708, Edges: 5429

 Node types:
  paper: [2708]
    Features: float32 vector, length 1433
    Edge types: paper-cites->paper

 Edge types:
    paper-cites->paper: [5429]
        Weights: all 1 (default)
        Features: none


In [5]:
# Class statistics for the nodes: number of nodes per subject, shown as a table
node_subjects.value_counts().to_frame()

Unnamed: 0,subject
Neural_Networks,818
Probabilistic_Methods,426
Genetic_Algorithms,418
Theory,351
Case_Based,298
Reinforcement_Learning,217
Rule_Learning,180


<h3 style="text-align: center;"><b>Примерно так выглядит датасет. 
Цвет вершины отражает её класс (тему публикации)</b></h3>

<img src='https://paperswithcode.com/media/datasets/Cora-0000000700-ce1c5ec7_LD7pZnT.jpg' width = 500>

In [6]:
# Example of splitting the labelled nodes into train / validation / test subsets.
# Sizes are absolute node counts; whatever remains after both splits is the test
# set. Both splits are stratified by subject so every subset keeps the class
# proportions of the full label set.
TRAIN_SIZE = 140
VAL_SIZE = 500
# Fixed seed so that a fresh "Restart & Run All" reproduces exactly the same
# split; without it each run draws different train/val/test nodes.
SPLIT_SEED = 42

# First split: training nodes vs. everything else.
train_subjects, remaining_subjects = model_selection.train_test_split(
    node_subjects,
    train_size=TRAIN_SIZE,
    test_size=None,
    stratify=node_subjects,
    random_state=SPLIT_SEED,
)
# Second split: carve the validation nodes out of the remainder; the rest is test.
val_subjects, test_subjects = model_selection.train_test_split(
    remaining_subjects,
    train_size=VAL_SIZE,
    test_size=None,
    stratify=remaining_subjects,
    random_state=SPLIT_SEED,
)

In [7]:
# Compare the class statistics of the training subset with those of the full graph
train_subjects.value_counts().to_frame()

Unnamed: 0,subject
Neural_Networks,42
Probabilistic_Methods,22
Genetic_Algorithms,22
Theory,18
Case_Based,16
Reinforcement_Learning,11
Rule_Learning,9


In [8]:
# node_subjects.head(10)

In [9]:
# train_subjects.head(10)

In [10]:
# Build one-hot representations of the target variable. The binarizer learns
# the set of classes from the training labels only; the same fitted encoder is
# then applied to all three subsets so their columns line up.
target_encoding = preprocessing.LabelBinarizer()
target_encoding.fit(train_subjects)
train_targets, val_targets, test_targets = (
    target_encoding.transform(subjects)
    for subjects in (train_subjects, val_subjects, test_subjects)
)

### 2. GCN-генератор признаков

In [11]:
# Full-batch generator over the whole graph G. method='gcn' selects the GCN
# preprocessing (the library reports "GCN (local pooling) filters" when this runs);
# sparse=False presumably requests a dense adjacency matrix — confirm against the
# StellarGraph docs.
generator = FullBatchNodeGenerator(G, method='gcn', sparse=False)

Using GCN (local pooling) filters...


In [12]:
# Data flow for training: the node ids are the index of train_subjects, paired
# with their one-hot targets.
train_gen = generator.flow(train_subjects.index, train_targets)

In [13]:
# Sanity check: number of training node ids vs. shape of the target matrix
# (rows should match; columns are the classes).
print(train_subjects.index.shape, train_targets.shape)

(140,) (140, 7)


In [14]:
# Take the first item of the training flow and unpack it into model inputs and targets.
# NOTE(review): a full-batch flow presumably yields exactly one batch — confirm.
x_inputs, y_train = train_gen[0]

In [15]:
# Peek at the first three target rows; the leading [0] strips the outer (batch) axis.
y_train[0][:3]

array([[0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0]])

In [16]:
# GCN with two layers of 16 units each, ReLU activation on both, dropout 0.5,
# built against the full-batch generator created above.
gcn = GCN(layer_sizes=[16, 16], activations=['relu', 'relu'], generator=generator, dropout=0.5)

In [17]:
# Input tensors and output tensor of the GCN; x_out is what the classification
# layer is attached to.
x_inp, x_out = gcn.in_out_tensors()

In [18]:
# Display the model inputs (the recorded output lists three Keras tensors).
x_inp

[<KerasTensor: shape=(1, 2708, 1433) dtype=float32 (created by layer 'input_1')>,
 <KerasTensor: shape=(1, None) dtype=int32 (created by layer 'input_2')>,
 <KerasTensor: shape=(1, 2708, 2708) dtype=float32 (created by layer 'input_3')>]

In [19]:
# Display the GCN output tensor (last axis has 16 entries in the recorded output,
# matching the final layer size).
x_out

<KerasTensor: shape=(1, None, 16) dtype=float32 (created by layer 'gather_indices')>

In [20]:
# Classification head: a softmax Dense layer with one unit per class, where the
# class count is the number of columns of the one-hot training targets.
predictions = layers.Dense(units=train_targets.shape[1], activation='softmax')(x_out)

In [21]:
# Display the prediction tensor (last axis has 7 entries in the recorded output, one per class).
predictions

<KerasTensor: shape=(1, None, 7) dtype=float32 (created by layer 'dense')>