In [1]:
from torch_geometric.datasets import Planetoid
import torch
import numpy as np
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


In [72]:
def get_k_layer_neighbors_feature(node, num_steps, G, num_nodes):
    neighbors = [node]
    current_node = node
    while len(neighbors) <= num_nodes:
        for j in range(num_steps):
            # Get the indices of the current node's neighbors
            neighbor_indices = list(G.neighbors(current_node))
            # Choose a random neighbor
            if len(neighbor_indices) > 0:
                chosen_neighbor = np.random.choice(neighbor_indices)
                # Move to the chosen neighbor
                current_node = chosen_neighbor
            else:
                break
        neighbors.append(current_node)
    return neighbors

In [149]:
# Load the Cora dataset
dataset = Planetoid(root='data', name='Cora')
data = dataset[0]
# Convert the data object to a NetworkX graph
G = nx.Graph()
edge_index = data.edge_index.numpy()
G.add_edges_from(edge_index.T)
# Set the number of steps for the random walk
num_nodes = 3
# Loop over every node and perform a random walk   

concatenated_features_list = []
for i in range(len(G.nodes)):
    node_features = []
    for num_steps in range(1,2):
        neighbors =  get_k_layer_neighbors_feature(i, num_steps, G, num_nodes)
        num_neighbors = int(len(neighbors))
        # print the neighbros
        print(f"Node {i} has {num_neighbors - 1} neighbors in {num_steps} layer: {neighbors[1:num_neighbors+1]}")
        # add the neighbors
        neighbor_features = data.x[neighbors[1:]]
        temp_feature = data.x[neighbors[0]]
        node_features.append(torch.add(temp_feature, neighbor_features.mean(dim=0))) 
    # concate the feature
    concatenated_features = torch.cat(node_features, dim=0)
    concatenated_features = torch.cat([data.x[i], concatenated_features], dim=0)

    concatenated_features_list.append(concatenated_features)

Node 0 has 3 neighbors in 1 layer: [1862, 2582, 1166]
Node 1 has 3 neighbors in 1 layer: [654, 1, 654]
Node 2 has 3 neighbors in 1 layer: [1, 2, 1986]
Node 3 has 3 neighbors in 1 layer: [2544, 3, 2544]
Node 4 has 3 neighbors in 1 layer: [2175, 982, 2176]
Node 5 has 3 neighbors in 1 layer: [2546, 466, 2546]
Node 6 has 3 neighbors in 1 layer: [1416, 1924, 1416]
Node 7 has 3 neighbors in 1 layer: [208, 7, 208]
Node 8 has 3 neighbors in 1 layer: [269, 8, 281]
Node 9 has 3 neighbors in 1 layer: [723, 2614, 494]
Node 10 has 3 neighbors in 1 layer: [2545, 10, 2545]
Node 11 has 3 neighbors in 1 layer: [1839, 1655, 1131]
Node 12 has 3 neighbors in 1 layer: [1001, 2662, 1001]
Node 13 has 3 neighbors in 1 layer: [1701, 1854, 1701]
Node 14 has 3 neighbors in 1 layer: [158, 180, 1020]
Node 15 has 3 neighbors in 1 layer: [2367, 2368, 2370]
Node 16 has 3 neighbors in 1 layer: [970, 364, 2642]
Node 17 has 3 neighbors in 1 layer: [2140, 1301, 598]
Node 18 has 3 neighbors in 1 layer: [1560, 1623, 314]
N

In [150]:
cora_d =  torch.stack(concatenated_features_list)
cora_d.shape[1]

2866

In [151]:
dataset[0]

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [152]:
dataset[0].x.shape

torch.Size([2708, 1433])

In [153]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(cora_d.shape[1], 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = cora_d, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [154]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [155]:
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7880
