In [1]:
%load_ext autoreload
%autoreload

In [2]:
# Master switch for Weights & Biases logging; later cells check this flag
# before calling into wandb.
enable_wandb = True
if enable_wandb:
    # Imported only when tracking is enabled.
    import wandb
    wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mniklas-kemper[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
from torch_geometric.datasets import Planetoid

In [4]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import graph_tool as gt
import graph_tool.generation as gen
import graph_tool.topology as top
import networkx as nx

In [5]:
# Load the Planetoid dataset named 'Cora'; root='/tmp/Cora' is the directory
# handed to the loader for its files.
dataset = Planetoid(root='/tmp/Cora', name='Cora')

In [6]:
def get_rings(edge_index, max_k):
    """Enumerate the induced cycles ("rings") with 3..max_k vertices of a graph.

    The graph is treated as undirected; self-loops and parallel edges are
    removed before matching.

    Args:
        edge_index: Edge list with one column per edge (it is transposed to
            obtain one row per edge). Either a ``torch.Tensor`` or an object
            exposing ``.T`` that graph-tool's ``add_edge_list`` accepts.
        max_k: Largest ring size to search for (inclusive). Values below 3
            produce an empty result.

    Returns:
        A list of tuples of vertex ids, one tuple per distinct ring. Each tuple
        is the vertex mapping of the first isomorphism found for that vertex
        set; rings are listed in discovery order (increasing ring size).
    """
    if isinstance(edge_index, torch.Tensor):
        # .numpy() is rejected for CUDA tensors and for tensors that require
        # grad, so detach and move to host memory first.
        edge_index = edge_index.detach().cpu().numpy()

    graph_gt = gt.Graph(directed=False)
    graph_gt.add_edge_list(edge_index.T)
    gen.remove_self_loops(graph_gt)
    gen.remove_parallel_edges(graph_gt)

    rings = []
    # Vertex sets already reported. The same ring is matched once per
    # automorphism of the cycle pattern (rotations / reflections), so the
    # sorted vertex tuple serves as the canonical key.
    seen_vertex_sets = set()
    for k in range(3, max_k + 1):
        pattern_gt = gt.Graph(directed=False)
        pattern_gt.add_edge_list(list(nx.cycle_graph(k).edges))
        sub_isos = top.subgraph_isomorphism(pattern_gt, graph_gt, induced=True, subgraph=True,
                                           generator=True)
        for isomorphism in sub_isos:
            ring = tuple(isomorphism.a)
            key = tuple(sorted(ring))
            if key not in seen_vertex_sets:
                seen_vertex_sets.add(key)
                rings.append(ring)
    return rings


In [7]:
# With max_k=3 only rings of size 3 (triangles) are enumerated, on the first
# graph of the dataset.
triangles = get_rings(dataset[0].edge_index, max_k = 3)

In [8]:
from collections import Counter
from itertools import chain

# Per-node regression target: the number of triangles each node belongs to.
# The memberships are tallied in a single pass over the flattened triangle
# list instead of rebuilding and scanning that list once per node.
_triangle_memberships = Counter(int(node_id) for node_id in chain.from_iterable(triangles))
triangle_count = torch.tensor(
    # Counter returns 0 for nodes that appear in no triangle.
    [_triangle_memberships[node_id] for node_id in range(dataset[0].num_nodes)],
    dtype=torch.float,
)

In [9]:
# Node -> triangle incidence as a 2 x E index tensor: row 0 holds the node id,
# row 1 the position of the triangle in `triangles`.
substructure_edge_index = (
    torch.tensor(
        [[node_id, triangle_id] for triangle_id, triangle in enumerate(triangles) for node_id in triangle],
        dtype=torch.long,
    )
    # No-op for a non-empty (E, 2) tensor; for an empty triangle list it turns
    # the 1-D empty tensor into shape (0, 2) so the result is (2, 0), not (0,).
    .reshape(-1, 2)
    .t()
    .contiguous()
)

In [10]:
# Attach the node -> triangle incidence to the first graph and set `y` to the
# per-node triangle counts.
# NOTE(review): presumably this replaces the dataset's original `y` — confirm
# against the Data.update semantics of the installed PyG version.
data = dataset[0].update({"substructure_edge_index": substructure_edge_index, "y": triangle_count})

In [11]:
if enable_wandb:
    # NOTE(review): the project name is spelled "trinagle"; it is left untouched
    # so runs keep going to the same W&B project.
    wandb.init(project='simple-trinagle-counting')
    # Static facts about the graph, stored once per run.
    summary = {
        "data": {
            "num_features": data.num_features,
            "num_nodes": data.num_nodes,
            "num_edges": data.num_edges,
            "has_isolated_nodes": data.has_isolated_nodes(),
            # Key name kept from the original notebook; the value is has_self_loops().
            "has_self_nodes": data.has_self_loops(),
            "is_undirected": data.is_undirected(),
            # Store a plain int rather than a 0-dim tensor.
            "num_training_nodes": int(data.train_mask.sum()),
        }
    }
    # Assigning to `wandb.summary` would only rebind the module attribute to a
    # plain dict; updating the active run's summary object records the values.
    wandb.run.summary.update(summary)

In [12]:
from simple_model_triangles import SimpleSubstructureNeuralNet 

In [13]:
# Build the substructure-aware network with one scalar output per node.
model = SimpleSubstructureNeuralNet(
    in_channels=dataset.num_node_features,
    hidden_channels=16,
    out_channels=1,
    num_layers=3,
)
print(model)

SimpleSubstructureNeuralNet(
  (layers): ModuleList(
    (0): SubstructureLayer(
      (message_neighbor): GINConv(nn=Sequential(
        (0): Linear(in_features=1433, out_features=32, bias=True)
        (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Linear(in_features=32, out_features=16, bias=True)
      ))
      (node2substructure): Sequential(
        (0): Linear(in_features=16, out_features=32, bias=True)
        (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Linear(in_features=32, out_features=16, bias=True)
      )
      (substructure2node): Sequential(
        (0): Linear(in_features=16, out_features=32, bias=True)
        (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Linear(in_features=32, out_features=16, bias=True)
      )
    )
    (1-2): 2 x SubstructureLayer(
  

In [13]:
import torch
import torch.nn.functional as F
from torch.nn import Embedding, ModuleList
from torch.nn import Sequential, Linear, BatchNorm1d, ReLU
#from torch_scatter import scatter
from torch_geometric.nn import GINConv, GINEConv

class SimpleGraphNeuralNet(torch.nn.Module):
    """Stack of GINConv layers followed by a per-node linear read-out.

    Every GINConv wraps an MLP of the form
    Linear -> BatchNorm1d -> ReLU -> Linear that maps to ``hidden_channels``
    through an intermediate width of ``2 * hidden_channels``. The first MLP
    consumes ``input_channels`` features, all later ones ``hidden_channels``.

    Args:
        input_channels: Number of input features per node.
        hidden_channels: Output width of every GINConv layer.
        out_channels: Output width of the final linear layer.
        num_layers: Number of GINConv layers.
    """

    def __init__(self, input_channels, hidden_channels, out_channels, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.node_convs = ModuleList()

        wide = 2 * hidden_channels
        in_dim = input_channels
        for _ in range(num_layers):
            mlp = Sequential(
                Linear(in_dim, wide),
                BatchNorm1d(wide),
                ReLU(),
                Linear(wide, hidden_channels),
            )
            self.node_convs.append(GINConv(mlp, train_eps=True))
            # Only the first layer sees the raw input width.
            in_dim = hidden_channels

        self.lin = Linear(hidden_channels, out_channels)

    def reset_parameters(self):
        """Re-initialise every convolution and the read-out layer."""
        for conv in self.node_convs:
            conv.reset_parameters()
        self.lin.reset_parameters()

    def forward(self, data):
        """Apply the first ``num_layers`` convolutions to ``data.x`` over
        ``data.edge_index``, then the linear read-out; no activation or
        dropout is applied between layers."""
        x = data.x
        for layer_idx in range(self.num_layers):
            conv = self.node_convs[layer_idx]
            x = conv(x=x, edge_index=data.edge_index)
        return self.lin(x)

# Plain GIN network: 3 layers, 16 hidden channels, one scalar output per node.
model2 = SimpleGraphNeuralNet(
    input_channels=dataset.num_node_features,
    hidden_channels=16,
    out_channels=1,
    num_layers=3,
)

In [14]:
# Rebind `model` to the SimpleGraphNeuralNet instance (model2); the training
# cells that follow operate on whatever `model` currently refers to.
model = model2

In [15]:
# Mean squared error between the predicted and the target per-node values.
criterion = F.mse_loss  # Define loss criterion.
# Adam over the parameters of the current `model`; it must be re-created
# whenever `model` is rebound to a different network.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)  # Define optimizer.

def train():
    """Run one full-batch optimisation step on the training nodes.

    Uses the notebook-level ``model``, ``data``, ``criterion`` and
    ``optimizer``.

    Returns:
        The training loss tensor of this step.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data).squeeze()
    mask = data.train_mask
    # Only the training nodes contribute to the loss.
    loss = criterion(predictions[mask], data.y[mask])

    loss.backward()
    optimizer.step()
    return loss

@torch.no_grad()  # evaluation only: no autograd graph is needed
def test():
    """Evaluate the notebook-level ``model`` on the test nodes of ``data``.

    Returns:
        A ``(test_acc, test_loss)`` pair: the fraction of test nodes whose
        rounded prediction equals the target, and the ``criterion`` value on
        the raw (unrounded) predictions.
    """
    model.eval()
    out = torch.squeeze(model(data))
    pred = torch.round(out)  # Round to the nearest integer.
    test_loss = criterion(out[data.test_mask], data.y[data.test_mask])
    test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth values.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Ratio of exact matches.
    return test_acc, test_loss

#model.reset_parameters()
# 200 epochs of full-batch training, evaluating on the test split after each step.
for epoch in range(1, 201):
    loss = train()
    test_acc, test_loss = test()

    if enable_wandb:
        wandb.log(
            {
                "substructureGNN/loss": loss,
                "substructureGNN/test_loss": test_loss,
                "substructureGNN/test_acc": test_acc,
            }
        )

    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 23.2099, Test Loss: 5.5460, Test Acc: 0.4040
Epoch: 002, Loss: 17.4002, Test Loss: 17.9339, Test Acc: 0.2480
Epoch: 003, Loss: 11.7592, Test Loss: 16.5006, Test Acc: 0.2530
Epoch: 004, Loss: 8.5663, Test Loss: 38.5124, Test Acc: 0.1800
Epoch: 005, Loss: 5.4140, Test Loss: 107.5160, Test Acc: 0.0970
Epoch: 006, Loss: 5.3247, Test Loss: 183.9236, Test Acc: 0.0690
Epoch: 007, Loss: 4.4728, Test Loss: 158.0455, Test Acc: 0.0780
Epoch: 008, Loss: 3.3241, Test Loss: 148.4008, Test Acc: 0.0810
Epoch: 009, Loss: 3.5670, Test Loss: 139.0910, Test Acc: 0.0750
Epoch: 010, Loss: 2.7312, Test Loss: 129.9176, Test Acc: 0.0690
Epoch: 011, Loss: 2.9345, Test Loss: 108.9449, Test Acc: 0.0710
Epoch: 012, Loss: 2.7284, Test Loss: 90.6493, Test Acc: 0.0780
Epoch: 013, Loss: 2.4545, Test Loss: 75.6114, Test Acc: 0.0770
Epoch: 014, Loss: 2.3386, Test Loss: 61.7394, Test Acc: 0.0730
Epoch: 015, Loss: 2.1019, Test Loss: 50.2313, Test Acc: 0.0680
Epoch: 016, Loss: 1.9374, Test Loss: 41.7523, 

In [17]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)
data = data.to(device)
# New Adam optimizer over the current model's parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# 100 full-batch steps of MSE regression on the training nodes.
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    out = model(data)
    loss = F.mse_loss(
        input=out[data.train_mask].squeeze(),
        target=data.y[data.train_mask],
    )
    loss.backward()
    optimizer.step()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

Epoch: 000, Loss: 24.1846
Epoch: 001, Loss: 21.5803
Epoch: 002, Loss: 18.1493
Epoch: 003, Loss: 13.4987
Epoch: 004, Loss: 10.6048
Epoch: 005, Loss: 7.6783
Epoch: 006, Loss: 4.2272
Epoch: 007, Loss: 1.4557
Epoch: 008, Loss: 1.3588
Epoch: 009, Loss: 1.6830
Epoch: 010, Loss: 1.8849
Epoch: 011, Loss: 1.7098
Epoch: 012, Loss: 1.4532
Epoch: 013, Loss: 1.2828
Epoch: 014, Loss: 1.0259
Epoch: 015, Loss: 0.8474
Epoch: 016, Loss: 0.6811
Epoch: 017, Loss: 0.6298
Epoch: 018, Loss: 0.5018
Epoch: 019, Loss: 0.4021
Epoch: 020, Loss: 0.3520
Epoch: 021, Loss: 0.3064
Epoch: 022, Loss: 0.2621
Epoch: 023, Loss: 0.2384
Epoch: 024, Loss: 0.2200
Epoch: 025, Loss: 0.2020
Epoch: 026, Loss: 0.1927
Epoch: 027, Loss: 0.1865
Epoch: 028, Loss: 0.1797
Epoch: 029, Loss: 0.1806
Epoch: 030, Loss: 0.1825
Epoch: 031, Loss: 0.1733
Epoch: 032, Loss: 0.1699
Epoch: 033, Loss: 0.1586
Epoch: 034, Loss: 0.1498
Epoch: 035, Loss: 0.1391
Epoch: 036, Loss: 0.1285
Epoch: 037, Loss: 0.1212
Epoch: 038, Loss: 0.1097
Epoch: 039, Loss: 0.

In [18]:
# MSE on the training nodes with the model in evaluation mode.
model.eval()
# Inference only: disable autograd so no computation graph is built or kept.
with torch.no_grad():
    pred = model(data)
    loss = F.mse_loss(torch.squeeze(pred[data.train_mask]), data.y[data.train_mask])
print(f'Loss: {loss:.4f}')

Loss: 0.0136


In [19]:
# MSE on the test nodes with the model in evaluation mode.
model.eval()
# Inference only: disable autograd so no computation graph is built or kept.
with torch.no_grad():
    pred = model(data)
    loss = F.mse_loss(torch.squeeze(pred[data.test_mask]), data.y[data.test_mask])
print(f'Loss: {loss:.4f}')

Loss: 0.0190


In [4]:
class GCN(torch.nn.Module):
    """Two GCNConv layers producing per-node log-probabilities.

    Layer widths are read from the notebook-level ``dataset``:
    ``num_node_features`` -> 16 -> ``num_classes``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        """Return log-softmax scores (over dim 1) for ``data.x`` on ``data.edge_index``."""
        edge_index = data.edge_index

        hidden = F.relu(self.conv1(data.x, edge_index))
        # Dropout is active only while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.conv2(hidden, edge_index)

        return F.log_softmax(logits, dim=1)

Accuracy: 0.6010


In [10]:
# Display the edge index tensor of the first graph in the dataset.
dataset[0].edge_index

tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])

In [11]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Fresh GCN and a fresh copy of the first graph taken straight from the dataset.
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# 200 full-batch steps minimising the NLL loss on the training nodes.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(
        input=out[data.train_mask],
        target=data.y[data.train_mask],
    )
    loss.backward()
    optimizer.step()

In [12]:
# Classification accuracy on the test nodes.
model.eval()
# Inference only: disable autograd so no computation graph is built or kept.
with torch.no_grad():
    pred = model(data).argmax(dim=1)  # index of the highest-scoring class per node
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7980
