In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import graph_tool.all as gt



In [2]:
from generator_config_sampler import ParamSamplerSpec
from substructure.beam_handler import SampleSubstructureDatasetDoFn, ConvertToTorchGeoDataParDo
from substructure.simulator import Substructure

In [3]:
# Get graph data
param_sampler_specs = [
    ParamSamplerSpec(name="num_graphs",
                     min_val=1000,
                     max_val=1000),
    ParamSamplerSpec(name="num_vertices",
                     min_val=10,
                     max_val=10),
    ParamSamplerSpec(name="edge_prob",
                     min_val=0.6,
                     max_val=0.6),
    ParamSamplerSpec(name="train_prob",
                     min_val=0.6,
                     max_val=0.6)
]

sampler_dofn = SampleSubstructureDatasetDoFn(
    param_sampler_specs, Substructure.TAILED_TRIANGLE_GRAPH
)

sampler_out = next(sampler_dofn.process(sample_id=0))

In [4]:
# Convert to torchgeo data
convert_dofn = ConvertToTorchGeoDataParDo(output_path="/tmp", batch_size=64)
convert_out = next(convert_dofn.process(sampler_out))

In [42]:
# Compute graph metrics
from substructure.beam_handler import ComputeSubstructureGraphMetricsParDo
metrics_dofn = ComputeSubstructureGraphMetricsParDo()
metrics_out = next(metrics_dofn.process(convert_out))

In [40]:
# Try GCN training with graph-world library fns
import gin

from models.benchmarker import Benchmarker, BenchmarkGNNParDo
from models.wrappers import LinearGraphGCNWrapper

gin.bind_parameter('LinearGraphGCNWrapper.num_features', 1)
gin.bind_parameter('LinearGraphGCNWrapper.hidden_channels', 16)
gin.bind_parameter('LinearGraphGCNWrapper.epochs', 100)
gin.bind_parameter('LinearGraphGCNWrapper.lr', 0.0001)
gin.bind_parameter('LinearGraphGCNWrapper.model_name', 'LinearGraphGCN')

benchmarker_wrappers = [
    LinearGraphGCNWrapper,
]
benchmarker_dofn = BenchmarkGNNParDo(benchmarker_wrappers)
benchmarker_dofn.SetOutputPath('/tmp')

In [43]:
benchmarker_out = next(benchmarker_dofn.process(metrics_out))

In [44]:
benchmarker_out

Unnamed: 0,test_mse,test_mse_scaled,num_graphs,num_vertices,edge_prob,train_prob,generator_name,n_nodes,n_edges,edge_density,...,pseudo_diameter,coreness_eq_1,coreness_geq_2,coreness_geq_5,coreness_geq_10,coreness_gini,avg_local_cc,global_cc,cc_size,model_name
0,15584.559448,32.153849,1000,10,0.6,0.6,SubstructureDataset,10.0,27.104,0.301156,...,2.236,0.0022,0.9977,0.2416,0.0,0.029232,0.601822,0.582858,0.9999,LinearGraphGCN


In [10]:
# Copy GCN model from https://colab.sandbox.google.com/drive/1I8a0DfQ3fI7Njc62__mVXUlcAleUclnb?usp=sharing#scrollTo=HvhgQoO8Svw4
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels, num_node_features):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, 1)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x

model = GCN(16, 1)
print(model)

GCN(
  (conv1): GCNConv(1, 16)
  (conv2): GCNConv(16, 16)
  (conv3): GCNConv(16, 16)
  (lin): Linear(in_features=16, out_features=1, bias=True)
)


In [15]:
# Copy GCN training from https://colab.sandbox.google.com/drive/1I8a0DfQ3fI7Njc62__mVXUlcAleUclnb?usp=sharing#scrollTo=HvhgQoO8Svw4
model = GCN(hidden_channels=16, num_node_features=1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# criterion = torch.nn.CrossEntropyLoss()
criterion = torch.nn.MSELoss()

train_loader = convert_out['torch_dataset']['train']
test_loader = convert_out['torch_dataset']['test']

def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
         out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
         loss = criterion(out, data.y)  # Compute the loss.
         loss.backward()  # Derive gradients.
         optimizer.step()  # Update parameters based on gradients.
         optimizer.zero_grad()  # Clear gradients.

def test(loader):
     model.eval()

     correct = 0
     for data in loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)  
         pred = out.argmax(dim=1)  # Use the class with highest probability.
         correct += int((pred == data.y).sum())  # Check against ground-truth labels.
     return correct / len(loader.dataset)  # Derive ratio of correct predictions.

def test_mse(loader):
     model.eval()

     total_sse = 0.0
     for data in loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)  
         mse = float(criterion(out[:, 0], data.y))
         total_sse += mse * data.batch.size().numel()
     return total_sse / len(loader.dataset)


for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    test_acc = test_mse(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.0000, Test Acc: 27412.8763
Epoch: 002, Train Acc: 0.0000, Test Acc: 19268.3648
Epoch: 003, Train Acc: 0.0000, Test Acc: 1830.9077
Epoch: 004, Train Acc: 0.0000, Test Acc: 1123.3676
Epoch: 005, Train Acc: 0.0000, Test Acc: 2185.6266
Epoch: 006, Train Acc: 0.0000, Test Acc: 899.0067
Epoch: 007, Train Acc: 0.0000, Test Acc: 689.1623
Epoch: 008, Train Acc: 0.0000, Test Acc: 1151.8939
Epoch: 009, Train Acc: 0.0000, Test Acc: 817.5259
Epoch: 010, Train Acc: 0.0000, Test Acc: 898.6178
Epoch: 011, Train Acc: 0.0000, Test Acc: 812.8600
Epoch: 012, Train Acc: 0.0000, Test Acc: 929.8403
Epoch: 013, Train Acc: 0.0000, Test Acc: 743.3669
Epoch: 014, Train Acc: 0.0000, Test Acc: 897.0218
Epoch: 015, Train Acc: 0.0000, Test Acc: 827.2895
Epoch: 016, Train Acc: 0.0000, Test Acc: 865.5602
Epoch: 017, Train Acc: 0.0000, Test Acc: 811.4898
Epoch: 018, Train Acc: 0.0000, Test Acc: 837.8930
Epoch: 019, Train Acc: 0.0000, Test Acc: 825.7188
Epoch: 020, Train Acc: 0.0000, Test Acc: 8

Epoch: 166, Train Acc: 0.0000, Test Acc: 739.4761
Epoch: 167, Train Acc: 0.0000, Test Acc: 822.7776
Epoch: 168, Train Acc: 0.0000, Test Acc: 956.0610
Epoch: 169, Train Acc: 0.0000, Test Acc: 1085.0457
Epoch: 170, Train Acc: 0.0000, Test Acc: 767.9491
