# Mutag (Mutagenicity dataset): evaluating a pretrained GCN graph classifier

In [1]:
from math import sqrt
import sys

import dgl
import torch

sys.path.append("../")
from gcn import GCNGraph
from utils.preprocessing.mutag_preprocessing_0 \
    import mutag_preprocessing_0

## Data

In [2]:
# Raw Mutagenicity files, addressed relative to the notebook's working directory instead of
# a machine-specific absolute path. This assumes the notebook sits three levels below the
# repository root, which the "../../../data/Mutagenicity/processed/index.pkl" path used
# for the split indices already relies on.
dataset_dir = "../../../data/Mutagenicity/raw"
dataset = mutag_preprocessing_0(dataset_dir)

processing


In [5]:
import pickle
# Load the pre-computed split indices from the processed-data directory.
# NOTE(review): pickle.load can execute arbitrary code -- only open index files you trust.
with open("../../../data/Mutagenicity/processed/index.pkl", "rb") as file:
    index = pickle.load(file)
# Show which splits the index file provides.
print(index.keys())

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [6]:
# Materialize each split as a tuple of dataset entries, in train / val / test order,
# by looking up the stored indices for that split.
train_dataset, val_dataset, test_dataset = (
    tuple(dataset[i] for i in index[split_key])
    for split_key in ('idx_train', 'idx_val', 'idx_test')
)

In [7]:
# Report the number of samples in the train, validation and test splits (space-separated).
print(*map(len, (train_dataset, val_dataset, test_dataset)))

1150 282 1132


## Model

In [8]:
# Input width is the number of node-feature columns on the first graph of the dataset;
# the hidden width is fixed at 128.
model = GCNGraph(
    h_feats=128,
    in_feats=dataset.graphs[0].ndata['feat'].shape[1],
)
print(model)

GCNGraph(
  (conv1): GraphConvLayer()
  (conv2): GraphConvLayer()
  (conv3): GraphConvLayer()
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)


## Load weights

In [9]:
# Deserialize the checkpoint onto the CPU so it also loads on machines that lack the
# device it was saved from; no cell shown here moves the model off the CPU.
state_dict = torch.load("mutag_weights.pt", map_location="cpu")
# List every stored parameter with its shape as a sanity check against the model layout.
for key, val in state_dict.items():
    print(f"{key:<15}: {val.size()}")

conv1.weight   : torch.Size([10, 128])
conv1.bias     : torch.Size([128])
conv2.weight   : torch.Size([128, 128])
conv2.bias     : torch.Size([128])
conv3.weight   : torch.Size([128, 128])
conv3.bias     : torch.Size([128])
dense1.weight  : torch.Size([16, 128])
dense1.bias    : torch.Size([16])
dense2.weight  : torch.Size([8, 16])
dense2.bias    : torch.Size([8])
dense3.weight  : torch.Size([1, 8])
dense3.bias    : torch.Size([1])


In [10]:
# Copy the checkpoint tensors into the model's parameters.
model.load_state_dict(state_dict)
# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

GCNGraph(
  (conv1): GraphConvLayer()
  (conv2): GraphConvLayer()
  (conv3): GraphConvLayer()
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)

## Eval

In [29]:
def test(dataset):
    model.eval()
    correct = 0
    for data in dataset:  # Iterate in batches over the training/test dataset.
        graph, label = data
        mat_size = int(sqrt(graph.edata['weight'].size(0)))
        dense = graph.edata['weight'].reshape(mat_size, mat_size)
        sparse = dense.to_sparse()
        
        out = model(graph, graph.ndata['feat'].float(), sparse.indices()).squeeze()
        pred = out.round()  # Use the class with highest probability.
        # cfsqr preprocessing flips the labels. So we need to flip the predictions as well.
        if pred == 0:
            pred = 1
        else:
            pred = 0
        correct += int((pred == label).sum())  # Check against ground-truth labels.
    return correct / len(dataset)  # Derive ratio of correct predictions.

In [30]:
# Score the test split and report the accuracy as a percentage with two decimals.
test_acc = test(test_dataset)
print(f"Test accuracy: {100 * test_acc:.2f} %")

Test accuracy: 96.64 %


## Rough