# Mutag: evaluating a pretrained GCN graph classifier

In [1]:
from math import sqrt
import pickle
import sys

import dgl
import torch

sys.path.append("../")
from gcn import GCNGraph
from utils.preprocessing.mutag_preprocessing_0 \
    import mutag_preprocessing_0

## Data

In [2]:
# Raw Mutagenicity files, addressed relative to the notebook's working
# directory in the same way as the processed split index loaded below.
# The previous value was an absolute path inside one developer's home
# directory and could not resolve on any other machine.
# NOTE(review): assumes the raw files live next to `processed/` under
# ../../../data/Mutagenicity -- confirm against the repository layout.
dataset_dir = "../../../data/Mutagenicity/raw"
dataset = mutag_preprocessing_0(dataset_dir)

processing


In [3]:
# Read the pickled train/val/test index lists and show which splits exist.
# NOTE(review): pickle can execute arbitrary code on load -- only use with
# index files produced by this project.
with open("../../../data/Mutagenicity/processed/index.pkl", "rb") as index_file:
    index = pickle.load(index_file)
split_names = index.keys()
print(split_names)

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [4]:
# Materialise each split as a tuple of dataset items, picked by the stored
# indices; the three tuples are unpacked in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    tuple(dataset[sample_idx] for sample_idx in index[split_key])
    for split_key in ('idx_train', 'idx_val', 'idx_test')
)

In [5]:
# Sizes of the train, validation and test splits, in that order.
print(*map(len, (train_dataset, val_dataset, test_dataset)))

1150 282 1132


## Model

In [6]:
# The input width is read from dimension 1 of the first graph's node-feature
# tensor, so the model always matches the preprocessed data.
node_feat_dim = dataset.graphs[0].ndata['feat'].size(1)
model = GCNGraph(in_feats=node_feat_dim, h_feats=128)
print(model)

GCNGraph(
  (conv1): GraphConvLayer()
  (conv2): GraphConvLayer()
  (conv3): GraphConvLayer()
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)


## Load weights

In [7]:
# Map every stored tensor to the CPU while loading, so the checkpoint can be
# read on machines without a GPU regardless of the device it was saved from.
# The model in this notebook is built on the CPU, so this is the matching device.
state_dict = torch.load("mutag_weights.pt", map_location="cpu")
# Print each parameter name with its shape as a quick check against the
# architecture printed for the model.
for key, val in state_dict.items():
    print(f"{key:<15}: {val.size()}")

conv1.weight   : torch.Size([10, 128])
conv1.bias     : torch.Size([128])
conv2.weight   : torch.Size([128, 128])
conv2.bias     : torch.Size([128])
conv3.weight   : torch.Size([128, 128])
conv3.bias     : torch.Size([128])
dense1.weight  : torch.Size([16, 128])
dense1.bias    : torch.Size([16])
dense2.weight  : torch.Size([8, 16])
dense2.bias    : torch.Size([8])
dense3.weight  : torch.Size([1, 8])
dense3.bias    : torch.Size([1])


In [8]:
# Copy the saved parameters into the freshly constructed model.
model.load_state_dict(state_dict)
# Switch to evaluation mode; as the last expression of the cell, the returned
# module is also displayed by the notebook.
model.eval()

GCNGraph(
  (conv1): GraphConvLayer()
  (conv2): GraphConvLayer()
  (conv3): GraphConvLayer()
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)

## Eval

In [9]:
def test(dataset):
    model.eval()
    correct = 0
    for data in dataset: # Iterate in batches over the training/test dataset.
        graph, label = data
        out = model(
            graph,
            graph.ndata['feat'].float(),
            graph.edata['weight'].float()
        ).squeeze()
        break
        pred = out.round()
        correct += int((pred == label).sum())
    return correct / len(dataset)

In [10]:
# Evaluate the loaded model on the test split; `test` returns a fraction,
# which is scaled to a percentage for display.
test_acc = test(test_dataset)
print(f"Test accuracy: {100 * test_acc:.2f} %")

tensor(indices=tensor([[ 0,  0,  0,  0,  1,  1,  1,  2,  2,  3,  3,  3,  3,  4,
                         4,  4,  4,  5,  5,  5,  6,  7,  7,  7,  8,  8,  8,  9,
                         9,  9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13,
                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
                       [ 1,  2,  3,  4,  0,  5,  6,  0,  7,  0,  4, 15, 16,  0,
                         3, 17, 18,  1,  7,  8,  1,  2,  5,  9,  5, 10, 19,  7,
                        11, 20,  8, 11, 12,  9, 10, 21, 10, 13, 14, 12, 22, 23,
                        24, 12,  3,  3,  4,  4,  8,  9, 11, 13, 13, 13]]),
       values=tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
       size=(25, 25), nnz=54, layout=torch.sparse_coo)
Test accura

## Rough: label distribution of the validation split

In [13]:
# Collect the integer label of every validation item, then count how often
# each distinct label value occurs.
labels = [int(sample[1]) for sample in val_dataset]
torch.Tensor(labels).unique(return_counts=True)

(tensor([0., 1.]), tensor([ 38, 244]))