# Mutag

In [54]:
from math import sqrt
import pickle
import sys

import dgl
import torch

sys.path.append("../")
from gcn import GCNGraph
from utils.preprocessing.mutag_preprocessing_0 \
    import mutag_preprocessing_0

## Data

In [55]:
# Build the dataset object from the raw Mutagenicity files.
# NOTE(review): this is an absolute path on one machine; other cells reach the
# data tree through "../../../data/Mutagenicity/..." — confirm whether the same
# relative root can be used here so the notebook runs elsewhere.
dataset_dir = "/home/shade/code/github/graph-classification/data/Mutagenicity/raw"
dataset = mutag_preprocessing_0(dataset_dir)

processing


In [56]:
# Load the pickled index object and show its keys; the cells below read
# 'idx_train', 'idx_val' and 'idx_test' from it to build the splits.
# NOTE(review): pickle.load can execute code embedded in the file — only load
# index files that come from a trusted source.
with open("../../../data/Mutagenicity/processed/index.pkl", "rb") as file:
    index = pickle.load(file)
print(index.keys())

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [57]:
# Materialise each split as a tuple of dataset items, in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    tuple(dataset[i] for i in index[split_key])
    for split_key in ('idx_train', 'idx_val', 'idx_test')
)

In [58]:
# Sizes of the train, validation and test splits, space-separated.
print(*map(len, (train_dataset, val_dataset, test_dataset)))

1150 282 1132


## Model

In [59]:
# The input width is read off the node-feature matrix of the first graph.
num_node_features = dataset.graphs[0].ndata['feat'].size(1)
model = GCNGraph(in_feats=num_node_features, h_feats=128)
print(model)

GCNGraph(
  (conv1): GraphConvLayer()
  (conv2): GraphConvLayer()
  (conv3): GraphConvLayer()
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)


## Train

In [60]:
# Binary cross-entropy on the model output, optimised with Adam over all model parameters.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    """Run one pass over ``train_dataset``, updating the model after every graph.

    Uses the notebook-level ``model``, ``criterion`` and ``optimizer``. Each
    ``(graph, label)`` pair is fed through the model on its own — samples are
    not batched — followed by a backward pass and an optimizer step.
    """
    model.train()
    for graph, label in train_dataset:  # one graph per optimisation step
        node_feats = graph.ndata['feat'].float()
        edge_weights = graph.edata['weight'].float()
        prediction = model(graph, node_feats, edge_weights).squeeze()
        loss = criterion(prediction, label.float())
        loss.backward()
        optimizer.step()
        # Clear the gradients so they do not accumulate into the next sample.
        optimizer.zero_grad()

def test(dataset):
    """Return the fraction of samples in ``dataset`` the model classifies correctly.

    Uses the notebook-level ``model`` and puts it in evaluation mode first.

    Args:
        dataset: Sized iterable of ``(graph, label)`` pairs; each graph must
            expose ``ndata['feat']`` and ``edata['weight']``.

    Returns:
        Number of correct predictions divided by ``len(dataset)``.

    Raises:
        ZeroDivisionError: If ``dataset`` is empty.
    """
    model.eval()
    correct = 0
    # Evaluation never calls backward(), so do not record an autograd graph
    # for the forward passes.
    with torch.no_grad():
        for graph, label in dataset:  # one graph at a time; nothing is batched
            out = model(
                graph,
                graph.ndata['feat'].float(),
                graph.edata['weight'].float()
            ).squeeze()
            # Round the model output to the nearest integer to get the predicted label.
            pred = out.round()

            correct += int((pred == label).sum())
    return correct / len(dataset)

In [34]:
# Best accuracy seen so far. Despite the name, the value tracked here is the
# *validation* accuracy (see the comparison below); the name is kept unchanged
# so any other cell that reads it keeps working.
best_test_acc = 0.0
for epoch in range(1, 201):
    train()
    train_acc = test(train_dataset)
    val_acc = test(val_dataset)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
    if val_acc >= best_test_acc:
        best_test_acc = val_acc
        # state_dict() returns references to the live parameter tensors, which
        # later optimizer steps overwrite in place. Clone every tensor so the
        # snapshot really holds the weights of the best epoch.
        # The snapshot lives in memory only; nothing is written to disk here.
        best_model_params = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        print("Checkpoint saved!")

Epoch: 001, Train Acc: 0.7913, Val Acc: 0.8652
Checkpoint saved!


KeyboardInterrupt: 

Training fails because we have not batched the data. We are supplying one graph at a time for training, which does not help the model learn.

## Load weights

In [83]:
# Load the saved weights from disk and list every entry with its tensor size,
# with the entry name left-aligned in a 15-character column.
# NOTE(review): only load weight files from a trusted source — confirm where
# new_mutag_weights.pt comes from.
state_dict = torch.load("new_mutag_weights.pt")
for key, val in state_dict.items():
    print(f"{key:<15}: {val.size()}")

conv1.weight   : torch.Size([10, 128])
conv1.bias     : torch.Size([128])
conv2.weight   : torch.Size([128, 128])
conv2.bias     : torch.Size([128])
conv3.weight   : torch.Size([128, 128])
conv3.bias     : torch.Size([128])
dense1.weight  : torch.Size([16, 128])
dense1.bias    : torch.Size([16])
dense2.weight  : torch.Size([8, 16])
dense2.bias    : torch.Size([8])
dense3.weight  : torch.Size([1, 8])
dense3.bias    : torch.Size([1])


In [84]:
# Copy the loaded weights into the model, then switch it to evaluation mode.
# model.eval() is the last expression, so the notebook displays the module.
model.load_state_dict(state_dict)
model.eval()

GCNGraph(
  (conv1): GraphConvLayer()
  (conv2): GraphConvLayer()
  (conv3): GraphConvLayer()
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)

## Eval

In [85]:
# Score the current model on the test split and report accuracy as a percentage.
test_acc = test(test_dataset)
print(f"Test accuracy: {100 * test_acc:.2f} %")

Test accuracy: 98.23 %


## Rough

In [91]:
# Collect the integer label of every training sample and count each distinct value.
labels = [int(sample[1]) for sample in train_dataset]
torch.Tensor(labels).unique(return_counts=True)

(tensor([0., 1.]), tensor([910, 240]))

In [87]:
# Load a pickled object that a later cell passes to the model as the edge-weight
# argument for dataset.graphs[49].
# NOTE(review): absolute path into a personal downloads folder — the notebook
# cannot be re-run elsewhere without this file; consider moving it under the
# project data directory. pickle.load also executes code embedded in the file,
# so only load it from a trusted source.
with open("/home/shade/downloads/masked_adj.pkl", "rb") as file:
    masked_adj_49 = pickle.load(file)

In [88]:
# Turn off gradient tracking on the loaded mask before feeding it to the model.
masked_adj_49.requires_grad = False

In [89]:
# Model output for graph 49 when the loaded mask replaces its edge weights.
graph_49 = dataset.graphs[49]
model(graph_49, graph_49.ndata['feat'].float(), masked_adj_49)

tensor([[0.9763]], grad_fn=<SigmoidBackward>)

In [90]:
# Model output for graph 49 with its own stored edge weights, for comparison.
graph_49 = dataset.graphs[49]
model(graph_49, graph_49.ndata['feat'].float(), graph_49.edata['weight'])

tensor([[1.]], grad_fn=<SigmoidBackward>)

In [53]:
# Check the dtype of graph 49's stored edge weights.
dataset.graphs[49].edata['weight'].dtype

torch.float32

In [82]:
# Inspect the first row of the first conv layer's weight matrix from the loaded weights.
state_dict['conv1.weight'][0]

tensor([-0.1370, -0.0764, -0.4804, -0.1968, -0.0904, -0.1928, -0.5255, -0.3249,
        -0.1876, -0.3404, -0.1368,  0.3557, -0.2448, -0.2277, -0.1359, -0.0202,
        -0.3081, -0.5144, -0.2919, -0.0261, -0.4159, -0.0839, -0.2636, -0.6694,
        -0.4344, -0.1721, -0.1353, -0.1064, -0.4041, -0.1016, -0.3166, -0.4336,
         0.0222, -0.9834, -1.0611, -0.2956, -0.3851, -0.3787, -0.2058, -0.2987,
        -0.3875, -0.0783, -0.0639, -0.2598, -0.4567, -0.2782, -0.4126, -0.2928,
        -0.3130, -0.0430, -0.3958, -0.2801, -0.0516, -0.0580, -0.0160, -1.0119,
        -0.0907, -0.1717, -0.0297, -0.3054, -0.6030, -0.3072, -0.4305, -0.1296,
        -0.0167, -0.1503, -0.1967, -0.1303,  0.0871, -0.0463, -0.0833, -0.5474,
        -0.1450,  0.4611, -0.4763, -0.0014, -0.9524, -0.0916, -0.5872, -0.1042,
        -0.3744, -0.0876, -0.8267, -0.3524, -0.1421, -0.0959, -0.1860, -0.1253,
         0.1755, -0.3871, -0.3049, -0.0716, -0.0856, -0.2816, -0.0456,  0.4822,
        -0.1386, -0.1239, -0.2325, -0.19

In [92]:
# 240 and 910 are the per-class counts of the training labels printed above.
# NOTE(review): this is the count of class 1 relative to class 0, in percent —
# not class 1's share of all 1150 training graphs (240/1150); confirm which was intended.
100 * 240/910

26.373626373626372