# NCI1

In [1]:
import pickle

import torch
from torch_geometric.loader import DataLoader

from GNN import GCN_NCI1
from preprocessing import NCI1Dataset

torch.manual_seed(12345)

## Data

In [2]:
dataset = NCI1Dataset("../../../data/NCI1")

In [3]:
with open("../../../data/NCI1/index.pkl", "rb") as file:
    index = pickle.load(file)
print(index.keys())

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [4]:
train_dataset = dataset[index['idx_train']]
val_dataset = dataset[index['idx_val']]
test_dataset = dataset[index['idx_test']]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [5]:
for data in train_loader:
    print(data)
    break

DataBatch(x=[1953, 37], edge_index=[2, 4228], y=[64], batch=[1953], ptr=[65])


## Model

In [6]:
model = GCN_NCI1(
    in_features=dataset.num_node_features,
    h_features=128,
    n_classes=2
)

In [7]:
print(model)

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (conv3): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)


## Train

In [11]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCELoss()

def train():
    model.train()
    for data in train_loader: # Iterate in batches over the training dataset.
        out = model(data.x, data.edge_index, data.batch).squeeze() # Perform a single forward pass.
        loss = criterion(out, data.y.float()) # Compute the loss.
        loss.backward() # Derive gradients.
        optimizer.step() # Update parameters based on gradients.
        optimizer.zero_grad() # Clear gradients.

def test(loader):
    model.eval()
    correct = 0
    for data in loader: # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch).squeeze()
        pred = out.round() # Use the class with highest probability.
        correct += int((pred == data.y).sum()) # Check against ground-truth labels.
    return correct / len(loader.dataset) # Derive ratio of correct predictions.

In [18]:
best_test_acc = 0.0
for epoch in range(1, 101):
    train()
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
    if val_acc >= best_test_acc:
        best_test_acc = val_acc
        best_model_params = model.state_dict()
        print("Checkpoint saved!")

Epoch: 001, Train Acc: 0.5908, Test Acc: 0.5669
Checkpoint saved!
Epoch: 002, Train Acc: 0.5805, Test Acc: 0.5908
Checkpoint saved!
Epoch: 003, Train Acc: 0.6107, Test Acc: 0.6367
Checkpoint saved!
Epoch: 004, Train Acc: 0.6238, Test Acc: 0.6307
Epoch: 005, Train Acc: 0.6560, Test Acc: 0.6287
Epoch: 006, Train Acc: 0.6491, Test Acc: 0.6208
Epoch: 007, Train Acc: 0.6224, Test Acc: 0.5808
Epoch: 008, Train Acc: 0.6745, Test Acc: 0.6507
Checkpoint saved!
Epoch: 009, Train Acc: 0.6603, Test Acc: 0.6627
Checkpoint saved!
Epoch: 010, Train Acc: 0.6706, Test Acc: 0.6687
Checkpoint saved!
Epoch: 011, Train Acc: 0.6905, Test Acc: 0.6906
Checkpoint saved!
Epoch: 012, Train Acc: 0.6866, Test Acc: 0.6886
Epoch: 013, Train Acc: 0.6847, Test Acc: 0.6766
Epoch: 014, Train Acc: 0.6939, Test Acc: 0.7126
Checkpoint saved!
Epoch: 015, Train Acc: 0.6964, Test Acc: 0.7146
Checkpoint saved!
Epoch: 016, Train Acc: 0.6793, Test Acc: 0.6966
Epoch: 017, Train Acc: 0.6920, Test Acc: 0.7106
Epoch: 018, Train Acc:

## Eval

In [9]:
best_model_params = torch.load("nci1_weights.pt")
model.load_state_dict(best_model_params)
model.eval()

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (conv3): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)

In [12]:
for dataset in ['train', 'val', 'test']:
    acc = test(eval(f"{dataset}_loader"))
    print(f"{dataset} accuracy: {100 * acc:.2f} %")

train accuracy: 78.69 %
val accuracy: 74.85 %
test accuracy: 70.54 %


## Save weights

In [21]:
torch.save(best_model_params, "nci1_weights.pt")

## Rough

In [25]:
for data in dataset[:10]:
    print(data)

Data(x=[21, 37], edge_index=[2, 42], y=0)
Data(x=[24, 37], edge_index=[2, 52], y=0)
Data(x=[29, 37], edge_index=[2, 58], y=0)
Data(x=[28, 37], edge_index=[2, 62], y=0)
Data(x=[23, 37], edge_index=[2, 46], y=0)
Data(x=[26, 37], edge_index=[2, 54], y=0)
Data(x=[13, 37], edge_index=[2, 26], y=0)
Data(x=[16, 37], edge_index=[2, 36], y=0)
Data(x=[26, 37], edge_index=[2, 54], y=0)
Data(x=[20, 37], edge_index=[2, 42], y=0)


In [34]:
best_model_params['conv1.weight']

tensor([[-0.0118, -0.0508, -0.5350,  ..., -0.0652, -0.9004, -0.1335],
        [-0.0654, -0.1208, -0.3913,  ...,  0.7258, -0.4484, -0.0888],
        [-0.0500, -0.4100, -0.4445,  ..., -0.3790, -0.7915, -0.0540],
        ...,
        [ 0.0473, -0.0317, -0.0477,  ..., -0.0682, -0.0147, -0.0491],
        [ 0.0347,  0.0393,  0.0695,  ..., -0.0644, -0.0703,  0.0640],
        [ 0.0154,  0.6167,  0.5162,  ...,  0.7527,  0.0603, -0.0775]])