# NCI1

In [1]:
import pickle

import torch
from torch_geometric.loader import DataLoader

from GNN import GCN_NCI1
from preprocessing import NCI1Dataset

torch.manual_seed(12345)

<torch._C.Generator at 0x7fea7d6d5f90>

## Data

In [2]:
dataset = NCI1Dataset("../../../data/NCI1")

In [3]:
len(dataset)

4110

In [4]:
with open("../../../data/NCI1/index.pkl", "rb") as file:
    index = pickle.load(file)
print(index.keys())

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [5]:
train_dataset = dataset[index['idx_train']]
val_dataset = dataset[index['idx_val']]
test_dataset = dataset[index['idx_test']]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [15]:
graphs = [len(set(index)) for index in [index['idx_train'], index['idx_val'], index['idx_test']]]
[round(100 * i/sum(graphs), 2) for i in graphs]

[45.05, 10.98, 43.97]

In [6]:
for data in train_loader:
    print(data)
    break

DataBatch(x=[1858, 37], edge_index=[2, 4030], y=[64], batch=[1858], ptr=[65])


## Model

In [7]:
model = GCN_NCI1(
    in_features=dataset.num_node_features,
    h_features=128,
)

In [8]:
print(model)

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=32, bias=True)
  (dense3): Linear(in_features=32, out_features=2, bias=True)
)


## Train

In [9]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.NLLLoss()

def train():
    model.train()
    for data in train_loader: # Iterate in batches over the training dataset.
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y.long())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    model.eval()
    correct = 0
    for data in loader: # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)
        pred = torch.argmax(out, dim=-1)
        correct += int((pred == data.y.long()).sum())
    return correct / len(loader.dataset)

In [10]:
best_test_acc = 0.0
for epoch in range(1, 101):
    train()
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
    if val_acc >= best_test_acc:
        best_test_acc = val_acc
        best_model_params = model.state_dict()
        print("Checkpoint saved!")

Epoch: 001, Train Acc: 0.4920, Val Acc: 0.5250
Checkpoint saved!
Epoch: 002, Train Acc: 0.4920, Val Acc: 0.5250
Checkpoint saved!
Epoch: 003, Train Acc: 0.5742, Val Acc: 0.5749
Checkpoint saved!
Epoch: 004, Train Acc: 0.5742, Val Acc: 0.5788
Checkpoint saved!
Epoch: 005, Train Acc: 0.5864, Val Acc: 0.5729
Epoch: 006, Train Acc: 0.5139, Val Acc: 0.4930
Epoch: 007, Train Acc: 0.5873, Val Acc: 0.6048
Checkpoint saved!
Epoch: 008, Train Acc: 0.5903, Val Acc: 0.5828
Epoch: 009, Train Acc: 0.6258, Val Acc: 0.6228
Checkpoint saved!
Epoch: 010, Train Acc: 0.5917, Val Acc: 0.5928
Epoch: 011, Train Acc: 0.5640, Val Acc: 0.5250
Epoch: 012, Train Acc: 0.5509, Val Acc: 0.5170
Epoch: 013, Train Acc: 0.5903, Val Acc: 0.5729
Epoch: 014, Train Acc: 0.6092, Val Acc: 0.5968
Epoch: 015, Train Acc: 0.6214, Val Acc: 0.6367
Checkpoint saved!
Epoch: 016, Train Acc: 0.6102, Val Acc: 0.5609
Epoch: 017, Train Acc: 0.6282, Val Acc: 0.5888
Epoch: 018, Train Acc: 0.6360, Val Acc: 0.6287
Epoch: 019, Train Acc: 0.651

## Eval

In [11]:
# best_model_params = torch.load("nci1_weights.pt")
model.load_state_dict(best_model_params)
model.eval()

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=32, bias=True)
  (dense3): Linear(in_features=32, out_features=2, bias=True)
)

In [12]:
for dataset in ['train', 'val', 'test']:
    acc = test(eval(f"{dataset}_loader"))
    print(f"{dataset} accuracy: {100 * acc:.2f} %")

train accuracy: 75.62 %
val accuracy: 69.66 %
test accuracy: 68.00 %


## Save weights

In [13]:
torch.save(best_model_params, "nci1_weights.pt")

## Rough