# NCI1

In [1]:
import torch

from GNN import GCN_NCI1
from preprocessing import NCI1Dataset

## Data

In [2]:
# Seed torch's global RNG before anything random happens so that the shuffle
# below, the weight initialisation and the batch order are repeatable on a
# fresh "Restart Kernel -> Run All".
# NOTE(review): assumes NCI1Dataset.shuffle() draws from torch's global RNG
# (as torch_geometric's Dataset.shuffle() does) - confirm in preprocessing.py.
SEED = 42
torch.manual_seed(SEED)

# Load NCI1 from the repository's data directory, then randomise the graph
# order so the positional train/test split further down is a random split.
dataset = NCI1Dataset("../../../data/NCI1")
dataset = dataset.shuffle()

In [3]:
from torch_geometric.loader import DataLoader

# Positional 80/20 split of the dataset: the first 80% of graphs are used for
# training and the remainder is held out for testing.
split_at = int(0.8 * len(dataset))
train_dataset, test_dataset = dataset[:split_at], dataset[split_at:]

# Both loaders share the batch size; only the training loader shuffles.
loader_kwargs = dict(batch_size=32)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [4]:
# Sanity check: pull a single mini-batch from the training loader and print
# its summary (nothing is printed if the loader yields no batches).
data = next(iter(train_loader), None)
if data is not None:
    print(data)

DataBatch(x=[976, 37], edge_index=[2, 2082], y=[32], batch=[976], ptr=[33])


## Model

In [5]:
# Hyper-parameters handed to the classifier. The input width is read from the
# dataset so it always matches the node feature vectors.
HIDDEN_FEATURES = 64
N_CLASSES = 2

model = GCN_NCI1(
    n_classes=N_CLASSES,
    h_features=HIDDEN_FEATURES,
    in_features=dataset.num_node_features,
)

In [6]:
# Print the model's textual summary to check the layer layout built above.
print(model)

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 64)
  (conv2): GraphConvolution (64 -> 64)
  (conv3): GraphConvolution (64 -> 64)
  (dense1): Linear(in_features=64, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)


## Train

In [9]:
# Binary cross-entropy on probabilities, optimised with Adam over every model
# parameter.
# NOTE(review): BCELoss expects inputs in [0, 1]; this assumes GCN_NCI1 already
# applies a sigmoid to its output - confirm in GNN.py.
LEARNING_RATE = 0.01
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

def train():
    """Run one optimisation pass over every mini-batch in ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.
    Puts the model in training mode and performs one optimizer step per batch.
    Returns nothing.
    """
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
        # Clear gradients *before* the backward pass: if a previous run was
        # interrupted between backward() and the clean-up, the stale gradients
        # would otherwise be accumulated into this update.
        optimizer.zero_grad()

        # Flatten with reshape(-1) instead of squeeze(): squeeze() would turn
        # the output for a batch containing a single graph into a 0-d tensor,
        # which BCELoss rejects against the length-1 target.
        out = model(data.x, data.edge_index, data.batch).reshape(-1)  # Forward pass.
        loss = criterion(out, data.y.float())  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.

def test(loader):
    """Return the model's accuracy over all graphs served by ``loader``.

    Args:
        loader: Iterable of mini-batches exposing ``x``, ``edge_index``,
            ``batch`` and ``y``; must also expose ``dataset`` so the total
            number of graphs can be read with ``len(loader.dataset)``.

    Returns:
        Fraction of graphs whose rounded model output equals the label.
    """
    model.eval()

    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            # reshape(-1) keeps a single-graph batch 1-D (squeeze() would make
            # it 0-d), so predictions always line up element-wise with data.y.
            out = model(data.x, data.edge_index, data.batch).reshape(-1)
            pred = out.round()  # Threshold the model output to a 0/1 label.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

In [10]:
# Train for up to 100 epochs, reporting accuracy after each one and keeping a
# snapshot of the weights from the epoch with the best test accuracy so far.
# NOTE(review): the checkpoint is selected on the test split, so the best test
# accuracy is an optimistic estimate; a separate validation split would be
# needed for an unbiased number.
best_test_acc = 0.0
best_model_params = None  # Replaced by a weight snapshot at the first checkpoint.
for epoch in range(1, 101):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
    if test_acc >= best_test_acc:
        best_test_acc = test_acc
        # state_dict() returns references to the live parameter tensors, which
        # the optimizer keeps updating in place. Clone them so the snapshot
        # really holds the weights of this epoch rather than the latest ones.
        best_model_params = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        print("Checkpoint saved!")

Epoch: 001, Train Acc: 0.5836, Test Acc: 0.6046
Checkpoint saved!
Epoch: 002, Train Acc: 0.5809, Test Acc: 0.6131
Checkpoint saved!
Epoch: 003, Train Acc: 0.5861, Test Acc: 0.5949
Epoch: 004, Train Acc: 0.5651, Test Acc: 0.5742
Epoch: 005, Train Acc: 0.5961, Test Acc: 0.6071
Epoch: 006, Train Acc: 0.6013, Test Acc: 0.6204
Checkpoint saved!
Epoch: 007, Train Acc: 0.6131, Test Acc: 0.6314
Checkpoint saved!
Epoch: 008, Train Acc: 0.6344, Test Acc: 0.6703
Checkpoint saved!
Epoch: 009, Train Acc: 0.6235, Test Acc: 0.6253


KeyboardInterrupt: 