# NCI1

In [11]:
import pickle

import torch
from torch_geometric.loader import DataLoader

from GNN import GCN_NCI1
from preprocessing import NCI1Dataset

torch.manual_seed(12345)

<torch._C.Generator at 0x7ff7c5751fb0>

## Data

In [12]:
dataset = NCI1Dataset("../../../data/NCI1")

Processing...


../../../data/NCI1/raw/NCI1_A.txt ../../../data/NCI1/raw/NCI1_graph_indicator.txt ../../../data/NCI1/raw/NCI1_node_labels.txt ../../../data/NCI1/raw/NCI1_graph_labels.txt


Done!


In [13]:
len(dataset)

4110

In [14]:
with open("../../../data/NCI1/index.pkl", "rb") as file:
    index = pickle.load(file)
print(index.keys())

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [15]:
train_dataset = dataset[index['idx_train']]
val_dataset = dataset[index['idx_val']]
test_dataset = dataset[index['idx_test']]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [16]:
for data in train_loader:
    print(data)
    break

DataBatch(x=[1858, 37], edge_index=[2, 4030], y=[64], batch=[1858], ptr=[65])


## Model

In [17]:
model = GCN_NCI1(
    in_features=dataset.num_node_features,
    h_features=128,
)

In [18]:
print(model)

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=2, bias=True)
)


## Train

In [19]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.NLLLoss()

def train():
    model.train()
    for data in train_loader: # Iterate in batches over the training dataset.
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y.long())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    model.eval()
    correct = 0
    for data in loader: # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)
        pred = torch.argmax(out, dim=-1)
        correct += int((pred == data.y.long()).sum())
    return correct / len(loader.dataset)

In [20]:
best_test_acc = 0.0
for epoch in range(1, 101):
    train()
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
    if val_acc >= best_test_acc:
        best_test_acc = val_acc
        best_model_params = model.state_dict()
        print("Checkpoint saved!")

Epoch: 001, Train Acc: 0.5090, Val Acc: 0.4731
Checkpoint saved!
Epoch: 002, Train Acc: 0.5985, Val Acc: 0.5729
Checkpoint saved!
Epoch: 003, Train Acc: 0.5800, Val Acc: 0.5629
Epoch: 004, Train Acc: 0.6127, Val Acc: 0.5928
Checkpoint saved!
Epoch: 005, Train Acc: 0.6165, Val Acc: 0.6427
Checkpoint saved!
Epoch: 006, Train Acc: 0.6448, Val Acc: 0.6088
Epoch: 007, Train Acc: 0.6370, Val Acc: 0.6347
Epoch: 008, Train Acc: 0.6462, Val Acc: 0.6327
Epoch: 009, Train Acc: 0.6535, Val Acc: 0.6687
Checkpoint saved!
Epoch: 010, Train Acc: 0.6180, Val Acc: 0.6427
Epoch: 011, Train Acc: 0.6399, Val Acc: 0.6607
Epoch: 012, Train Acc: 0.6564, Val Acc: 0.6387
Epoch: 013, Train Acc: 0.6730, Val Acc: 0.6607
Epoch: 014, Train Acc: 0.6496, Val Acc: 0.6208
Epoch: 015, Train Acc: 0.6745, Val Acc: 0.6727
Checkpoint saved!
Epoch: 016, Train Acc: 0.6774, Val Acc: 0.6467
Epoch: 017, Train Acc: 0.6720, Val Acc: 0.6327
Epoch: 018, Train Acc: 0.6774, Val Acc: 0.6587
Epoch: 019, Train Acc: 0.6676, Val Acc: 0.6607

## Eval

In [21]:
# best_model_params = torch.load("nci1_weights.pt")
model.load_state_dict(best_model_params)
model.eval()

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=2, bias=True)
)

In [22]:
for dataset in ['train', 'val', 'test']:
    acc = test(eval(f"{dataset}_loader"))
    print(f"{dataset} accuracy: {100 * acc:.2f} %")

train accuracy: 76.01 %
val accuracy: 69.66 %
test accuracy: 68.20 %


## Save weights

In [23]:
torch.save(best_model_params, "nci1_weights.pt")

## Rough