# NCI1

In [2]:
import pickle

import torch
from torch_geometric.loader import DataLoader

from GNN import GCN_NCI1
from preprocessing import NCI1Dataset

torch.manual_seed(12345)

<torch._C.Generator at 0x7f9a37159f90>

## Data

In [3]:
dataset = NCI1Dataset("../../../data/NCI1")

In [4]:
len(dataset)

4110

In [28]:
with open("../../../data/NCI1/index.pkl", "rb") as file:
    index = pickle.load(file)
print(index.keys())

dict_keys(['idx_train', 'idx_val', 'idx_test'])


In [29]:
train_dataset = dataset[index['idx_train']]
val_dataset = dataset[index['idx_val']]
test_dataset = dataset[index['idx_test']]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [14]:
for data in train_loader:
    print(data)
    break

DataBatch(x=[1858, 37], edge_index=[2, 4030], y=[64], batch=[1858], ptr=[65])


## Model

In [15]:
model = GCN_NCI1(
    in_features=dataset.num_node_features,
    h_features=128,
)

In [16]:
print(model)

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (conv3): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)


## Train

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCELoss()

def train():
    model.train()
    for data in train_loader: # Iterate in batches over the training dataset.
        out = model(data.x, data.edge_index, data.batch).squeeze()
        loss = criterion(out, data.y.float())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    model.eval()
    correct = 0
    for data in loader: # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch).squeeze()
        pred = out.round()
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)

In [18]:
best_test_acc = 0.0
for epoch in range(1, 101):
    train()
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
    if val_acc >= best_test_acc:
        best_test_acc = val_acc
        best_model_params = model.state_dict()
        print("Checkpoint saved!")

## Eval

In [10]:
# best_model_params = torch.load("nci1_weights.pt")
model.load_state_dict(best_model_params)
model.eval()

GCN_NCI1(
  (conv1): GraphConvolution (37 -> 128)
  (conv2): GraphConvolution (128 -> 128)
  (conv3): GraphConvolution (128 -> 128)
  (dense1): Linear(in_features=128, out_features=16, bias=True)
  (dense2): Linear(in_features=16, out_features=8, bias=True)
  (dense3): Linear(in_features=8, out_features=1, bias=True)
)

In [11]:
for dataset in ['train', 'val', 'test']:
    acc = test(eval(f"{dataset}_loader"))
    print(f"{dataset} accuracy: {100 * acc:.2f} %")

train accuracy: 75.18 %
val accuracy: 75.25 %
test accuracy: 68.79 %


## Save weights

In [21]:
torch.save(best_model_params, "new_nci1_weights.pt")

## Rough

In [30]:
sum((len(index['idx_train']), len(index['idx_val']), len(index['idx_test'])))

4562

In [31]:
print(
    list(set(index['idx_train']) & set(index['idx_val'])),
    list(set(index['idx_train']) & set(index['idx_test'])),
    list(set(index['idx_val']) & set(index['idx_test'])),
)

[] [] []


In [32]:
max(index['idx_train']), max(index['idx_val']), max(index['idx_test'])

(tensor(4109), tensor(4083), tensor(4108))