In [22]:
from torch_geometric.datasets import QM9
dataset = QM9(root='../../Dataset/QM9')
# feature 11 维， 用data.y[1] 做标签

In [23]:
from torch.utils.data import random_split
import torch
from torch_geometric.data import Dataset, Data, DataLoader
training_set, test_set  = random_split(dataset, [int(len(dataset) * 0.8), len(dataset) - int(len(dataset) * 0.8)], \
    generator=torch.Generator().manual_seed(42))
training_set, validation_set = random_split(training_set, [int(len(training_set) * 0.9), len(training_set) - int(len(training_set) * 0.9)], \
    generator=torch.Generator().manual_seed(42))

In [24]:
len(training_set), len(validation_set), len(test_set)

(94197, 10467, 26167)

In [25]:
import random
import os
import numpy as np
np.set_printoptions(threshold=np.inf)
def seed_torch(seed=42):
	random.seed(seed)
	os.environ['PYTHONHASHSEED'] = str(seed) # 为了禁止hash随机化，使得实验可复现
	np.random.seed(seed)
	torch.manual_seed(seed)
	torch.cuda.manual_seed(seed)
	torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.

In [26]:
import torch
from torch_geometric.data import Dataset, Data, DataLoader
import numpy as np
import os
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, precision_score, f1_score, recall_score
import networkx as nx
import torch.nn as nn
from torch_geometric.nn import SAGEConv, global_mean_pool
from torch.utils.data import random_split
import pickle
from torch_geometric.utils import from_networkx

In [38]:
class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        num_classses = 1

        conv_hidden = args['conv_hidden']
        cls_hidden = args['cls_hidden']
        self.n_layers = args['n_layers']

        self.conv_layers = nn.ModuleList([])

        self.conv1 = SAGEConv(11, conv_hidden)

        for i in range(self.n_layers):
            self.conv_layers.append(
                SAGEConv(conv_hidden, conv_hidden)
            )

        self.linear1 = nn.Linear(conv_hidden, cls_hidden)
        self.linear2 = nn.Linear(cls_hidden, num_classses)
        self.relu = nn.ReLU()
        self.drop1 = nn.Dropout(p=0.5)

    def forward(self, mol):

        res = self.conv1(mol.x, mol.edge_index)
        res = self.relu(res)
        for i in range(self.n_layers):
            res = self.relu(self.conv_layers[i](res, mol.edge_index))

        res = global_mean_pool(res, mol.batch)
        res = self.linear1(res)
        res = self.relu(res)
        res = self.drop1(res)
        res = self.linear2(res)

        return res

In [39]:
def train(args, model, device, training_set, optimizer, criterion, epoch):
    model.train()
    total_loss = 0
    for mol in training_set:
        mol = mol.to(device)
        mol.x = mol.x
        target = mol.y[:, 1]
        optimizer.zero_grad()
        output = model(mol)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    print(f'Train Epoch: {epoch}, Ave Loss: {total_loss}')

In [40]:
def val(args, model, device, val_set, optimizer, criterion, epoch):
    model.eval()
    total_loss = 0
    for mol in val_set:
        mol = mol.to(device)
        mol.x = mol.x
        target = mol.y[:, 1]
        optimizer.zero_grad()
        output= model(mol)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    print(f'val Epoch: {epoch}, Ave Loss: {total_loss}')
    return total_loss

In [41]:
def test(model, device, test_set, criterion):
    model.eval()
    with torch.no_grad():
        for mol in test_set:
            mol = mol.to(device)
            mol.x = mol.x
            target = mol.y[:, 1]
            output = model(mol)
            loss = criterion(output, target)            
    print(f'test loss: {loss}')
    return loss

In [42]:
def main(args):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    seed_torch(args['seed'])
    model = Model(args).to(device)
    print(model)
    batch_size = args['batch_size']
    train_loader = DataLoader(training_set, batch_size, shuffle=True)
    val_loader = DataLoader(validation_set, batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size, shuffle=False)
    loss_fn = torch.nn.MSELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=args['lr'])
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
    min_loss = 10000
    for epoch in range(1, args['epoch'] + 1):
        train(args, model, device, train_loader, optimizer, loss_fn, epoch)
        val_loss = val(args, model, device, val_loader, optimizer, loss_fn, epoch)
        scheduler.step()
        # nni.report_intermediate_result(val_auc)
        if val_loss < min_loss:
            min_loss = val_loss
            print('Saving model (epoch = {:4d}, top2acc = {:.4f})'
                .format(epoch, val_loss))
            torch.save(model.state_dict(), args['save_path'])
    # final result
    model.load_state_dict(torch.load(args['save_path']))
    final_loss = test(model, device, test_loader, loss_fn)
    # nni.report_final_result(final_auc)
    print(final_loss)

In [43]:
args = {
    'lr': 0.0001, # 0.001 就会nan
    'epoch': 100,
    'seed': 42,
    'save_path': './model/model',
    'conv_hidden':1024,
    'cls_hidden':512,
    'n_layers':3,
    'batch_size':128
}

In [44]:
main(args)

Model(
  (conv_layers): ModuleList(
    (0): SAGEConv(1024, 1024)
    (1): SAGEConv(1024, 1024)
    (2): SAGEConv(1024, 1024)
  )
  (conv1): SAGEConv(11, 1024)
  (linear1): Linear(in_features=1024, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=1, bias=True)
  (relu): ReLU()
  (drop1): Dropout(p=0.5, inplace=False)
)
Train Epoch: 1, Ave Loss: 1659271.231956482
val Epoch: 1, Ave Loss: 15952.446632385254
Train Epoch: 2, Ave Loss: 118089.00852966309
val Epoch: 2, Ave Loss: 6638.082328796387
Saving model (epoch =    2, top2acc = 6638.0823)
Train Epoch: 3, Ave Loss: 98393.75959777832
val Epoch: 3, Ave Loss: 5945.192359924316
Saving model (epoch =    3, top2acc = 5945.1924)
Train Epoch: 4, Ave Loss: 93602.94033050537
val Epoch: 4, Ave Loss: 5944.856391906738
Saving model (epoch =    4, top2acc = 5944.8564)
Train Epoch: 5, Ave Loss: 90316.35327148438
val Epoch: 5, Ave Loss: 5833.482570648193
Saving model (epoch =    5, top2acc = 5833.4826)
Train Epoch: 6, Ave L

  return F.mse_loss(input, target, reduction=self.reduction)
