# MPNN训练 QM9

## 添加包环境路径

In [1]:
import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU, GRU

import torch_geometric.transforms as T
from torch_geometric.nn import NNConv, Set2Set
from torch_geometric.data import DataLoader
from torch_geometric.utils import remove_self_loops

In [2]:
import sys
sys.path.append('/Users/yuekong/Desktop/Github/Kinase-Transformer-GNN')
from molecular_network.mol_dataset import QM9

# 载入数据集

In [3]:
target = 1  # column index of the property to predict
dim = 64  # width of the hidden node representation used by the model
training_size = 1_000  # number of molecules kept in the training split

In [4]:
# Load QM9 from the local data directory, then randomise the sample order.
root = '/Users/yuekong/Desktop/Github/Kinase-Transformer-GNN/data/QM9'
dataset = QM9(root)
dataset = dataset.shuffle()
# Notebook display: shape of the full target matrix before a property is selected.
dataset.data.y.shape

torch.Size([129433, 19])

# 归一化数据集

In [5]:
# Keep only the column of the property being predicted (index `target`).
dataset.data.y = dataset.data.y.select(1, target)

In [6]:
# Standardise the selected target to zero mean and unit standard deviation.
# `mean` and `std` are kept at module level; `std` is used later to report
# errors in the original (un-normalised) units.
mean = torch.mean(dataset.data.y, dim=0, keepdim=True)
std = torch.std(dataset.data.y, dim=0, keepdim=True)
dataset.data.y = dataset.data.y.sub(mean).div(std)  # only the selected property remains

# 划分训练集、验证集和测试集

In [7]:
# Partition the shuffled dataset: the first 10,000 graphs are held out for
# testing, the next 10,000 for validation, and the following `training_size`
# graphs are used for training.
train_dataset = dataset[20000:20000 + training_size]
val_dataset = dataset[10000:20000]
test_dataset = dataset[:10000]

# Only the training loader reshuffles; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
print('------loader loaded------')


------loader loaded------


# 定义模型

In [8]:
class Net(torch.nn.Module):
    """Message-passing network (NNConv + GRU + Set2Set) predicting one scalar per graph.

    Args:
        num_features: Size of each input node feature vector. When ``None``
            (default) it is read from the module-level ``dataset.num_features``.
        hidden_dim: Width of the hidden node state. When ``None`` (default)
            the module-level ``dim`` is used.
        edge_dim: Number of features per edge fed to the edge network.
            Defaults to 4, the value that was previously hard-coded.
        num_steps: Number of message-passing iterations performed in
            ``forward``. Defaults to 3, the value that was previously
            hard-coded.
    """

    def __init__(self, num_features=None, hidden_dim=None, edge_dim=4,
                 num_steps=3):
        super().__init__()
        # Resolve notebook-level defaults at construction time so that a bare
        # `Net()` call keeps building exactly the same architecture as before.
        if num_features is None:
            num_features = dataset.num_features
        if hidden_dim is None:
            hidden_dim = dim
        self.num_steps = num_steps

        # Project raw node features into the hidden space.
        self.lin0 = torch.nn.Linear(num_features, hidden_dim)

        # Edge network handed to NNConv: it turns each edge's feature vector
        # into hidden_dim * hidden_dim values.
        # NOTE(review): an earlier comment claimed a transform expands
        # edge_attr to 5 features, while this layer was built for 4. `edge_dim`
        # must equal the last dimension of `data.edge_attr` -- confirm against
        # the dataset actually used.
        edge_network = Sequential(
            Linear(edge_dim, 128),
            ReLU(),
            Linear(128, hidden_dim * hidden_dim),
        )
        self.conv = NNConv(hidden_dim, hidden_dim, edge_network, aggr='mean')
        self.gru = GRU(hidden_dim, hidden_dim)

        # Graph-level readout; its output feeds lin1, which expects
        # 2 * hidden_dim inputs.
        self.set2set = Set2Set(hidden_dim, processing_steps=3)
        self.lin1 = torch.nn.Linear(2 * hidden_dim, hidden_dim)
        self.lin2 = torch.nn.Linear(hidden_dim, 1)

    def forward(self, data):
        """Return a 1-D tensor of predictions for the batch ``data``.

        ``data`` must provide ``x``, ``edge_index``, ``edge_attr`` and
        ``batch``.
        """
        out = F.relu(self.lin0(data.x))
        # The embedded node features double as the GRU's initial hidden state;
        # unsqueeze adds the leading dimension the GRU call expects.
        h = out.unsqueeze(0)

        for _ in range(self.num_steps):
            # One round of message passing followed by a GRU state update.
            m = F.relu(self.conv(out, data.edge_index, data.edge_attr))
            out, h = self.gru(m.unsqueeze(0), h)
            out = out.squeeze(0)

        out = self.set2set(out, data.batch)
        out = F.relu(self.lin1(out))
        out = self.lin2(out)
        # Flatten to one dimension so the result lines up with `data.y`.
        return out.view(-1)

# 训练前的准备：设备、模型、优化器与学习率调度器

In [9]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = Net()
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Plateau scheduler in 'min' mode: the learning rate is scaled by `factor`
# once the monitored value stops improving, and never drops below `min_lr`.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.7,
    patience=5,
    min_lr=1e-5,
)

# 定义训练+测试

In [10]:
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Returns the MSE loss averaged over all graphs in the training set.
    ``epoch`` is accepted for the caller's convenience but is not used.
    """
    model.train()
    total_loss = 0

    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        prediction = model(batch)
        batch_loss = F.mse_loss(prediction, batch.y)
        batch_loss.backward()
        optimizer.step()
        # The batch loss is a mean, so weight it by the number of graphs in
        # the batch before accumulating.
        total_loss += batch_loss.item() * batch.num_graphs

    return total_loss / len(train_loader.dataset)


def test(loader):
    """Return the mean absolute error of ``model`` over ``loader``.

    Predictions and targets are both multiplied by the module-level ``std``
    before the absolute difference is taken, so the error is reported in the
    original (un-normalised) target units.

    Args:
        loader: Iterable of batches exposing ``.to(device)`` and ``.y``, with
            a ``dataset`` attribute whose length is the number of graphs.

    Returns:
        Sum of absolute errors divided by ``len(loader.dataset)``.
    """
    model.eval()
    # `std` is created with keepdim=True, i.e. a one-element CPU tensor.
    # Multiplying it with tensors on another device raises a device-mismatch
    # error, so convert it to a plain float that works on any device.
    scale = float(std)
    error = 0

    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every batch.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            error += (model(data) * scale - data.y * scale).abs().sum().item()  # MAE
    return error / len(loader.dataset)

# 训练！

In [11]:
# Train for 300 epochs. The reported test MAE is refreshed only on epochs
# where the validation MAE matches or beats the best value seen so far, so on
# other epochs the previously measured test MAE is printed again.
best_val_error = None
for epoch in range(1, 301):
    print('-----------training begin-------------')
    # Read the learning rate before the scheduler update so the log shows the
    # value that was in effect during this epoch.
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    val_error = test(val_loader)
    scheduler.step(val_error)

    improved = best_val_error is None or val_error <= best_val_error
    if improved:
        test_error = test(test_loader)
        best_val_error = val_error

    print(
        'Epoch: {:03d}, LR: {:7f}, Loss: {:.7f}, Validation MAE: {:.7f}, '
        'Test MAE: {:.7f}'.format(epoch, lr, loss, val_error, test_error)
    )

-----------training begin-------------
Epoch: 001, LR: 0.001000, Loss: 0.9404447, Validation MAE: 5.9132945, Test MAE: 5.8557571
-----------training begin-------------
Epoch: 002, LR: 0.001000, Loss: 0.8170270, Validation MAE: 5.2463203, Test MAE: 5.1783432
-----------training begin-------------
Epoch: 003, LR: 0.001000, Loss: 0.7350863, Validation MAE: 5.1489918, Test MAE: 5.0882465
-----------training begin-------------
Epoch: 004, LR: 0.001000, Loss: 0.6320461, Validation MAE: 4.2431879, Test MAE: 4.1845278
-----------training begin-------------
Epoch: 005, LR: 0.001000, Loss: 0.5415218, Validation MAE: 3.9576328, Test MAE: 3.8924049
-----------training begin-------------
Epoch: 006, LR: 0.001000, Loss: 0.5696692, Validation MAE: 3.7827002, Test MAE: 3.7261785
-----------training begin-------------
Epoch: 007, LR: 0.001000, Loss: 0.5497057, Validation MAE: 5.1088794, Test MAE: 3.7261785
-----------training begin-------------
Epoch: 008, LR: 0.001000, Loss: 0.5135244, Validation MAE: