# load Modules

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [2]:
import torch
print("PyTorch has version {}".format(torch.__version__))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

PyTorch has version 1.10.0
cuda


In [3]:
!nvcc --version

/bin/bash: nvcc: command not found


In [4]:
torch.cuda.is_available()

True

In [5]:
torch.cuda.current_device()

0

In [6]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3090'

In [7]:
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        table.add_row([name, param])
        total_params+=param
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

## tensorboard

tensorboard --logdir=./python/run/GAT_Net/run_01

# load data

QM9
- dataset reference :  “MoleculeNet: A Benchmark for Molecular Machine Learning” 
- dataset link : https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html#torch_geometric.datasets.QM9

In [8]:
# load example dataset
from torch_geometric.datasets import QM9

dataset = QM9(root='/home/data/QM9')
print(type(dataset))
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# #shuffle the dataset
# option 1 :
# dataset = dataset.shffule()
# option 2 :
# perm = torch.randperm(len(dataset))
# dataset_random = dataset[perm]

<class 'torch_geometric.datasets.qm9.QM9'>
Dataset: QM9(130831):
Number of graphs: 130831
Number of features: 11
Number of classes: 19


The QM9 dataset from the `"MoleculeNet: A Benchmark for Molecular Machine Learning" <https://arxiv.org/abs/1703.00564>` paper, consisting of about 130,000 molecules with 19 regression targets. Each molecule includes complete spatial information for the single low energy conformation of the atoms in the molecule. In addition, we provide the atom features from the `"Neural Message Passing for Quantum Chemistry" <https://arxiv.org/abs/1704.01212>` paper. In short, the dataset contains the (simplified) molecules composed of H, C, O, N, F. 

- 'name' contains the name of graph
- 'idx' contains the index of graph
- each node is a molecule.
- each node features is [H, C, N, O , F, atomic_number, aromatic, sp, sp2, sp3, num_hs]
    - one_hot_encoding (5 dim) + molecular properties (6 dim) = 11 dim
    - H, C, N, O, F : one hot encoding
    - aromatic : if armoatic, 1. Otherwise, 1
    - sp : if sp, 1. Otherwise, 1
    - num_hs : the number of Hydrogen atoms
- each edge is a bond between two atoms
- each edge features is one hot encoder {BT.SINGLE: 0, BT.DOUBLE: 1, BT.TRIPLE: 2, BT.AROMATIC: 3}
- 'z' contains atom number.
- 'pos' contains 3 dimensional coordinate position. [3, # of nodes]

In [9]:
#dir(dataset)

# data exploration

In [15]:
data = dataset[0]
print(data) # A graph contains 5 nodes (11 features), 8 edges (4 features). A label for graph contains 19 features.

Data(x=[5, 11], edge_index=[2, 8], edge_attr=[8, 4], y=[1, 19], pos=[5, 3], z=[5], name='gdb_1', idx=[1])


5 atoms, 8/2 (undirected) bonds

In [6]:
for key, item in data:
    print("{} found in data".format(key))

x found in data
edge_index found in data
edge_attr found in data
y found in data
pos found in data
z found in data
name found in data
idx found in data


In [7]:
data.name

'gdb_1'

In [8]:
data.idx

tensor([0])

In [9]:
data.x

tensor([[0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [10]:
data.x.shape

torch.Size([5, 11])

CH4  
Note that SP3 for first node sholud be 1. Data set has error.  

In [11]:
data.y

tensor([[    0.0000,    13.2100,   -10.5499,     3.1865,    13.7363,    35.3641,
             1.2177, -1101.4878, -1101.4098, -1101.3840, -1102.0229,     6.4690,
           -17.1722,   -17.2868,   -17.3897,   -16.1519,   157.7118,   157.7100,
           157.7070]])

In [12]:
data.edge_index

tensor([[0, 0, 0, 0, 1, 2, 3, 4],
        [1, 2, 3, 4, 0, 0, 0, 0]])

In [13]:
from torch_geometric.utils import dense_to_sparse, to_dense_adj
to_dense_adj(data.edge_index)

tensor([[[0., 1., 1., 1., 1.],
         [1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.]]])

In [14]:
# to select the source of edges
indices = torch.tensor([0]) 
source = torch.index_select(data.edge_index, 0, indices)

# to select the target of edges
indices = torch.tensor([1]) 
target = torch.index_select(data.edge_index, 0, indices)

In [15]:
source.shape

torch.Size([1, 8])

In [16]:
source.squeeze().shape

torch.Size([8])

In [17]:
y = torch.index_select(data.x, 0, source.squeeze())

In [18]:
y.shape

torch.Size([8, 11])

In [19]:
data.edge_attr

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])

In [20]:
data.edge_attr.shape

torch.Size([8, 4])

In [21]:
torch.cat((torch.index_select(data.x, 0, source.squeeze()), data.edge_attr), dim=1)

tensor([[0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.]])

In [22]:
torch.cat((data.edge_attr, y), 1)

tensor([[1., 0., 0., 0., 0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [23]:
source

tensor([[0, 0, 0, 0, 1, 2, 3, 4]])

In [24]:
target

tensor([[1, 2, 3, 4, 0, 0, 0, 0]])

In [25]:
data.pos

tensor([[-1.2700e-02,  1.0858e+00,  8.0000e-03],
        [ 2.2000e-03, -6.0000e-03,  2.0000e-03],
        [ 1.0117e+00,  1.4638e+00,  3.0000e-04],
        [-5.4080e-01,  1.4475e+00, -8.7660e-01],
        [-5.2380e-01,  1.4379e+00,  9.0640e-01]])

In [26]:
data.z

tensor([6, 1, 1, 1, 1])

# Prepare the dataset for training

To select Free energy among data.y

In [9]:
# shuffle the dataset
dataset = dataset.shuffle()

# normalize


# split the dataset
train_set = dataset[:round(dataset.len()*0.8)]
validation_set = dataset[round(dataset.len()*0.8):round(dataset.len()*0.9)]
test_set = dataset[round(dataset.len()*0.9):]

print("the number of total data : ", dataset.len())
print("sanity check : ", dataset.len()  == len(train_set) + len(validation_set) + len(test_set))

print("the number of train set : ", len(train_set))
print("the number of validation set : ",len(validation_set))
print("the number of test set : ",len(test_set))

the number of total data :  130831
sanity check :  True
the number of train set :  104665
the number of validation set :  13083
the number of test set :  13083


In [10]:
# batch
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_set, batch_size=32, shuffle=True, drop_last = True)
validation_loader = DataLoader(validation_set, batch_size=32, shuffle=True, drop_last = True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=True, drop_last = True)

In [11]:
#dir(loader)

# Baseline Model

In [19]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from torch_geometric.nn import GINConv
from torch_geometric.nn import global_add_pool
#from torch_geometric.nn import global_mean_pool
#from torch_geometric.nn import global_max_pool

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # parameters
        self.num_node_features = train_set.num_node_features
        self.num_hidden = 80
        
        # model structure
        self.conv1 = GCNConv(self.num_node_features, self.num_hidden)
        self.conv2 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv3 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv4 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv5 = GCNConv(self.num_hidden, self.num_hidden)
        self.lin1 = Linear(self.num_hidden, self.num_hidden)
        self.lin2 = Linear(self.num_hidden, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = self.conv1(x, edge_index)
        x = F.elu(x)
        
        x = self.conv2(x, edge_index)
        x = F.elu(x)
        
        x = self.conv3(x, edge_index)
        x = F.elu(x)
        
#         x = self.conv4(x, edge_index)
#         x = F.elu(x)
        
#         x = self.conv5(x, edge_index)
#         x = F.elu(x)
        
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.lin1(x)
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.lin2(x)
        
        return x
            #[32, 1]


In [76]:
# import os.path as osp
# import torch
# import torch.nn.functional as F
# from torch.nn import Sequential, Linear, BatchNorm1d, ReLU
# #from torch_geometric.nn import GCNConv
# #from torch_geometric.nn import SAGEConv
# from torch_geometric.nn import GINConv, global_add_pool
# #from torch_geometric.nn import global_mean_pool
# #from torch_geometric.nn import global_max_pool

import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, BatchNorm1d, ReLU
from torch_geometric.nn import GINConv, global_add_pool, SAGPooling


class GINConv(torch.nn.Module):
#     def __init__(self, in_channels, hidden_channels, out_channels):
#         super().__init__()
        
#         # parameters
#         self.in_channels = in_channels # train_set.num_node_features
#         self.hidden_channels = hidden_channels  
#         self.out_channels = out_channels # 16
        
#         # model structure
#         self.conv1 = GINConv(
#             Sequential(Linear(self.in_channels, self.hidden_channels), BatchNorm1d(self.hidden_channels), ReLU(),
#                        Linear(self.hidden_channels, self.hidden_channels), ReLU()))
#         self.conv2 = GINConv(
#             Sequential(Linear(self.hidden_channels, self.hidden_channels), BatchNorm1d(self.hidden_channels), ReLU(),
#                        Linear(self.hidden_channels, self.hidden_channels), ReLU()))
#         self.conv3 = GINConv(
#             Sequential(Linear(self.hidden_channels, self.hidden_channels), BatchNorm1d(self.hidden_channels), ReLU(),
#                        Linear(self.hidden_channels, self.hidden_channels), ReLU()))
#         self.conv4 = GINConv(
#             Sequential(Linear(self.hidden_channels, self.hidden_channels), BatchNorm1d(self.hidden_channels), ReLU(),
#                        Linear(self.hidden_channels, self.hidden_channels), ReLU()))
#         self.conv5 = GINConv(
#             Sequential(Linear(self.hidden_channels, self.hidden_channels), BatchNorm1d(self.hidden_channels), ReLU(),
#                        Linear(self.hidden_channels, self.hidden_channels), ReLU()))
#         self.lin1 = Linear(self.num_hidden, self.num_hidden)
#         self.lin2 = Linear(self.num_hidden, self.out_channels_channels)

    def __init__(self, in_channels):
        super(Net, self).__init__()

        self.conv1 = GINConv(Seq(Lin(in_channels, 64), ReLU(), Lin(64, 64)))
        self.pool1 = SAGPooling(64, min_score=0.001, GNN=GCNConv)
        self.conv2 = GINConv(Seq(Lin(64, 64), ReLU(), Lin(64, 64)))
        self.pool2 = SAGPooling(64, min_score=0.001, GNN=GCNConv)
        self.conv3 = GINConv(Seq(Lin(64, 64), ReLU(), Lin(64, 64)))

        self.lin = torch.nn.Linear(64, 1) 
    
    
    def forward(self, data):
        x, edge_index, batch = data.x.to(device), data.edge_index.to(device), data.batch.to(device)

        x = self.conv1(x, edge_index)
        x = F.elu(x)
        
        x = self.conv2(x, edge_index)
        x = F.elu(x)
        
        x = self.conv3(x, edge_index)
        x = F.elu(x)
        
        x = self.conv4(x, edge_index)
        x = F.elu(x)
        
        #x = self.conv5(x, edge_index)
        #x = F.elu(x)
        
        
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.lin1(x)
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.lin2(x)
        return x
            #[32, 1]


In [77]:
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        table.add_row([name, param])
        total_params+=param
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

In [78]:
#train_set[0].to(device)

In [79]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=5, min_lr=0.000001)

In [80]:
# # CPU --> GPU
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = GINConv().to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=5, min_lr=0.00001)

In [81]:
print(model)

GCN(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (lin1): Linear(in_features=80, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)


In [82]:
count_parameters(model)

+------------------+------------+
|     Modules      | Parameters |
+------------------+------------+
|    conv1.bias    |     80     |
| conv1.lin.weight |    880     |
|    conv2.bias    |     80     |
| conv2.lin.weight |    6400    |
|    conv3.bias    |     80     |
| conv3.lin.weight |    6400    |
|    conv4.bias    |     80     |
| conv4.lin.weight |    6400    |
|    conv5.bias    |     80     |
| conv5.lin.weight |    6400    |
|   lin1.weight    |    6400    |
|    lin1.bias     |     80     |
|   lin2.weight    |     80     |
|    lin2.bias     |     1      |
+------------------+------------+
Total Trainable Params: 33441


33441

In [83]:
def train(epoch):
    model.train()
    loss_all = 0

    for idx, batch in enumerate(train_loader):
        batch = batch.to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device)).to(device)
        optimizer.zero_grad() #initialization
        loss = F.mse_loss(model(batch), y) # (predicted value, true value)
        loss.backward()
        loss_all += loss.item() * batch.num_graphs
        optimizer.step()  # to update the parameters
        #if idx%500 == 0:
        #    print(f"IDX: {idx:5d}\tLoss: {loss:.4f}")
            
    return loss_all / len(train_loader.dataset)

In [84]:
def test(loader):
    model.eval()
    error = 0.0
    out_all = []
    true = []
    
    for batch in loader:
        #batch = data.to(device) #it trigger error!
        out = model(batch.to(device)).to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device)).to(device)
        tmp = ((out - y)**2).to(device)
        error += tmp.sum().item()
        
        out_all.extend([x.item() for x in out])
        true.extend([x.item() for x in y])
        
        #error += (model(batch) * std - y * std).abs().sum().item()  # MAE
    return error / len(loader.dataset)

In [85]:
test(test_loader)

125923835.4799358

In [88]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=5, min_lr=0.000001)

In [89]:
from torch.utils.tensorboard import SummaryWriter
# tensorboard writer
tb_writer = SummaryWriter('run/GCN/run_10')
    ## logdir=./python/run/GAT_Net/run_02

#input parameters
total_num_epoch = 300 #the total number of epoch that have run
running_num_epoch = 300 #the number of epoch that run in this time

#running code
import time
total_time_start = time.time() # to measure time
best_validation_error = None
for epoch in range(1, running_num_epoch+1):
    epoch_time_start = time.time() # to measure time
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)
    total_num_epoch = total_num_epoch + 1
    
    tb_writer.add_scalar('loss in train', loss, total_num_epoch) #tensorboard
    tb_writer.add_scalar('validation MAE', validation_error, total_num_epoch) #tensorboard
    if best_validation_error is None or validation_error <= best_validation_error:
        test_error = test(test_loader)
        best_validation_error = validation_error
    
    epoch_time_end = time.time() # to measure time
    print(f'ToTal Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}, Time: {epoch_time_end - epoch_time_start}')
  
    tb_writer.add_scalar('test MAE', test_error, total_num_epoch) #tensorboard
    tb_writer.add_scalar('learning rate', lr, total_num_epoch) #tensorboard
total_time_finish = time.time() # to measure time
print(f'Done. Total Time: {total_time_finish - total_time_start}')
tb_writer.add_hparams({'num_conv': 5, 'hidden_channels' : 11}, {'hparam/total_epoch' : total_num_epoch, 'hparam/total_time' : total_time_finish - total_time_start}) #tensorboard
tb_writer.close() #tensorboard : if close() is not declared, the writer does not save any valeus.

ToTal Epoch: 301, LR: 0.000010, Loss: 171704.9833863, Val MAE: 784.2457055, Test MAE: 779.8401596, Time: 26.78816294670105
ToTal Epoch: 302, LR: 0.000010, Loss: 171333.2174903, Val MAE: 511.6222288, Test MAE: 513.5180486, Time: 25.7104172706604
ToTal Epoch: 303, LR: 0.000010, Loss: 170514.6141404, Val MAE: 490.5260780, Test MAE: 492.8555336, Time: 26.78656578063965
ToTal Epoch: 304, LR: 0.000010, Loss: 170383.0775820, Val MAE: 603.9109742, Test MAE: 492.8555336, Time: 23.423296689987183
ToTal Epoch: 305, LR: 0.000010, Loss: 171329.8163558, Val MAE: 904.7040907, Test MAE: 492.8555336, Time: 24.966158866882324
ToTal Epoch: 306, LR: 0.000010, Loss: 171793.0874421, Val MAE: 883.3468297, Test MAE: 492.8555336, Time: 22.79279351234436
ToTal Epoch: 307, LR: 0.000010, Loss: 171676.7368700, Val MAE: 821.6614908, Test MAE: 492.8555336, Time: 22.894547700881958
ToTal Epoch: 308, LR: 0.000010, Loss: 171377.9870898, Val MAE: 879.3428337, Test MAE: 492.8555336, Time: 22.52784776687622
ToTal Epoch: 3

KeyboardInterrupt: 

In [None]:
total_num_epoch = 300

best_validation_error = None
num_epoch = 100
for epoch in range(1, num_epoch+1):
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)
    total_num_epoch = total_num_epoch + 1
    
    if best_validation_error is None or validation_error <= best_validation_error:
        test_error = test(test_loader)
        best_validation_error = validation_error

    print(f'Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}')

Epoch: 301, LR: 0.010000, Loss: 171055.7439927, Val MAE: 749.5268311, Test MAE: 729.6581324
Epoch: 302, LR: 0.010000, Loss: 171381.7415659, Val MAE: 477.4276311, Test MAE: 455.7549674
Epoch: 303, LR: 0.010000, Loss: 171330.0292433, Val MAE: 810.1738107, Test MAE: 455.7549674
Epoch: 304, LR: 0.010000, Loss: 170884.6685138, Val MAE: 644.2184661, Test MAE: 455.7549674
Epoch: 305, LR: 0.010000, Loss: 170795.6376415, Val MAE: 725.1176548, Test MAE: 455.7549674
Epoch: 306, LR: 0.010000, Loss: 172353.8815686, Val MAE: 808.8852974, Test MAE: 455.7549674
Epoch: 307, LR: 0.010000, Loss: 172006.6827867, Val MAE: 608.6071959, Test MAE: 455.7549674
Epoch: 308, LR: 0.010000, Loss: 171673.4699255, Val MAE: 991.0886944, Test MAE: 455.7549674
Epoch: 309, LR: 0.007000, Loss: 171889.5727810, Val MAE: 592.2459863, Test MAE: 455.7549674
Epoch: 310, LR: 0.007000, Loss: 171105.7444060, Val MAE: 767.3122511, Test MAE: 455.7549674
Epoch: 311, LR: 0.007000, Loss: 171220.2660882, Val MAE: 586.7691918, Test MAE: 

In [None]:
# model.eval()
# pred = model(test_loader)
# y = torch.index_select(batch.y, 1, torch.tensor(10))
# correct = (pred == data.y[data.test_mask]).sum()
# acc = int(correct) / int(data.test_mask.sum())
# print(f'Accuracy: {acc:.4f}')

In [None]:
# experimental = [x for x in true_all]
# prediction = [x for x in out_all]

# plt.figure(figsize=(6,6))
# plt.plot(range(-1, 16), range(-1,16), 'r--')
# plt.scatter(experimental, prediction, marker = '.')
# plt.xlabel("Experimental", fontsize='xx-large')
# plt.ylabel("Prediction", fontsize='xx-large')
# plt.xlim(-0.5, 12)
# plt.ylim(-0.5, 12)

# Model save

https://tutorials.pytorch.kr/beginner/saving_loading_models.html

In [44]:
#torch.save(model, 'model/GCNConv01_epoch300_20211207')

In [87]:
model = torch.load('model/GCNConv02_epoch300_20211216')

#model = torch.load('model/GCNConv01-2_epoch300_20211207')
#model.eval()

FileNotFoundError: [Errno 2] No such file or directory: 'model/GCNConv02_epoch300_20211216'

## save checkpoint

to save checkpoint

general code  
Ref : https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html

In [None]:
# # Additional information
# EPOCH = 5
# PATH = "model.pt"
# LOSS = 0.4

# torch.save({
#             'epoch': EPOCH,
#             'model_state_dict': net.state_dict(),
#             'optimizer_state_dict': optimizer.state_dict(),
#             'loss': LOSS,
#             }, PATH)

In [54]:
EOPCH = total_num_epoch

torch.save({
            'epoch': EOPCH,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, 'model/GCNConv01_epoch400_20211207')

To load checkpoint

In [49]:
model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
PATH = 'model/GCNConv01_epoch400_20211207'

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

#model.eval()
# - or -
model.train()

GCN(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (lin1): Linear(in_features=80, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)

# Model
- message passing neural network
    - attention
    - gate
    - Directed-MPNN
- data augmentation : 
    - global feature
    - the concatenation of the elarned molecule feature vector and the commputed global feature on the readout phase
- cross validation
- pretrain
- skip connect

## GCN_skip

In [9]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_add_pool
#from torch_geometric.nn import global_mean_pool
#from torch_geometric.nn import global_max_pool

class GCN_skip(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # parameters
        self.num_node_features = train_set.num_node_features
        self.num_hidden = 80
        
        # model structure
        self.conv1 = GCNConv(self.num_node_features, self.num_hidden)
        self.conv2 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv3 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv4 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv5 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv6 = GCNConv(self.num_hidden, self.num_hidden)
        self.conv7 = GCNConv(self.num_hidden, self.num_hidden)
        #self.lin1 = Linear(self.num_hidden, self.num_hidden)
        self.lin1 = Linear(self.num_hidden+self.num_node_features, self.num_hidden)
        self.lin2 = Linear(self.num_hidden, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x_original = data.x
        #print(x.device)
        #print(x_original.device)
        #x_original = x.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        
        x = self.conv2(x, edge_index)
        x = F.elu(x)
        
        x = self.conv3(x, edge_index)
        x = F.elu(x)
        
        x = self.conv4(x, edge_index)
        x = F.elu(x)
        
        x = self.conv5(x, edge_index)
        x = F.elu(x)
        
        x = self.conv6(x, edge_index)
        x = F.elu(x)
        
        x = self.conv7(x, edge_index)
        x = F.elu(x)
        
        x = torch.cat((x, x_original), dim=1)
        #print(x.size())
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        #x = self.lin1(x)
        x = self.lin1(x) #skip connect
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.lin2(x)
        
        return x
            #[32, 1]


In [10]:
GCN_skip()

GCN_skip(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (conv6): GCNConv(80, 80)
  (conv7): GCNConv(80, 80)
  (lin1): Linear(in_features=91, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)

In [328]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN_skip().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.8, patience=5, min_lr=0.00001)

In [329]:
device

device(type='cuda')

In [330]:
print(model)

GCN_skip(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (conv6): GCNConv(80, 80)
  (conv7): GCNConv(80, 80)
  (lin1): Linear(in_features=91, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)


In [331]:
count_parameters(model)

+------------------+------------+
|     Modules      | Parameters |
+------------------+------------+
|    conv1.bias    |     80     |
| conv1.lin.weight |    880     |
|    conv2.bias    |     80     |
| conv2.lin.weight |    6400    |
|    conv3.bias    |     80     |
| conv3.lin.weight |    6400    |
|    conv4.bias    |     80     |
| conv4.lin.weight |    6400    |
|    conv5.bias    |     80     |
| conv5.lin.weight |    6400    |
|    conv6.bias    |     80     |
| conv6.lin.weight |    6400    |
|    conv7.bias    |     80     |
| conv7.lin.weight |    6400    |
|   lin1.weight    |    7280    |
|    lin1.bias     |     80     |
|   lin2.weight    |     80     |
|    lin2.bias     |     1      |
+------------------+------------+
Total Trainable Params: 47281


47281

In [332]:
def train(epoch):
    model.train()
    loss_all = 0

    for idx, batch in enumerate(train_loader):
        batch = batch.to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device))
        optimizer.zero_grad() #initialization
        loss = F.mse_loss(model(batch), y.to(device)) # (predicted value, true value)
        loss.backward()
        loss_all += loss.item() * batch.num_graphs
        optimizer.step()  # to update the parameters
        #if idx%500 == 0:
        #    print(f"IDX: {idx:5d}\tLoss: {loss:.4f}")
            
    return loss_all / len(train_loader.dataset)

In [333]:
def test(loader):
    model.eval()
    error = 0.0
    out_all = []
    true = []
    
    for batch in loader:
        batch = batch.to(device) #it trigger error!
        out = model(batch)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device))
        tmp = (out - y)**2
        error += tmp.sum().item()
        
        out_all.extend([x.item() for x in out])
        true.extend([x.item() for x in y])
        
        #error += (model(batch) * std - y * std).abs().sum().item()  # MAE
    return error / len(loader.dataset)

In [334]:
# check any error
test(test_loader)

125653546.22640067

In [335]:
#input parameters
total_num_epoch = 0 #the total number of epoch that have run
running_num_epoch = 300 #the number of epoch that run in this time

#running code
import time
total_time_start = time.time() # to measure time
best_validation_error = None
for epoch in range(1, running_num_epoch+1):
    epoch_time_start = time.time() # to measure time
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)

    if best_validation_error is None or validation_error <= best_validation_error:
        test_error = test(test_loader)
        best_validation_error = validation_error
    
    total_num_epoch = total_num_epoch + 1
    epoch_time_end = time.time() # to measure time
    print(f'ToTal Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}, Time: {epoch_time_end - epoch_time_start}')

total_time_finish = time.time() # to measure time
print(f'Done. Total Time: {total_time_finish - total_time_start}')

ToTal Epoch: 001, LR: 0.010000, Loss: 553035.5833827, Val MAE: 28769.0459996, Test MAE: 28667.6621689, Time: 46.35320591926575
ToTal Epoch: 002, LR: 0.010000, Loss: 259212.2142335, Val MAE: 28819.2577008, Test MAE: 28667.6621689, Time: 40.69047927856445
ToTal Epoch: 003, LR: 0.010000, Loss: 238199.6275450, Val MAE: 61601.1636857, Test MAE: 28667.6621689, Time: 44.202409982681274
ToTal Epoch: 004, LR: 0.010000, Loss: 107959409268.3384857, Val MAE: 59562.8773743, Test MAE: 28667.6621689, Time: 42.927422285079956
ToTal Epoch: 005, LR: 0.010000, Loss: 260502.8809631, Val MAE: 107356.7166743, Test MAE: 28667.6621689, Time: 42.91780114173889
ToTal Epoch: 006, LR: 0.010000, Loss: 256944.0292791, Val MAE: 35695.8297695, Test MAE: 28667.6621689, Time: 43.42213034629822
ToTal Epoch: 007, LR: 0.010000, Loss: 249950.2496775, Val MAE: 34070.1182236, Test MAE: 28667.6621689, Time: 46.07858180999756
ToTal Epoch: 008, LR: 0.008000, Loss: 249127.1324798, Val MAE: 42020.9864471, Test MAE: 28667.6621689,

### Model save

In [None]:
EOPCH = total_num_epoch

torch.save({
            'epoch': EOPCH,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, 'model/GCN_skip_epoch300_20211211')

To load checkpoint

In [49]:
model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
PATH = 'model/GCNConv01-2_epoch300_20211207'

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

#model.eval()
# - or -
model.train()

GCN(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (lin1): Linear(in_features=80, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)

## GCN_skip (ensemble)

In [12]:
import torch
from torch import nn
class Ensemble(nn.Module):

    def __init__(self, modelA, modelB, modelC, input):
        super(Ensemble, self).__init__()
        self.modelA = modelA()
        self.modelB = modelB()
        self.modelC = modelC()

        self.fc1 = nn.Linear(input, 1)

    def forward(self, x):
        out1 = self.modelA.forward(x)
        out2 = self.modelB.forward(x)
        out3 = self.modelC.forward(x)
        
        # average
        out = (out1 + out2 + out3)/3
        #print(out.size())
        #x = self.fc1(out)
        return out
        #return torch.softmax(x, dim=1) #for classification

In [13]:
Ensemble(GCN_skip, GCN_skip, GCN_skip, 3)

Ensemble(
  (modelA): GCN_skip(
    (conv1): GCNConv(11, 80)
    (conv2): GCNConv(80, 80)
    (conv3): GCNConv(80, 80)
    (conv4): GCNConv(80, 80)
    (conv5): GCNConv(80, 80)
    (conv6): GCNConv(80, 80)
    (conv7): GCNConv(80, 80)
    (lin1): Linear(in_features=91, out_features=80, bias=True)
    (lin2): Linear(in_features=80, out_features=1, bias=True)
  )
  (modelB): GCN_skip(
    (conv1): GCNConv(11, 80)
    (conv2): GCNConv(80, 80)
    (conv3): GCNConv(80, 80)
    (conv4): GCNConv(80, 80)
    (conv5): GCNConv(80, 80)
    (conv6): GCNConv(80, 80)
    (conv7): GCNConv(80, 80)
    (lin1): Linear(in_features=91, out_features=80, bias=True)
    (lin2): Linear(in_features=80, out_features=1, bias=True)
  )
  (modelC): GCN_skip(
    (conv1): GCNConv(11, 80)
    (conv2): GCNConv(80, 80)
    (conv3): GCNConv(80, 80)
    (conv4): GCNConv(80, 80)
    (conv5): GCNConv(80, 80)
    (conv6): GCNConv(80, 80)
    (conv7): GCNConv(80, 80)
    (lin1): Linear(in_features=91, out_features=80, bias=

In [20]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Ensemble(GCN_skip, GCN_skip, GCN_skip, 3).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.5, patience=5, min_lr=0.00001)

In [21]:
device

device(type='cuda')

In [22]:
print(model)

Ensemble(
  (modelA): GCN_skip(
    (conv1): GCNConv(11, 80)
    (conv2): GCNConv(80, 80)
    (conv3): GCNConv(80, 80)
    (conv4): GCNConv(80, 80)
    (conv5): GCNConv(80, 80)
    (conv6): GCNConv(80, 80)
    (conv7): GCNConv(80, 80)
    (lin1): Linear(in_features=91, out_features=80, bias=True)
    (lin2): Linear(in_features=80, out_features=1, bias=True)
  )
  (modelB): GCN_skip(
    (conv1): GCNConv(11, 80)
    (conv2): GCNConv(80, 80)
    (conv3): GCNConv(80, 80)
    (conv4): GCNConv(80, 80)
    (conv5): GCNConv(80, 80)
    (conv6): GCNConv(80, 80)
    (conv7): GCNConv(80, 80)
    (lin1): Linear(in_features=91, out_features=80, bias=True)
    (lin2): Linear(in_features=80, out_features=1, bias=True)
  )
  (modelC): GCN_skip(
    (conv1): GCNConv(11, 80)
    (conv2): GCNConv(80, 80)
    (conv3): GCNConv(80, 80)
    (conv4): GCNConv(80, 80)
    (conv5): GCNConv(80, 80)
    (conv6): GCNConv(80, 80)
    (conv7): GCNConv(80, 80)
    (lin1): Linear(in_features=91, out_features=80, bias=

In [23]:
count_parameters(model)

+-------------------------+------------+
|         Modules         | Parameters |
+-------------------------+------------+
|    modelA.conv1.bias    |     80     |
| modelA.conv1.lin.weight |    880     |
|    modelA.conv2.bias    |     80     |
| modelA.conv2.lin.weight |    6400    |
|    modelA.conv3.bias    |     80     |
| modelA.conv3.lin.weight |    6400    |
|    modelA.conv4.bias    |     80     |
| modelA.conv4.lin.weight |    6400    |
|    modelA.conv5.bias    |     80     |
| modelA.conv5.lin.weight |    6400    |
|    modelA.conv6.bias    |     80     |
| modelA.conv6.lin.weight |    6400    |
|    modelA.conv7.bias    |     80     |
| modelA.conv7.lin.weight |    6400    |
|    modelA.lin1.weight   |    7280    |
|     modelA.lin1.bias    |     80     |
|    modelA.lin2.weight   |     80     |
|     modelA.lin2.bias    |     1      |
|    modelB.conv1.bias    |     80     |
| modelB.conv1.lin.weight |    880     |
|    modelB.conv2.bias    |     80     |
| modelB.conv2.l

141847

In [24]:
def train(epoch):
    model.train()
    loss_all = 0

    for idx, batch in enumerate(train_loader):
        batch = batch.to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device))
        optimizer.zero_grad() #initialization
        loss = F.mse_loss(model(batch), y.to(device)) # (predicted value, true value)
        loss.backward()
        loss_all += loss.item() * batch.num_graphs
        optimizer.step()  # to update the parameters
        #if idx%500 == 0:
        #    print(f"IDX: {idx:5d}\tLoss: {loss:.4f}")
            
    return loss_all / len(train_loader.dataset)

In [25]:
def test(loader):
    model.eval()
    error = 0.0
    out_all = []
    true = []
    
    for batch in loader:
        batch = batch.to(device) #it trigger error!
        out = model(batch)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device))
        tmp = (out - y)**2
        error += tmp.sum().item()
        
        out_all.extend([x.item() for x in out])
        true.extend([x.item() for x in y])
        
        #error += (model(batch) * std - y * std).abs().sum().item()  # MAE
    return error / len(loader.dataset)

In [26]:
# check any error
test(test_loader)

125924755.83153711

In [27]:
#input parameters
total_num_epoch = 0 #the total number of epoch that have run
running_num_epoch = 300 #the number of epoch that run in this time

#running code
import time
total_time_start = time.time() # to measure time
best_validation_error = None
for epoch in range(1, running_num_epoch+1):
    epoch_time_start = time.time() # to measure time
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)

    if best_validation_error is None or validation_error <= best_validation_error:
        test_error = test(test_loader)
        best_validation_error = validation_error
    
    total_num_epoch = total_num_epoch + 1
    epoch_time_end = time.time() # to measure time
    print(f'ToTal Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}, Time: {epoch_time_end - epoch_time_start}')

total_time_finish = time.time() # to measure time
print(f'Done. Total Time: {total_time_finish - total_time_start}')

ToTal Epoch: 001, LR: 0.010000, Loss: 976162.2059165, Val MAE: 32405.9037276, Test MAE: 33848.4269950, Time: 180.88196778297424
ToTal Epoch: 002, LR: 0.010000, Loss: 112609.3658912, Val MAE: 52624.5011561, Test MAE: 33848.4269950, Time: 177.63085198402405
ToTal Epoch: 003, LR: 0.010000, Loss: 100776.7408524, Val MAE: 14470.1997499, Test MAE: 14347.1983418, Time: 141.5558626651764
ToTal Epoch: 004, LR: 0.010000, Loss: 13614942.6128033, Val MAE: 21709.9153840, Test MAE: 14347.1983418, Time: 131.98660326004028
ToTal Epoch: 005, LR: 0.010000, Loss: 256287.6157610, Val MAE: 3309207.1885653, Test MAE: 14347.1983418, Time: 163.6609981060028
ToTal Epoch: 006, LR: 0.010000, Loss: 277581.5962679, Val MAE: 9635.1265000, Test MAE: 10049.6434579, Time: 189.06312084197998
ToTal Epoch: 007, LR: 0.010000, Loss: 95557.4747623, Val MAE: 19379.0227741, Test MAE: 10049.6434579, Time: 135.97876453399658
ToTal Epoch: 008, LR: 0.010000, Loss: 97388.9832364, Val MAE: 20366.9286706, Test MAE: 10049.6434579, Ti

KeyboardInterrupt: 

### Model save

In [28]:
EOPCH = total_num_epoch

torch.save({
            'epoch': EOPCH,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, 'model/GCN_skip_ensemble_epoch300_20211212')

To load checkpoint

In [49]:
model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
PATH = 'model/GCNConv01-2_epoch300_20211207'

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

#model.eval()
# - or -
model.train()

GCN(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (lin1): Linear(in_features=80, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)

## GAT

Cross Validation

In [90]:
# batch
from torch_geometric.loader import DataLoader
batch_size= 256
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last = True)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=True, drop_last = True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, drop_last = True)

In [98]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GATConv, SAGPooling, global_add_pool

class GAT_Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # parameters
        self.in_channels = in_channels # dim(node features)
        self.hidden_channels = hidden_channels # dim(node embedding)
        self.out_channels = out_channels
        self.heads = heads
        #self.layers1 = [d_graph_layer for i in range(n_graph_layer+1)]
        #self.GATConvList = nn.ModuleList([GAT_gate(self.layers1[i], self.layers1[i+1]) for i in range(len(self.layers1)-1)])
        #self.depth = depth # the number of message passing
        
        # model structure
        self.lin0 = Linear(self.in_channels, self.in_channels)
        self.conv1 = GATConv(in_channels= self.in_channels, out_channels= self.hidden_channels, heads=self.heads)
        self.MultiHeadAttention_lin1 = Linear(self.hidden_channels * self.heads, self.hidden_channels)
        #self.pool1 = SAGPooling(in_channels= self.hidden_channels*self.heads , ratio=1/self.heads, GNN=GATConv, min_score=0.001)
        self.conv2 = GATConv(in_channels= self.hidden_channels, out_channels= self.hidden_channels, heads=self.heads)
        #self.MultiHeadAttention_lin2 = Linear(self.hidden_channels * self.heads, self.hidden_channels)
        self.conv3 = GATConv(in_channels= self.hidden_channels, out_channels= self.hidden_channels, heads=self.heads)
        #self.MultiHeadAttention_lin3 = Linear(self.hidden_channels * self.heads, self.hidden_channels)
        #self.conv4 = GATConv(in_channels= self.hidden_channels, out_channels= self.hidden_channels, heads=self.heads)
        #self.MultiHeadAttention_lin4 = Linear(self.hidden_channels * self.heads, self.hidden_channels)
        #self.conv5 = GATConv(in_channels= self.hidden_channels, out_channels= self.hidden_channels, heads=1)
        #self.skip = Linear(self.hidden_channels * 2, 1)
        self.lin1 = Linear(self.hidden_channels, self.hidden_channels) # readout function
        self.lin2 = Linear(self.hidden_channels, self.out_channels) # predictor

    def forward(self, data):
        x, edge_index, batch = data.x.to(device), data.edge_index.to(device), data.batch.to(device)
         
        x = self.lin0(x)
    
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = self.MultiHeadAttention_lin1(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = self.conv2(x, edge_index)
        x = F.elu(x)
        x = self.MultiHeadAttention_lin1(x) #shared
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = self.conv3(x, edge_index)
        x = F.elu(x)
        x = self.MultiHeadAttention_lin1(x)
        
        #x = self.conv4(x, edge_index)
        #x = F.elu(x)
        #x = self.MultiHeadAttention_lin1(x)
        
        #x = self.conv5(x, edge_index)
        #x = F.elu(x)
        
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.lin1(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.lin2(x)
        return x

In [99]:
GAT_Net(in_channels= 11, hidden_channels= 150, out_channels= 1, heads=15)

GAT_Net(
  (lin0): Linear(in_features=11, out_features=11, bias=True)
  (conv1): GATConv(11, 150, heads=15)
  (MultiHeadAttention_lin1): Linear(in_features=2250, out_features=150, bias=True)
  (conv2): GATConv(150, 150, heads=15)
  (conv3): GATConv(150, 150, heads=15)
  (lin1): Linear(in_features=150, out_features=150, bias=True)
  (lin2): Linear(in_features=150, out_features=1, bias=True)
)

In [142]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GAT_Net(in_channels= 11, hidden_channels= 150, out_channels= 1, heads=7).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.95)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=30, min_lr=0.0001)

In [119]:
device

device(type='cuda')

In [120]:
print(model)

GAT_Net(
  (lin0): Linear(in_features=11, out_features=11, bias=True)
  (conv1): GATConv(11, 150, heads=7)
  (MultiHeadAttention_lin1): Linear(in_features=1050, out_features=150, bias=True)
  (conv2): GATConv(150, 150, heads=7)
  (conv3): GATConv(150, 150, heads=7)
  (lin1): Linear(in_features=150, out_features=150, bias=True)
  (lin2): Linear(in_features=150, out_features=1, bias=True)
)


In [121]:
count_parameters(model)

+--------------------------------+------------+
|            Modules             | Parameters |
+--------------------------------+------------+
|          lin0.weight           |    121     |
|           lin0.bias            |     11     |
|         conv1.att_src          |    1050    |
|         conv1.att_dst          |    1050    |
|           conv1.bias           |    1050    |
|      conv1.lin_src.weight      |   11550    |
| MultiHeadAttention_lin1.weight |   157500   |
|  MultiHeadAttention_lin1.bias  |    150     |
|         conv2.att_src          |    1050    |
|         conv2.att_dst          |    1050    |
|           conv2.bias           |    1050    |
|      conv2.lin_src.weight      |   157500   |
|         conv3.att_src          |    1050    |
|         conv3.att_dst          |    1050    |
|           conv3.bias           |    1050    |
|      conv3.lin_src.weight      |   157500   |
|          lin1.weight           |   22500    |
|           lin1.bias            |    15

516583

### single batch

In [13]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GATConv, SAGPooling, global_add_pool

class GAT_overfitting_Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # parameters
        self.in_channels = in_channels # dim(node features)
        self.hidden_channels = hidden_channels # dim(node embedding)
        self.out_channels = out_channels
        self.heads = heads
 
        # model structure
        self.lin0 = Linear(self.in_channels, self.in_channels)
        self.conv1 = GATConv(in_channels= self.in_channels, out_channels= self.hidden_channels, heads=self.heads)
        self.MultiHeadAttention_lin1 = Linear(self.hidden_channels * self.heads, self.hidden_channels)
        self.conv3 = GATConv(in_channels= self.hidden_channels, out_channels= self.hidden_channels, heads=self.heads)
        self.lin1 = Linear(self.hidden_channels, self.hidden_channels) # readout function
        self.lin2 = Linear(self.hidden_channels, self.out_channels) # predictor

    def forward(self, data):
        x, edge_index, batch = data.x.to(device), data.edge_index.to(device), data.batch.to(device)
         
        x = self.lin0(x)
    
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.MultiHeadAttention_lin1(x)   
        
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        x = self.MultiHeadAttention_lin1(x)
        
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.lin1(x)
        x = self.lin2(x)
        return x

In [14]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GAT_overfitting_Net(in_channels= 11, hidden_channels= 10, out_channels= 1, heads=1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=10, min_lr=0.0025)
print(device)
print('-------------------------------')
print(next(model.parameters()).is_cuda)

cuda
-------------------------------
True


In [18]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0025, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=10, min_lr=0.0005)

In [15]:
def overfit_single_batch(device, total_num_epoch, running_num_epoch, tf_board_directory, model_save_directory):
    # load modules
    from torch.utils.tensorboard import SummaryWriter
    import time
    writer = SummaryWriter(tf_board_directory)
    ## logdir=./python/run/GAT_Net/run_02
    
    model.train()
    loss_all = 0
    epoch = int(running_num_epoch)
    
    for iteration in range(epoch):
        # At iteration =0, load a single batch (without training)
        if iteration == 0 :
            for idx, batch in enumerate(train_loader):
                batch = batch.to(device)
                break
        # At ieration >=1, train a single batch until overfitting
        else :
            epoch_time_start = time.time() # to measure time
            lr = scheduler.optimizer.param_groups[0]['lr']
            y = torch.index_select(batch.y.to(device), 1, torch.tensor(10).to(device)).to(device)
            optimizer.zero_grad() #initialization
            loss = F.mse_loss(model(batch), y.to(device)) # (predicted value, true value)
            #print(model(batch))
            #print(y.to(device))
            #print(loss)
            loss.backward() # compute gradient
            loss_all += loss.item() * batch.num_graphs
            #print(loss.item())
            #print(batch.num_graphs)
            #print(loss_all)
            optimizer.step()  # to update the parameters
            scheduler.step(loss_all)
            epoch_time_end = time.time() # to measure time
            total_num_epoch = total_num_epoch + 1
            writer.add_scalar('loss in train', loss, total_num_epoch) #tensorboard
            writer.add_scalar('learning rate', lr, total_num_epoch) #tensorboard
        # print        
        if iteration != 0 and iteration%1000 == 0:
                print(f"IDX: {idx:2d}\tTotal Epoch: {total_num_epoch:10d}\tLR: {lr:7f}\tLoss: {loss:.6f}\tTime: {epoch_time_end - epoch_time_start:.3f}")
        
        # overfitting
        if iteration != 0 and loss_all < 1e-1:
            print(f"overfitting is reached on epoch {iteration:10d}")
            break

    writer.close() #tensorboard : if close() is not declared, the writer does not save any valeus.
    
    # model save
    torch.save({
            'epoch': total_num_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, model_save_directory)
    
    print("-------------------------done------------------------------")

In [None]:
overfit_single_batch(device='cuda', total_num_epoch=665000, running_num_epoch=1e10, tf_board_directory='run/GAT_overfiting_Net/run_01', model_save_directory='model/GAT_SingleOverfit_epoch1e_run01_20211219')

IDX:     0	Total Epoch:     666000	LR: 0.000500	Loss: 288749.562500	Time: 0.014
IDX:     0	Total Epoch:     667000	LR: 0.000500	Loss: 227174.515625	Time: 0.013
IDX:     0	Total Epoch:     668000	LR: 0.000500	Loss: 208977.218750	Time: 0.014
IDX:     0	Total Epoch:     669000	LR: 0.000500	Loss: 188319.656250	Time: 0.015
IDX:     0	Total Epoch:     670000	LR: 0.000500	Loss: 186028.750000	Time: 0.014
IDX:     0	Total Epoch:     671000	LR: 0.000500	Loss: 185725.187500	Time: 0.014
IDX:     0	Total Epoch:     672000	LR: 0.000500	Loss: 180717.062500	Time: 0.015
IDX:     0	Total Epoch:     673000	LR: 0.000500	Loss: 180471.531250	Time: 0.014
IDX:     0	Total Epoch:     674000	LR: 0.000500	Loss: 179364.875000	Time: 0.015
IDX:     0	Total Epoch:     675000	LR: 0.000500	Loss: 178899.562500	Time: 0.014
IDX:     0	Total Epoch:     676000	LR: 0.000500	Loss: 179254.734375	Time: 0.015
IDX:     0	Total Epoch:     677000	LR: 0.000500	Loss: 178804.015625	Time: 0.014
IDX:     0	Total Epoch:     678000	LR: 0

In [122]:
def train(epoch):
    model.train()
    loss_all = 0

    for idx, batch in enumerate(train_loader):
        batch = batch.to(device)
        #y = torch.index_select(batch.y.to(device), 1, torch.tensor(10).to(device)).to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10).to(device)).to(device)
        optimizer.zero_grad() #initialization
        loss = F.mse_loss(model(batch), y) # (predicted value, true value)
        loss.backward()
        loss_all += loss.item() * batch.num_graphs
        optimizer.step()  # to update the parameters
        #if idx%500 == 0:
        #    print(f"IDX: {idx:5d}\tLoss: {loss:.4f}")
            
    return loss_all / len(train_loader.dataset)

In [123]:
def test(loader):
    model.eval()
    error = 0.0
    out_all = []
    true = []
    
    for batch in loader:
        #batch = data.to(device) #it trigger error!
        out = model(batch).to(device)
        y = torch.index_select(batch.y.to(device), 1, torch.tensor(10).to(device)).to(device)
        tmp = (out.to(device) - y.to(device))**2
        error += tmp.sum().item()
        
        out_all.extend([x.item() for x in out])
        true.extend([x.item() for x in y])
        
        #error += (model(batch) * std - y * std).abs().sum().item()  # MAE
    return error / len(loader.dataset)

In [124]:
test(test_loader)

125901953.06642208

In [None]:
#################################################################################
# load modules and set parameters
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('run/GAT_Net/run_10')
    ## logdir=./python/run/GAT_Net/run_02
#input parameters
total_num_epoch = 1500 #the total number of epoch that have run
running_num_epoch = 2000 #the number of epoch that run in this time

optimizer = torch.optim.Adam(model.parameters(), lr=3e-6, weight_decay=0.95)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=50, min_lr=1e-8)

#################################################################################
#running code
import time
total_time_start = time.time() # to measure time
best_validation_error = None
for epoch in range(1, running_num_epoch+1):
    epoch_time_start = time.time() # to measure time
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)

    # to save the metrics
    if best_validation_error is None or validation_error <= best_validation_error:
        best_validation_error = validation_error
    test_error = test(test_loader)
    total_num_epoch = total_num_epoch + 1
    epoch_time_end = time.time() # to measure time    
    writer.add_scalar('loss in train', loss, total_num_epoch) #tensorboard
    writer.add_scalar('validation MAE', validation_error, total_num_epoch) #tensorboard    
    writer.add_scalar('test MAE', test_error, total_num_epoch) #tensorboard
    writer.add_scalar('learning rate', lr, total_num_epoch) #tensorboard
    print(f'ToTal Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}, Time: {epoch_time_end - epoch_time_start}')

    total_time_finish = time.time() # to measure time
print(f'Done. Total running Time: {total_time_finish - total_time_start}')
writer.close() #tensorboard : if close() is not declared, the writer does not save any valeus.

ToTal Epoch: 1501, LR: 0.000003, Loss: 209667.2944728, Val MAE: 3057.4786555, Test MAE: 3022.0600636, Time: 13.274787664413452
ToTal Epoch: 1502, LR: 0.000003, Loss: 211572.6046720, Val MAE: 2839.5998242, Test MAE: 2804.4568620, Time: 13.883838176727295
ToTal Epoch: 1503, LR: 0.000003, Loss: 208501.2974347, Val MAE: 3294.2207493, Test MAE: 2804.4568620, Time: 11.774081468582153
ToTal Epoch: 1504, LR: 0.000003, Loss: 209348.4432427, Val MAE: 2524.8147166, Test MAE: 2496.3766863, Time: 13.53332781791687
ToTal Epoch: 1505, LR: 0.000003, Loss: 210454.1956146, Val MAE: 2276.2463383, Test MAE: 2260.4902808, Time: 12.313076257705688
ToTal Epoch: 1506, LR: 0.000003, Loss: 209481.4090288, Val MAE: 3012.9342037, Test MAE: 2260.4902808, Time: 11.753117084503174
ToTal Epoch: 1507, LR: 0.000003, Loss: 210598.7201452, Val MAE: 2742.1497076, Test MAE: 2260.4902808, Time: 11.22588038444519
ToTal Epoch: 1508, LR: 0.000003, Loss: 211213.5353365, Val MAE: 2722.0892045, Test MAE: 2260.4902808, Time: 10.87

tensorboard command :   
tensorboard --logdir=./python/run/GAT_Net/run_01

### Model save

In [None]:
torch.save({
            'epoch': total_num_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, 'model/GAT_epoch2000_run05_20211216')

To load checkpoint

In [49]:
model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
PATH = 'model/GCNConv01-2_epoch300_20211207'

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

#model.eval()
# - or -
model.train()

GCN(
  (conv1): GCNConv(11, 80)
  (conv2): GCNConv(80, 80)
  (conv3): GCNConv(80, 80)
  (conv4): GCNConv(80, 80)
  (conv5): GCNConv(80, 80)
  (lin1): Linear(in_features=80, out_features=80, bias=True)
  (lin2): Linear(in_features=80, out_features=1, bias=True)
)

## GAT_gate

In [12]:
# batch
from torch_geometric.loader import DataLoader
batch_size= 256
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last = True)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=True, drop_last = True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, drop_last = True)

In [13]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import time

class GAT_gate(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GAT_gate, self).__init__()
        self.W = nn.Linear(in_channels, out_channels)
        #self.A = nn.Parameter(torch.Tensor(n_out_feature, n_out_feature))
        self.A = nn.Parameter(torch.zeros(size=(out_channels, out_channels)))
        self.gate = nn.Linear(out_channels*2, 1)
        self.leakyrelu = nn.LeakyReLU(0.2)

    def forward(self, x, adj):
        h = self.W(x)
        batch_size = h.size()[0]
        N = h.size()[1]
        #print(h.size(), self.A.size())
        #print(torch.matmul(h,self.A).size())
        #[original] e = torch.einsum('ijl,ikl->ijk', (torch.matmul(h,self.A), h))
        e = torch.einsum('jl,kl->jk', (torch.matmul(h,self.A), h))
        #[original] e = e + e.permute((0,2,1))
        e = e + e.permute((1,0))
        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        #attention = F.dropout(attention, self.dropout, training=self.training)
        #h_prime = torch.matmul(attention, h)
        attention = attention*adj
        #[original] h_prime = F.relu(torch.einsum('aij,ajk->aik',(attention, h)))
        #h_prime = F.relu(torch.einsum('ij,jk->ik',(attention, h)))
        #print(attention.size(), h.size())
        h_prime = F.relu(torch.matmul(attention.permute(0, 1, 2)[-1, :, :], h))
        #[original] coeff = torch.sigmoid(self.gate(torch.cat([x,h_prime], -1))).repeat(1,1,x.size(-1))
        #print(x.size(), h_prime.size())
        coeff = torch.sigmoid(self.gate(torch.cat([x,h_prime], dim=1)))
        retval = coeff*x+(1-coeff)*h_prime
        return retval

In [14]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GATConv, SAGPooling, global_add_pool
from torch_geometric.utils import dense_to_sparse, to_dense_adj


class GAT_gate_Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, FC_channels, out_channels):
        super().__init__()
        # parameters
        self.in_channels = in_channels # dim(node features)
        self.hidden_channels = hidden_channels # dim(node embedding)
        self.FC_channels = FC_channels # fully connected layers
        self.out_channels = out_channels
        #self.layers1 = [d_graph_layer for i in range(n_graph_layer+1)]
        #self.GATConvList = nn.ModuleList([GAT_gate(self.layers1[i], self.layers1[i+1]) for i in range(len(self.layers1)-1)])
        #self.depth = depth # the number of message passing
        
        # model structurec_hs = F.dropout(c_hs, p=self.dropout_rate, training=self.training)
        self.embede = nn.Linear(in_channels, hidden_channels, bias = False)
        self.conv1 = GAT_gate(in_channels= self.hidden_channels, out_channels= self.hidden_channels)
        self.conv2 = GAT_gate(in_channels= self.hidden_channels, out_channels= self.hidden_channels)
        self.conv3 = GAT_gate(in_channels= self.hidden_channels, out_channels= self.hidden_channels)
        self.conv4 = GAT_gate(in_channels= self.hidden_channels, out_channels= self.hidden_channels)
        self.conv5 = GAT_gate(in_channels= self.hidden_channels, out_channels= self.hidden_channels)
        self.FC1 = Linear(self.hidden_channels, self.FC_channels) # readout function
        self.FC2 = Linear(self.FC_channels, self.FC_channels)
        self.predict = Linear(self.FC_channels, self.out_channels) # predictor

    def forward(self, data):
        x, edge_index, batch = data.x.to(device), data.edge_index.to(device), data.batch.to(device)
        adj = to_dense_adj(data.edge_index).to(device)
        
        x = self.embede(x)
        
        x = self.conv1(x, adj)
        x = F.elu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = self.conv2(x, adj)
        x = F.elu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = self.conv3(x, adj)
        x = F.elu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = self.conv4(x, adj)
        x = F.elu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = self.conv5(x, adj)
        x = F.elu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.FC1(x)
        x = F.dropout(x, p=0.3, training=self.training)
        
        x = self.FC2(x)
        x = F.dropout(x, p=0.3, training=self.training)
        
        x = self.predict(x)
        return x

In [300]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GAT_gate_Net(in_channels= 11, hidden_channels= 140, FC_channels= 128, out_channels= 1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=10, min_lr=0.0025)
print(device)
print('-------------------------------')
print(model)

cuda
-------------------------------
GAT_gate_Net(
  (embede): Linear(in_features=11, out_features=140, bias=False)
  (conv1): GAT_gate(
    (W): Linear(in_features=140, out_features=140, bias=True)
    (gate): Linear(in_features=280, out_features=1, bias=True)
    (leakyrelu): LeakyReLU(negative_slope=0.2)
  )
  (conv2): GAT_gate(
    (W): Linear(in_features=140, out_features=140, bias=True)
    (gate): Linear(in_features=280, out_features=1, bias=True)
    (leakyrelu): LeakyReLU(negative_slope=0.2)
  )
  (conv3): GAT_gate(
    (W): Linear(in_features=140, out_features=140, bias=True)
    (gate): Linear(in_features=280, out_features=1, bias=True)
    (leakyrelu): LeakyReLU(negative_slope=0.2)
  )
  (conv4): GAT_gate(
    (W): Linear(in_features=140, out_features=140, bias=True)
    (gate): Linear(in_features=280, out_features=1, bias=True)
    (leakyrelu): LeakyReLU(negative_slope=0.2)
  )
  (conv5): GAT_gate(
    (W): Linear(in_features=140, out_features=140, bias=True)
    (gate): L

In [301]:
count_parameters(model)

+-------------------+------------+
|      Modules      | Parameters |
+-------------------+------------+
|   embede.weight   |    1540    |
|      conv1.A      |   19600    |
|   conv1.W.weight  |   19600    |
|    conv1.W.bias   |    140     |
| conv1.gate.weight |    280     |
|  conv1.gate.bias  |     1      |
|      conv2.A      |   19600    |
|   conv2.W.weight  |   19600    |
|    conv2.W.bias   |    140     |
| conv2.gate.weight |    280     |
|  conv2.gate.bias  |     1      |
|      conv3.A      |   19600    |
|   conv3.W.weight  |   19600    |
|    conv3.W.bias   |    140     |
| conv3.gate.weight |    280     |
|  conv3.gate.bias  |     1      |
|      conv4.A      |   19600    |
|   conv4.W.weight  |   19600    |
|    conv4.W.bias   |    140     |
| conv4.gate.weight |    280     |
|  conv4.gate.bias  |     1      |
|      conv5.A      |   19600    |
|   conv5.W.weight  |   19600    |
|    conv5.W.bias   |    140     |
| conv5.gate.weight |    280     |
|  conv5.gate.bias  

234334

In [302]:
#from torchsummary import summary
#summary(model, input_size=())
#summary(model, 'cpu')
#summary(model, 'cuda')

In [23]:
def train(epoch):
    model.train()
    loss_all = 0

    for idx, batch in enumerate(train_loader):
        #batch = batch.to(device)
        #y = torch.index_select(batch.y.to(device), 1, torch.tensor(10).to(device)).to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10))
        optimizer.zero_grad() #initialization
        loss = F.mse_loss(model(batch), y.to(device)) # (predicted value, true value)
        loss.backward()
        loss_all += loss.item() * batch.num_graphs
        optimizer.step()  # to update the parameters
        #if idx%500 == 0:
        #    print(f"IDX: {idx:5d}\tLoss: {loss:.4f}")
            
    return loss_all / len(train_loader.dataset)

In [24]:
def test(loader):
    model.eval()
    error = 0.0
    out_all = []
    true = []
    
    for batch in loader:
        #batch = data.to(device) #it trigger error!
        out = model(batch)
        out = out
        y = torch.index_select(batch.y, 1, torch.tensor(10)).to(device)
        tmp = (out - y)**2
        error += tmp.sum().item()
        
        out_all.extend([x.item() for x in out])
        true.extend([x.item() for x in y])
        
        #error += (model(batch) * std - y * std).abs().sum().item()  # MAE
    return error / len(loader.dataset)

In [42]:
test(test_loader)

847037.6311243599

In [48]:
tb_writer.close()

In [51]:
from torch.utils.tensorboard import SummaryWriter
# tensorboard writer
tb_writer = SummaryWriter('run/GAT_Net/run_09')
    ## logdir=./python/run/GAT_Net/run_02

#input parameters
total_num_epoch = 416 #the total number of epoch that have run
running_num_epoch = 300 #the number of epoch that run in this time

#running code
import time
total_time_start = time.time() # to measure time
best_validation_error = None
for epoch in range(1, running_num_epoch+1):
    epoch_time_start = time.time() # to measure time
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)
    total_num_epoch = total_num_epoch + 1
    
    tb_writer.add_scalar('loss in train', loss, total_num_epoch) #tensorboard
    tb_writer.add_scalar('validation MAE', validation_error, total_num_epoch) #tensorboard
    if best_validation_error is None or validation_error <= best_validation_error:
        test_error = test(test_loader)
        best_validation_error = validation_error
    
    epoch_time_end = time.time() # to measure time
    print(f'ToTal Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}, Time: {epoch_time_end - epoch_time_start}')
  
    tb_writer.add_scalar('test MAE', test_error, total_num_epoch) #tensorboard
    tb_writer.add_scalar('learning rate', lr, total_num_epoch) #tensorboard
total_time_finish = time.time() # to measure time
print(f'Done. Total Time: {total_time_finish - total_time_start}')
tb_writer.add_hparams({'num_conv': 5, 'hidden_channels' : 11}, {'hparam/total_epoch' : total_num_epoch, 'hparam/total_time' : total_time_finish - total_time_start}) #tensorboard
tb_writer.close() #tensorboard : if close() is not declared, the writer does not save any valeus.

ToTal Epoch: 417, LR: 0.002500, Loss: 700126.3143171, Val MAE: 1199936.8878698, Test MAE: 1226311.5774669, Time: 52.01580333709717
ToTal Epoch: 418, LR: 0.002500, Loss: 710971.8143028, Val MAE: 981658.8146450, Test MAE: 1004195.4304059, Time: 52.186503171920776
ToTal Epoch: 419, LR: 0.002500, Loss: 703730.3679358, Val MAE: 1109378.7064129, Test MAE: 1004195.4304059, Time: 45.445844888687134
ToTal Epoch: 420, LR: 0.002500, Loss: 784161.2004777, Val MAE: 589979.6303600, Test MAE: 607340.1378889, Time: 52.88370752334595
ToTal Epoch: 421, LR: 0.002500, Loss: 730078.9413844, Val MAE: 732566.4902545, Test MAE: 607340.1378889, Time: 47.67762565612793
ToTal Epoch: 422, LR: 0.002500, Loss: 706971.4072135, Val MAE: 1129909.4568524, Test MAE: 607340.1378889, Time: 49.83571004867554
ToTal Epoch: 423, LR: 0.002500, Loss: 713487.9789041, Val MAE: 1009972.1910877, Test MAE: 607340.1378889, Time: 49.136478662490845
ToTal Epoch: 424, LR: 0.002500, Loss: 733790.7174318, Val MAE: 819762.5706642, Test MAE

In [55]:
torch.save({
            'epoch': total_num_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, 'model/GAT_gate_epoch716_run09_20211215')

In [54]:
model = GAT_gate_Net(in_channels= 11, hidden_channels= 140, FC_channels= 128, out_channels= 1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.7, patience=10, min_lr=0.0025)
PATH = 'model/GAT_gate_epoch716_run09_20211215'

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

#model.eval()
# - or -
model.train()

FileNotFoundError: [Errno 2] No such file or directory: 'model/GAT_gate_epoch716_run09_20211215'

## Gated graph sequence neural network

In [41]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GatedGraphConv, GraphConv
from torch_geometric.nn import SAGPooling, global_add_pool

class GGS_NN_Net(torch.nn.Module):
    def __init__(self, in_channels, out_channels, num_layers):
        super().__init__()
        # parameters
        self.in_channels = in_channels # dim(node features)
        self.out_channels = out_channels # 
        self.num_layers = num_layers

       # model structure
        self.conv1 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv2 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv3 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv4 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv5 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv6 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv7 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        self.conv8 = GatedGraphConv(out_channels= self.in_channels, num_layers= self.num_layers)
        #self.pool1 = SAGPooling(self.in_channels, min_score=0.001, GNN=GraphConv)
        self.lin1 = Linear(self.in_channels * 2, self.in_channels)
        self.lin2 = Linear(self.in_channels, self.out_channels)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
     
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        
        x = self.conv2(x, edge_index)
        x = F.elu(x)
        
        x = self.conv3(x, edge_index)
        x = F.elu(x)
        
        x = self.conv4(x, edge_index)
        x = F.elu(x)
        
        x = self.conv5(x, edge_index)
        x = F.elu(x)

        x = self.conv6(x, edge_index)
        x = F.elu(x)
        
        x = self.conv7(x, edge_index)
        x = F.elu(x)
        
        x = self.conv8(x, edge_index)
        x = F.elu(x)
        
        x = torch.cat((x, data.x), dim=1) #skip connect
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.lin1(x)
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.lin2(x)
        return x

In [43]:
# CPU --> GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GGS_NN_Net(in_channels= 11, out_channels= 1, num_layers=20).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.9, patience=10, min_lr=0.00001)

In [44]:
print(model)

GGS_NN_Net(
  (conv1): GatedGraphConv(11, num_layers=20)
  (conv2): GatedGraphConv(11, num_layers=20)
  (conv3): GatedGraphConv(11, num_layers=20)
  (conv4): GatedGraphConv(11, num_layers=20)
  (conv5): GatedGraphConv(11, num_layers=20)
  (conv6): GatedGraphConv(11, num_layers=20)
  (conv7): GatedGraphConv(11, num_layers=20)
  (conv8): GatedGraphConv(11, num_layers=20)
  (lin1): Linear(in_features=22, out_features=11, bias=True)
  (lin2): Linear(in_features=11, out_features=1, bias=True)
)


In [45]:
count_parameters(model)

+---------------------+------------+
|       Modules       | Parameters |
+---------------------+------------+
|     conv1.weight    |    2420    |
| conv1.rnn.weight_ih |    363     |
| conv1.rnn.weight_hh |    363     |
|  conv1.rnn.bias_ih  |     33     |
|  conv1.rnn.bias_hh  |     33     |
|     conv2.weight    |    2420    |
| conv2.rnn.weight_ih |    363     |
| conv2.rnn.weight_hh |    363     |
|  conv2.rnn.bias_ih  |     33     |
|  conv2.rnn.bias_hh  |     33     |
|     conv3.weight    |    2420    |
| conv3.rnn.weight_ih |    363     |
| conv3.rnn.weight_hh |    363     |
|  conv3.rnn.bias_ih  |     33     |
|  conv3.rnn.bias_hh  |     33     |
|     conv4.weight    |    2420    |
| conv4.rnn.weight_ih |    363     |
| conv4.rnn.weight_hh |    363     |
|  conv4.rnn.bias_ih  |     33     |
|  conv4.rnn.bias_hh  |     33     |
|     conv5.weight    |    2420    |
| conv5.rnn.weight_ih |    363     |
| conv5.rnn.weight_hh |    363     |
|  conv5.rnn.bias_ih  |     33     |
|

25961

In [46]:
def train(epoch):
    model.train()
    loss_all = 0

    for idx, batch in enumerate(train_loader):
        batch = batch.to(device)
        y = torch.index_select(batch.y, 1, torch.tensor(10))
        optimizer.zero_grad() #initialization
        loss = F.mse_loss(model(batch), y) # (predicted value, true value)
        loss.backward()
        loss_all += loss.item() * batch.num_graphs
        optimizer.step()  # to update the parameters
        #if idx%500 == 0:
        #    print(f"IDX: {idx:5d}\tLoss: {loss:.4f}")
            
    return loss_all / len(train_loader.dataset)

In [47]:
def test(loader):
    model.eval()
    error = 0.0
    out_all = []
    true = []
    
    for batch in loader:
        #batch = data.to(device) #it trigger error!
        out = model(batch)
        y = torch.index_select(batch.y, 1, torch.tensor(10))
        tmp = (out - y)**2
        error += tmp.sum().item()
        
        out_all.extend([x.item() for x in out])
        true.extend([x.item() for x in y])
        
        #error += (model(batch) * std - y * std).abs().sum().item()  # MAE
    return error / len(loader.dataset)

In [48]:
test(test_loader)

125720592.65183826

In [None]:
#input parameters
total_num_epoch = 0 #the total number of epoch that have run
running_num_epoch = 300

#running code
import time
total_time_start = time.time() # to measure time
best_validation_error = None
for epoch in range(1, running_num_epoch+1):
    epoch_time_start = time.time() # to measure time
    lr = scheduler.optimizer.param_groups[0]['lr']
    loss = train(epoch)
    validation_error = test(validation_loader)
    scheduler.step(validation_error)

    if best_validation_error is None or validation_error <= best_validation_error:
        test_error = test(test_loader)
        best_validation_error = validation_error
    
    total_num_epoch = total_num_epoch + 1
    epoch_time_end = time.time() # to measure time
    print(f'ToTal Epoch: {total_num_epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
          f'Val MAE: {validation_error:.7f}, Test MAE: {test_error:.7f}, Time: {epoch_time_end - epoch_time_start}')

total_time_finish = time.time() # to measure time
print(f'Done. Total Time: {total_time_finish - total_time_start}')

## Directed_MPNN

In [45]:
import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import knn_graph
from torch.nn import Sequential as Seq, Linear, ReLU

from torch_geometric.utils import dense_to_sparse, to_dense_adj


class Directed_MPNN(MessagePassing):
    def __init__(self, in_channels: int, hidden_channels: int, out_channels: int, atom_fdim: int, bond_fdim: int):
        super().__init__(aggr='add')  # "Add" aggregation (Step 5).
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.atom_fdim = atom_fdim
        self.bond_fdim = bond_fdim
        self.dropout = 0.1
        self.adjacency_amtrix = to_dense_adj(data.edge_index)
        
        # Shared weight matrix across depths (default)
        self.Wi = torch.nn.Linear(self.in_channels, self.hidden_channels) #
        w_h_input_size = self.hidden_channels + self.bond_fdim #
        self.Wh = torch.nn.Linear(w_h_input_size, self.hidden_channels) # [E, E_attr + N_attr]
        self.dropout_layer = nn.Dropout(p=self.dropout)
        
    def forward(self, node_attr, edge_index, edge_attr):
        # x (node and its feature) has shape [N, in_channels]
        # edge_index has shape [2, E]
        # edge_attr has shape [E, the number of edge attributes]
        
        # to convert a node representation into an edge representation
        #knn_graph(x, k=1, batch=None, loop=False, flow="source_to_target")
        source = torch.index_select(edge_index, 0, torch.tensor([0])) # select source node per each edge
        node_feature_for_concat = torch.index_select(node_attr, 0, source.squeeze()) # select the node feature to concat with edge_attr
        concat = torch.cat((node_feature_for_concat, edge_attr), dim=1) # [E, E_attr + N_attr]
        msg_input = ReLU(self.Wi(concat)))  
            #self.Wi : (E_attr + N_attr, hidden_channels)
        h_vw = msg_input # [E, hidden_channels]
        
        
        # edge-based message
        for depth in range(self.depth-1) :
            #message = h_vw.sum(dim=1) # [hidden_channels]
    
            # select the neigborhood edge
            for iteration in range(node_attr.size(dim=0)) :
                boolian=torch.isin(torch.index_select(node_attr, 1, source.squeeze(), torch.tensor[iteration])
                                   boolian --
                neighboor = torch.index_select(edge_attr, 0, boolian)                   
                msg_neighboor = torch.where(x == iteration, , 0, dtype=torch.int)
            msg_update = ReLU(msg_input + self.Wh(message)) # [, hidden_channels]
            msg_update = self.dropout_layer(mes_update)

            # msg_update --> message for memory
        
        
        
#         ###########
#         # Step 1: Add self-loops to the adjacency matrix.
#         edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        

#         # Step 2: Linearly transform node feature matrix.
#         x = self.lin(x)

#         # Step 3: Compute normalization.
#         row, col = edge_index
#         deg = degree(col, x.size(0), dtype=x.dtype)
#         deg_inv_sqrt = deg.pow(-0.5)
#         deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
#         norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

#         # Step 4-5: Start propagating messages.
#         return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # x_j has shape [E, out_channels]

        # Step 4: Normalize node features.
        return norm.view(-1, 1) * x_j

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 50)

In [52]:
Directed_MPNN(10, 20, 30,14 ,4)

Directed_MPNN(
  (Wi): Linear(in_features=10, out_features=20, bias=True)
  (Wh): Linear(in_features=24, out_features=20, bias=True)
)

Neural Network Architecture

In [59]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from torch_geometric.nn import GINConv
from torch_geometric.nn import global_add_pool

class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, depth):
        super().__init__()
        # parameters
        self.in_channels = in_channels # dim(node features)
        self.hidden_channels = hidden_channels # dim(node embedding)
        self.depth = depth # the number of message passing
        
        # model structure
        self.conv1 = GCNConv(self.in_channels, self.hidden_channels)
        self.conv2 = GCNConv(self.hidden_channels, self.hidden_channels)
        self.conv3 = GCNConv(self.hidden_channels, self.hidden_channels)
        self.conv4 = GCNConv(self.hidden_channels, self.hidden_channels)
        self.conv5 = GCNConv(self.hidden_channels, self.hidden_channels)
        self.lin1 = Linear(self.hidden_channels, self.hidden_channels)
        self.lin2 = Linear(self.hidden_channels, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = self.conv1(x, edge_index)
        x = F.elu(x)
        
        x = self.conv2(x, edge_index)
        x = F.elu(x)
        
        x = self.conv3(x, edge_index)
        x = F.elu(x)
        
#         x = self.conv4(x, edge_index)
#         x = F.elu(x)
        
#         x = self.conv5(x, edge_index)
#         x = F.elu(x)
        
        x = global_add_pool(x, batch) # [batch_size, hidden_channels]
        x = self.lin1(x)
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.lin2(x)
        
        return x
            #[32, 1]


In [60]:
Directed_MPNN(10, 20)

Directed_MPNN(
  (conv1): GCNConv(10, 20)
  (conv2): GCNConv(20, 20)
  (conv3): GCNConv(20, 20)
  (conv4): GCNConv(20, 20)
  (conv5): GCNConv(20, 20)
  (lin1): Linear(in_features=20, out_features=20, bias=True)
  (lin2): Linear(in_features=20, out_features=1, bias=True)
)

# Ref
- https://github.com/pyg-team/pytorch_geometric/blob/master/examples/qm9_nn_conv.py

In [None]:
https://github.com/pyg-team/pytorch_geometric/blob/master/examples/mutag_gin.py