In [1]:
import time

import torch

import torch.nn.functional as F
from torch.nn import ModuleList, Embedding
from torch.nn import Sequential, ReLU, Linear
from torch.nn import CrossEntropyLoss, MSELoss, L1Loss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torch_geometric.utils import degree
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, PNAConv, BatchNorm, global_add_pool

from phylognn_model import G2Dist_GCNConv

from gene_graph_dataset import GeneGraphDataset

from torch.utils.tensorboard import SummaryWriter

In [2]:
train_p, test_p = 0.7, 0.2

In [3]:
dataset = GeneGraphDataset('dataset', 20, 20, graph_num = 100)
data_size = len(dataset)
train_size, test_size = (int)(data_size * train_p), (int)(data_size * test_p)

In [4]:
data_size

2000

In [5]:
dataset = dataset.shuffle()
train_dataset = dataset[:train_size]
test_dataset = dataset[train_size:(train_size + test_size)]
val_dataset = dataset[(train_size + test_size):]

In [6]:
# len(train_dataset), len(test_dataset), len(val_dataset)

In [7]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)
val_loader = DataLoader(val_dataset, batch_size=1)

In [8]:
# len(train_loader), len(test_loader), len(val_loader)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = G2Dist_GCNConv().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay = 0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10,
                              min_lr=0.00001)

In [10]:
# loss_fn = MSELoss()
# l1_fn = L1Loss()

loss_fn = CrossEntropyLoss()

def train(train_loader):
    model.train()

    total_loss, counter = 0, 0
    size = len(train_loader)
    for batch, data in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        #loss = (out.squeeze() - data.y).abs().sum()
        pred, y = out.softmax(axis = 1).argmax(axis = 1), data.y
        counter += (pred == y).sum().item()
        
        loss = loss_fn(out, data.y)
        
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        
    return total_loss / len(train_loader), counter

In [11]:
@torch.no_grad()
def test(loader):
    model.eval()

    total_error, counter = 0, 0
    for data in loader:
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)
        
        pred, y = out.softmax(axis = 1).argmax(axis = 1), data.y
        counter += (pred == y).sum().item()
        
        # total_error += (out.squeeze() - data.y).abs().sum().item()
        
        total_error += loss_fn(out, data.y).item()
        
    return total_error / len(loader), counter

In [12]:
writer = SummaryWriter(log_dir='runs_g2d_10/g2dist_0020_0020_02000-gcn-run9')

In [13]:
import numpy as np
for epoch in range(1, 1001):
    loss, train_counter = train(train_loader)
    test_mae, test_counter = test(test_loader)
    val_mae, _ = test(val_loader)
    
    scheduler.step(loss)
    
    writer.add_scalar('Loss/train', loss, epoch)
    writer.add_scalar('Loss/test', test_mae, epoch)
    writer.add_scalar('Loss/val', val_mae, epoch)
    writer.add_scalar('Counter/train', train_counter/len(train_loader.dataset), epoch)
    writer.add_scalar('Counter/test', test_counter/len(test_loader.dataset), epoch)
    
    print(f'{time.ctime()}\t'
          f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, '
          f'Test: {test_mae:.4f}')
    
    print(f'\t\t -- train_counter: {train_counter}, test_counter:{test_counter}')

Sat Jan  1 21:55:54 2022	Epoch: 001, Loss: 2.9261, Val: 9.1281, Test: 9.5538
		 -- train_counter: 74, test_counter:19
Sat Jan  1 21:55:57 2022	Epoch: 002, Loss: 2.8284, Val: 12.5468, Test: 13.3898
		 -- train_counter: 117, test_counter:8
Sat Jan  1 21:56:00 2022	Epoch: 003, Loss: 2.7784, Val: 22.0224, Test: 23.1461
		 -- train_counter: 131, test_counter:19
Sat Jan  1 21:56:02 2022	Epoch: 004, Loss: 2.6928, Val: 21.5115, Test: 23.7365
		 -- train_counter: 195, test_counter:19
Sat Jan  1 21:56:05 2022	Epoch: 005, Loss: 2.6701, Val: 14.2263, Test: 15.5268
		 -- train_counter: 187, test_counter:20
Sat Jan  1 21:56:08 2022	Epoch: 006, Loss: 2.5852, Val: 7.2370, Test: 8.1807
		 -- train_counter: 227, test_counter:21
Sat Jan  1 21:56:11 2022	Epoch: 007, Loss: 2.5179, Val: 3.0736, Test: 2.9173
		 -- train_counter: 231, test_counter:32
Sat Jan  1 21:56:13 2022	Epoch: 008, Loss: 2.4427, Val: 2.9152, Test: 2.9352
		 -- train_counter: 258, test_counter:38
Sat Jan  1 21:56:16 2022	Epoch: 009, Loss:

In [14]:
model.eval()

G2Dist_GCNConv(
  (node_emb): Embedding(10000, 2)
  (convs): ModuleList(
    (0): GCNConv(80, 100)
    (1): GCNConv(49, 49)
    (2): GCNConv(24, 24)
    (3): GCNConv(12, 12)
  )
  (pools): ModuleList(
    (0): MaxPool1d(kernel_size=4, stride=2, padding=0, dilation=1, ceil_mode=False)
    (1): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): MaxPool1d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (batch_norms): ModuleList(
    (0): BatchNorm(49)
    (1): BatchNorm(24)
    (2): BatchNorm(12)
    (3): BatchNorm(11)
  )
  (mlp): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
  (lin): Sequential(
    (0): Linear(in_features=440, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=100, bias=True)
    (3): ReLU()
    (4): Linear(in_features=100, out_features=20, bias=True)
  )
)

In [15]:
tld0 = list(train_loader)[0].to(device)
tld1 = list(test_loader)[0].to(device)

In [16]:
res0 = model(tld0.x, tld0.edge_index, tld0.batch)

In [17]:
res0

tensor([[  2.4530,  23.7187,   0.4148,  ..., -71.3455, -55.3218, -56.0174],
        [-12.5859,   2.2006,  -5.9482,  ..., -17.7220, -13.3263, -23.6975],
        [  4.4539,  17.5653,  27.3866,  ..., -83.1137, -87.1299, -73.9906],
        ...,
        [ -7.8022,   4.1969,   2.3293,  ..., -32.5450, -33.6475, -31.9167],
        [-31.3682, -21.2330, -12.9884,  ..., -10.3788,  -2.6255, -13.1376],
        [-22.3281,  -5.5378,  -1.5480,  ..., -40.0321, -25.6243, -36.3368]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [18]:
res0.argmax(axis = 1)

tensor([ 1,  6,  2, 14,  8, 15, 13, 13,  9, 15,  7,  9, 13, 14, 13,  6, 16,  5,
         5, 16, 10,  2,  5, 11, 11,  9,  4, 18, 16,  0, 14, 13, 14,  1, 15, 14,
         7,  3,  0,  6, 12, 10,  8, 15, 15,  7, 14,  4,  9,  4, 15, 12, 13, 11,
         3, 10,  9,  5, 13, 11,  0, 10, 12, 11,  4,  9,  5, 12, 16, 15,  1, 18,
         8,  0, 11,  8,  6,  8, 10,  0, 15,  4,  2, 13,  5, 18, 15,  7, 11, 14,
         7,  7, 13,  6, 13,  5, 11, 15,  5, 14, 12,  1,  6,  2,  2,  9,  5, 15,
        12, 12, 13,  7, 16,  6,  3, 14, 12,  8, 10, 12, 14,  4,  7,  8,  5,  0,
         2, 12,  6, 13, 15,  8,  5,  2,  6,  3, 15,  8, 17, 12,  0,  5, 10,  4,
        11,  7, 10,  5, 14,  7, 14, 11, 14,  6, 14, 15,  2,  8,  1, 11, 12,  2,
         5,  2,  9, 12, 16, 13, 15, 13, 17,  8, 10, 13,  5, 12, 12, 14, 12, 14,
        12, 14,  9,  1,  6, 12, 17,  2,  9, 15,  2,  8, 16, 12,  4,  5, 13,  5,
        11, 14,  4, 13,  9, 13, 15,  3,  2,  9, 12,  2, 14, 13,  4,  1, 16,  4,
        11,  4,  4,  2, 13,  2,  4,  3, 

In [34]:
tmp_tld = list(train_loader)[1].to(device)
tmp = model(tld0.x, tmp_tld.edge_index, tld0.batch)

In [35]:
tmp.argmax(axis = 1)

tensor([11,  9,  9, 15, 14, 16, 13,  4, 12,  9, 10, 12, 13, 16, 15, 10, 14, 14,
         7, 11, 13,  8, 14, 11,  5, 11,  1, 17, 16,  0, 13, 14, 11,  2, 14, 16,
         9, 11,  0,  8,  9, 12, 14, 13, 13, 15, 13, 12, 14, 14, 13,  4, 13, 13,
        14, 14,  4,  4, 16, 13,  0, 13, 13, 15, 12, 14, 15, 12, 14, 14,  8, 17,
         8,  1, 16, 11,  8,  9,  5,  0, 15, 13,  7, 17,  5,  8, 16,  8,  9, 15,
        17, 10, 14,  8, 13, 12,  9, 15,  6, 13, 14, 13, 13, 13, 15,  9,  8, 15,
        16,  5,  9, 16, 13,  9,  2, 12, 14, 12,  5, 13, 14, 13,  9, 15, 14,  0,
         4, 12,  8, 15, 13, 15, 14, 13,  9, 16, 17, 12, 14, 15,  0, 11, 12, 11,
        11, 16, 10, 12,  3,  9, 14, 13, 13,  5, 13,  9,  4, 12,  1, 12, 15,  7,
         6, 13, 10,  9,  8, 16, 13, 11, 14, 16, 14,  5,  5, 15, 15,  9, 14,  8,
        12, 14, 14,  0,  8, 10, 12,  7,  7, 14,  5,  9, 17, 15,  7, 11, 14, 13,
        17, 13, 12, 13, 15, 14, 14,  5,  2, 16, 13, 13,  8, 12, 14,  7, 12, 13,
        15, 12, 19,  7, 14,  4, 14,  5, 

In [19]:
tld0.y

tensor([ 1,  6,  2, 14,  8, 15, 13, 13,  9, 15,  7,  9, 13, 14, 13,  6, 16,  5,
         5, 16, 10,  2,  5, 11, 11,  9,  4, 18, 16,  0, 14, 13, 14,  1, 15, 14,
         7,  3,  0,  6, 12, 10,  8, 15, 15,  7, 14,  4,  9,  4, 15, 12, 13, 11,
         3, 10,  9,  5, 13, 11,  0, 10, 12, 11,  4,  9,  5, 12, 16, 15,  1, 18,
         8,  0, 11,  8,  6,  8, 10,  0, 15,  4,  2, 13,  5, 18, 15,  7, 11, 14,
         7,  7, 13,  6, 13,  5, 11, 15,  5, 14, 12,  1,  6,  2,  2,  9,  5, 15,
        12, 12, 13,  7, 16,  6,  3, 14, 12,  8, 10, 12, 14,  4,  7,  8,  5,  0,
         2, 12,  6, 13, 15,  8,  5,  2,  6,  3, 15,  8, 17, 12,  0,  5, 10,  4,
        11,  7, 10,  5, 14,  7, 14, 11, 14,  6, 14, 15,  2,  8,  1, 11, 12,  2,
         5,  2,  9, 12, 16, 13, 15, 13, 17,  8, 10, 13,  5, 12, 12, 14, 12, 14,
        12, 14,  9,  1,  6, 12, 17,  2,  9, 15,  2,  8, 16, 12,  4,  5, 13,  5,
        11, 14,  4, 13,  9, 13, 15,  3,  2,  9, 12,  2, 14, 13,  4,  1, 16,  4,
        11,  4,  4,  2, 13,  2,  4,  3, 

In [20]:
loss_fn(res0, tld0.y)

tensor(0.0173, device='cuda:0', grad_fn=<NllLossBackward0>)

In [21]:
L1Loss()(res0.argmax(axis = 1).to(torch.float), tld0.y.to(torch.float))

tensor(0., device='cuda:0')

In [37]:
L1Loss()(res0.argmax(axis = 1).to(torch.float), res0.argmax(axis = 1).to(torch.float))

tensor(0., device='cuda:0')

In [36]:
L1Loss()(res0.argmax(axis = 1).to(torch.float), tmp.argmax(axis = 1).to(torch.float))

tensor(3.7148, device='cuda:0')

In [22]:
(res0.argmax(axis = 1) - tld0.y).abs().sum().item()/len(tld0.y)

0.0

In [23]:
res1 = model(tld1.x, tld1.edge_index, tld1.batch)

In [24]:
res1.argmax(axis = 1)

tensor([ 1, 15, 13,  8,  5,  2, 13,  2, 13,  9,  1,  7, 11,  5,  9, 16, 13, 11,
        12, 13,  3, 11,  6,  8, 13,  1,  5,  8,  0, 12, 12,  0, 10,  3, 12,  3,
         1,  9, 13,  4,  4,  5, 12,  8, 14,  2,  4,  9,  9,  7, 12,  1, 12, 13,
         9, 16,  9,  7, 13,  4,  7, 14, 11,  5], device='cuda:0')

In [25]:
tld1.y

tensor([ 5, 16,  9,  7,  8,  3, 16,  3, 16, 12,  6, 11,  9, 10, 15, 16, 12, 10,
        12, 16,  4,  3, 10,  9, 10,  4, 11,  5,  0, 15, 15,  0, 13,  1, 10,  6,
         3, 11, 17,  4,  6, 11,  6, 12, 14, 11,  6, 10,  8,  3, 14,  4, 14, 17,
        15, 11, 11,  5, 13,  9,  9, 12, 14, 14], device='cuda:0')

In [26]:
loss_fn(res1, tld1.y)

tensor(11.1721, device='cuda:0', grad_fn=<NllLossBackward0>)

In [27]:
L1Loss()(res1.argmax(axis = 1).to(torch.float), tld1.y.to(torch.float))

tensor(2.9062, device='cuda:0')

In [28]:
train_y = [d.y.item() for d in train_dataset]

In [29]:
np.unique(train_y)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [30]:
test_y = [d.y.item() for d in test_dataset]

In [31]:
np.unique(test_y)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18])

In [32]:
np.unique([d.y.item() for d in val_dataset])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18])