In [1]:
import time

import torch

import torch.nn.functional as F
from torch.nn import ModuleList, Embedding
from torch.nn import Sequential, ReLU, Linear
from torch.nn import CrossEntropyLoss, MSELoss, L1Loss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torch_geometric.utils import degree
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, PNAConv, BatchNorm, global_add_pool

from phylognn_model import G2Dist_GCNConv

from gene_graph_dataset import GeneGraphDataset

from torch.utils.tensorboard import SummaryWriter

In [2]:
train_p, test_p = 0.7, 0.2

In [3]:
dataset = GeneGraphDataset('dataset', 20, 20, graph_num = 100)
data_size = len(dataset)
train_size, test_size = (int)(data_size * train_p), (int)(data_size * test_p)

Generating...
Processing...
Done!


In [4]:
data_size

2000

In [5]:
dataset = dataset.shuffle()
train_dataset = dataset[:train_size]
test_dataset = dataset[train_size:(train_size + test_size)]
val_dataset = dataset[(train_size + test_size):]

In [6]:
# len(train_dataset), len(test_dataset), len(val_dataset)

In [7]:
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)
val_loader = DataLoader(val_dataset, batch_size=1)

In [8]:
# len(train_loader), len(test_loader), len(val_loader)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = G2Dist_GCNConv().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay = 0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10,
                              min_lr=0.00001)

In [10]:
# loss_fn = MSELoss()
# l1_fn = L1Loss()

loss_fn = CrossEntropyLoss()

def train(train_loader):
    model.train()

    total_loss, counter = 0, 0
    size = len(train_loader)
    for batch, data in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        #loss = (out.squeeze() - data.y).abs().sum()
        pred, y = out.softmax(axis = 1).argmax(axis = 1), data.y
        counter += (pred == y).sum().item()
        
        loss = loss_fn(out, data.y)
        
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        
    return total_loss / len(train_loader), counter

In [11]:
@torch.no_grad()
def test(loader):
    model.eval()

    total_error, counter = 0, 0
    for data in loader:
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)
        
        pred, y = out.softmax(axis = 1).argmax(axis = 1), data.y
        counter += (pred == y).sum().item()
        
        # total_error += (out.squeeze() - data.y).abs().sum().item()
        
        total_error += loss_fn(out, data.y).item()
        
    return total_error / len(loader), counter

In [12]:
writer = SummaryWriter(log_dir='runs_g2d_10/g2dist_0020_0020_02000-gcn-run8')

In [33]:
import numpy as np
for epoch in range(101, 1001):
    loss, train_counter = train(train_loader)
    test_mae, test_counter = test(test_loader)
    val_mae, _ = test(val_loader)
    
    scheduler.step(loss)
    
    writer.add_scalar('Loss/train', loss, epoch)
    writer.add_scalar('Loss/test', test_mae, epoch)
    writer.add_scalar('Loss/val', val_mae, epoch)
    writer.add_scalar('Counter/train', train_counter/len(train_loader.dataset), epoch)
    writer.add_scalar('Counter/test', test_counter/len(test_loader.dataset), epoch)
    
    print(f'{time.ctime()}\t'
          f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, '
          f'Test: {test_mae:.4f}')
    
    print(f'\t\t -- train_counter: {train_counter}, test_counter:{test_counter}')

Sat Jan  1 21:40:55 2022	Epoch: 101, Loss: 1.9609, Val: 2.7483, Test: 2.7595
		 -- train_counter: 412, test_counter:61
Sat Jan  1 21:40:57 2022	Epoch: 102, Loss: 1.9671, Val: 2.5827, Test: 2.6197
		 -- train_counter: 407, test_counter:42
Sat Jan  1 21:41:00 2022	Epoch: 103, Loss: 1.9705, Val: 2.4592, Test: 2.4404
		 -- train_counter: 443, test_counter:77
Sat Jan  1 21:41:02 2022	Epoch: 104, Loss: 1.9532, Val: 2.2629, Test: 2.3207
		 -- train_counter: 432, test_counter:81
Sat Jan  1 21:41:04 2022	Epoch: 105, Loss: 1.9487, Val: 2.4140, Test: 2.5904
		 -- train_counter: 402, test_counter:51
Sat Jan  1 21:41:07 2022	Epoch: 106, Loss: 1.9282, Val: 2.6279, Test: 2.6336
		 -- train_counter: 443, test_counter:69
Sat Jan  1 21:41:09 2022	Epoch: 107, Loss: 1.9272, Val: 2.5520, Test: 2.5510
		 -- train_counter: 461, test_counter:68
Sat Jan  1 21:41:11 2022	Epoch: 108, Loss: 1.9564, Val: 2.3223, Test: 2.3992
		 -- train_counter: 425, test_counter:76
Sat Jan  1 21:41:14 2022	Epoch: 109, Loss: 1.927

In [14]:
model.eval()

G2Dist_GCNConv(
  (node_emb): Embedding(10000, 2)
  (convs): ModuleList(
    (0): GCNConv(80, 100)
    (1): GCNConv(25, 25)
    (2): GCNConv(8, 8)
    (3): GCNConv(4, 4)
  )
  (pools): ModuleList(
    (0): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (1): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (batch_norms): ModuleList(
    (0): BatchNorm(25)
    (1): BatchNorm(8)
    (2): BatchNorm(4)
    (3): BatchNorm(2)
  )
  (mlp): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
  (lin): Linear(in_features=40, out_features=20, bias=True)
)

In [15]:
tld0 = list(train_loader)[0].to(device)
tld1 = list(test_loader)[0].to(device)

In [16]:
res0 = model(tld0.x, tld0.edge_index, tld0.batch)

In [17]:
res0

tensor([[-8.7049e+00, -5.1954e+00, -3.6045e+00,  ...,  3.5614e+00,
          3.5578e+00,  3.2431e-01],
        [-2.4642e+00,  1.3411e-01,  5.5629e-01,  ..., -3.0806e+00,
         -2.3589e+00, -2.8178e+00],
        [ 9.3100e+00,  9.5042e+00,  9.2877e+00,  ..., -1.7074e+01,
         -1.3243e+01, -9.3773e+00],
        ...,
        [-7.3904e+00, -3.6292e+00, -2.8910e+00,  ...,  2.1543e+00,
          1.6737e+00,  8.7467e-01],
        [ 4.2349e+00,  5.6975e+00,  6.2074e+00,  ..., -1.1185e+01,
         -6.8209e+00, -6.6732e+00],
        [-5.6582e+00, -2.3457e+00, -1.2107e+00,  ...,  9.4853e-02,
          4.8809e-01, -1.4580e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [18]:
res0.argmax(axis = 1)

tensor([13,  4,  3, 13,  2, 14, 12,  5,  0,  6, 12, 15,  0, 12,  4,  0, 12, 13,
         0, 12,  0,  9,  5, 13,  4,  0,  1,  1, 13, 12, 14, 11, 14,  8, 13, 13,
        15,  4,  0,  8, 10,  2, 10, 15, 12, 12,  0, 14,  1, 12, 13, 11,  8,  0,
        12,  3,  8, 15,  7, 15,  4,  6,  1,  0, 10,  2,  6, 12, 11,  0,  9,  3,
        15,  7, 16,  3, 13, 10,  8, 14,  8, 12,  7, 15,  0, 13, 12,  7,  5,  0,
         2, 11, 15,  4,  2, 10, 13, 13, 13,  0, 12,  6,  0,  0, 15,  6,  7,  0,
         1, 11,  3,  9,  9,  8,  5, 13,  9, 13, 11, 14,  0, 12,  1, 12,  8,  0,
        11,  7, 12,  1,  0,  2, 16,  6,  1, 15,  5,  1, 11,  5, 16,  1,  8, 15,
         9,  4,  1,  2,  0, 15,  1,  0, 13,  3, 10,  5, 14,  7,  0,  0,  8,  9,
        14,  4,  7,  0, 15, 15, 10, 12, 16, 12, 10, 12, 13,  9,  4,  9, 11, 15,
        13, 12, 11, 10, 11, 13,  1,  7, 10,  0, 11,  9, 14, 15,  9,  2,  8,  9,
        11,  9, 12,  0,  8, 13, 14,  4, 11, 11,  7,  6, 11,  4,  5,  4, 13, 13,
        11,  6,  8,  8,  0, 13, 13,  9, 

In [19]:
tld0.y

tensor([14,  7,  3, 13,  9,  8,  6,  6,  2,  7, 11, 14,  1, 12,  9,  6, 13, 13,
         1,  7,  0,  7,  7, 11,  2,  4,  4,  1, 11, 10, 14, 15, 10,  8, 13, 13,
        17,  4,  0, 10,  7,  4, 13, 12, 11, 11,  0, 11,  2, 10, 12,  8,  5,  1,
        13,  6,  8, 13,  9, 16,  5,  7,  3,  1, 10,  1,  9, 11, 11,  0, 12,  4,
        14,  9, 16,  6,  6, 12,  7, 15, 10, 11,  7, 17,  0, 13,  7,  7,  9,  4,
         3,  8,  9,  5,  1, 10, 13, 10, 11,  4, 13,  8,  1,  0, 13,  7,  7,  1,
         1, 11,  6, 11,  9,  9,  5, 15,  6, 13, 12, 14,  0,  8,  3, 10,  8,  2,
        13,  1, 14,  2,  1,  1, 15, 14,  1, 14, 12,  2, 11,  6, 14,  4, 11, 15,
         7,  7,  5,  5,  1, 15,  2,  3, 12,  8, 10,  3, 14,  5,  0,  2,  2,  8,
        14,  2, 11,  0, 10, 16,  8,  5, 17, 16, 12,  7, 14,  4,  6, 11, 11, 15,
        15, 16, 14, 13,  4, 11,  1,  8,  7,  0, 12,  9, 15, 15,  4,  1,  8, 13,
         4,  9, 14,  0,  7, 16, 13,  9, 11,  8,  8,  9,  4, 11, 12, 11, 13, 14,
        10,  6,  8,  8,  4, 13, 16, 12, 

In [20]:
loss_fn(res0, tld0.y)

tensor(2.1307, device='cuda:0', grad_fn=<NllLossBackward0>)

In [21]:
L1Loss()(res0.argmax(axis = 1).to(torch.float), tld0.y.to(torch.float))

tensor(1.9492, device='cuda:0')

In [22]:
(res0.argmax(axis = 1) - tld0.y).abs().sum().item()/len(tld0.y)

1.94921875

In [23]:
res1 = model(tld1.x, tld1.edge_index, tld1.batch)

In [24]:
res1.argmax(axis = 1)

tensor([ 7, 14,  8,  9,  7, 12,  5,  9,  0,  2, 16, 13,  1,  0, 12,  8,  3,  5,
         7, 11, 14,  0,  7,  9, 15, 13, 15,  0,  0, 15,  0,  0, 12, 11,  3,  6,
         3, 13,  5, 11, 13,  0,  9, 10,  0,  0,  1, 10,  0, 10, 12, 13, 15, 11,
         5, 14, 13, 11, 12, 11,  4, 15,  8,  2], device='cuda:0')

In [25]:
tld1.y

tensor([ 3, 16,  6, 11,  5,  8,  8, 17,  0,  4, 15, 11, 12,  0, 14, 11,  4, 12,
        12,  9, 14,  2,  8,  7, 15, 13, 13,  3,  1, 15,  7,  1,  8, 12,  3,  6,
         3, 17, 14,  9, 12,  2, 13,  7,  0,  3,  3,  8,  2,  7,  8, 13, 12, 14,
         3, 15,  9,  9,  9,  9,  2, 14,  8,  5], device='cuda:0')

In [26]:
loss_fn(res1, tld1.y)

tensor(2.4001, device='cuda:0', grad_fn=<NllLossBackward0>)

In [27]:
L1Loss()(res1.argmax(axis = 1).to(torch.float), tld1.y.to(torch.float))

tensor(2.4062, device='cuda:0')

In [28]:
train_y = [d.y.item() for d in train_dataset]

In [29]:
np.unique(train_y)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [30]:
test_y = [d.y.item() for d in test_dataset]

In [31]:
np.unique(test_y)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18])

In [32]:
np.unique([d.y.item() for d in val_dataset])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18])