In [1]:
import time

import torch

import torch.nn.functional as F
from torch.nn import ModuleList, Embedding
from torch.nn import Sequential, ReLU, Linear
from torch.nn import CrossEntropyLoss, MSELoss, L1Loss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torch_geometric.utils import degree
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, PNAConv, BatchNorm, global_add_pool

from phylognn_model import G2Dist_GCNConv_Global

from gene_graph_dataset import GeneGraphDataset

from torch.utils.tensorboard import SummaryWriter

In [2]:
train_p, test_p = 0.7, 0.2

In [3]:
dataset = GeneGraphDataset('dataset', 20, 5, graph_num = 400)
data_size = len(dataset)
train_size, test_size = (int)(data_size * train_p), (int)(data_size * test_p)

Generating...
Processing...
Done!


In [4]:
data_size

2000

In [5]:
dataset = dataset.shuffle()
train_dataset = dataset[:train_size]
test_dataset = dataset[train_size:(train_size + test_size)]
val_dataset = dataset[(train_size + test_size):]

In [6]:
# len(train_dataset), len(test_dataset), len(val_dataset)

In [7]:
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256)
val_loader = DataLoader(val_dataset, batch_size=1)

In [8]:
# len(train_loader), len(test_loader), len(val_loader)

In [9]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')
model = G2Dist_GCNConv_Global().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = 0.005)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10,
                              min_lr=0.00001)

In [10]:
writer = SummaryWriter(log_dir='runs_g2d_10/g2dist_20_05_02000-mean-run1')

In [11]:
# loss_fn = MSELoss()
# l1_fn = L1Loss()

loss_fn = CrossEntropyLoss()

def train(train_loader):
    model.train()

    total_loss, counter = 0, 0
    size = len(train_loader)
    for batch, data in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        #loss = (out.squeeze() - data.y).abs().sum()
        pred, y = out.softmax(axis = 1).argmax(axis = 1), data.y
        counter += (pred == y).sum().item()
        
        loss = loss_fn(out, data.y)
        
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        
    return total_loss / len(train_loader), counter

In [12]:
@torch.no_grad()
def test(loader):
    model.eval()

    total_error, counter = 0, 0
    for data in loader:
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)
        
        pred, y = out.softmax(axis = 1).argmax(axis = 1), data.y
        counter += (pred == y).sum().item()
        
        # total_error += (out.squeeze() - data.y).abs().sum().item()
        
        total_error += loss_fn(out, data.y).item()
        
    return total_error / len(loader), counter

In [13]:
import numpy as np
for epoch in range(1, 201):
    loss, train_counter = train(train_loader)
    test_mae, test_counter = test(test_loader)
    val_mae, _ = test(val_loader)
    
    scheduler.step(loss)
    
    writer.add_scalar('Loss/train', loss, epoch)
    writer.add_scalar('Loss/test', test_mae, epoch)
    writer.add_scalar('Loss/val', val_mae, epoch)
    writer.add_scalar('Counter/train', train_counter/len(train_loader.dataset), epoch)
    writer.add_scalar('Counter/test', test_counter/len(test_loader.dataset), epoch)
    
    print(f'{time.ctime()}\t'
          f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, '
          f'Test: {test_mae:.4f}')
    
    # print(f'\t\t -- train_counter: {train_counter}, test_counter:{test_counter}')

Sun Jan  2 22:27:50 2022	Epoch: 001, Loss: 4.1724, Val: 3.0756, Test: 3.1187
Sun Jan  2 22:27:55 2022	Epoch: 002, Loss: 3.0898, Val: 2.7414, Test: 2.8029
Sun Jan  2 22:28:01 2022	Epoch: 003, Loss: 2.4643, Val: 2.5111, Test: 2.6015
Sun Jan  2 22:28:07 2022	Epoch: 004, Loss: 2.0938, Val: 2.3154, Test: 2.4331
Sun Jan  2 22:28:13 2022	Epoch: 005, Loss: 1.8394, Val: 2.1290, Test: 2.2625
Sun Jan  2 22:28:18 2022	Epoch: 006, Loss: 1.6334, Val: 1.9508, Test: 2.0844
Sun Jan  2 22:28:24 2022	Epoch: 007, Loss: 1.4551, Val: 1.7890, Test: 1.9095
Sun Jan  2 22:28:30 2022	Epoch: 008, Loss: 1.3134, Val: 1.6506, Test: 1.7472
Sun Jan  2 22:28:35 2022	Epoch: 009, Loss: 1.2361, Val: 1.5426, Test: 1.6150
Sun Jan  2 22:28:41 2022	Epoch: 010, Loss: 1.2038, Val: 1.4537, Test: 1.5140
Sun Jan  2 22:28:46 2022	Epoch: 011, Loss: 1.1841, Val: 1.3729, Test: 1.4340
Sun Jan  2 22:28:52 2022	Epoch: 012, Loss: 1.1501, Val: 1.2956, Test: 1.3746
Sun Jan  2 22:28:58 2022	Epoch: 013, Loss: 1.1111, Val: 1.2411, Test: 1.3360

In [14]:
model.eval()

G2Dist_GCNConv_Global(
  (node_emb): Embedding(10000, 2)
  (convs): ModuleList(
    (0): GCNConv(80, 80)
    (1): GCNConv(80, 80)
  )
  (pools): ModuleList()
  (batch_norms): ModuleList(
    (0): BatchNorm(80)
    (1): BatchNorm(80)
  )
  (lin): Sequential(
    (0): Linear(in_features=80, out_features=20, bias=True)
  )
)

In [15]:
model.train()

G2Dist_GCNConv_Global(
  (node_emb): Embedding(10000, 2)
  (convs): ModuleList(
    (0): GCNConv(80, 80)
    (1): GCNConv(80, 80)
  )
  (pools): ModuleList()
  (batch_norms): ModuleList(
    (0): BatchNorm(80)
    (1): BatchNorm(80)
  )
  (lin): Sequential(
    (0): Linear(in_features=80, out_features=20, bias=True)
  )
)

In [16]:
tld0 = list(train_loader)[0].to(device)
tld1 = list(test_loader)[0].to(device)

In [17]:
res0 = model(tld0.x, tld0.edge_index, tld0.batch)

In [18]:
res0.argmax(axis = 1)

tensor([0, 3, 1, 2, 4, 3, 1, 3, 0, 3, 4, 2, 4, 3, 3, 3, 1, 3, 4, 2, 0, 4, 0, 1,
        3, 0, 1, 0, 2, 2, 1, 0, 4, 4, 1, 0, 3, 1, 2, 2, 1, 1, 1, 4, 2, 1, 4, 3,
        0, 4, 2, 1, 2, 2, 4, 2, 3, 2, 3, 3, 3, 0, 4, 0, 3, 2, 4, 1, 0, 1, 0, 0,
        0, 0, 0, 1, 1, 0, 2, 3, 3, 4, 3, 3, 2, 0, 0, 4, 2, 3, 4, 1, 0, 2, 4, 2,
        1, 4, 2, 4, 4, 1, 0, 2, 0, 4, 2, 2, 1, 0, 0, 2, 3, 4, 0, 2, 0, 0, 1, 3,
        4, 4, 2, 4, 0, 2, 2, 2, 0, 1, 4, 2, 4, 2, 2, 2, 1, 1, 3, 0, 1, 4, 3, 2,
        1, 0, 4, 2, 3, 0, 2, 0, 3, 4, 4, 2, 2, 4, 4, 2, 4, 1, 4, 1, 3, 3, 3, 3,
        3, 0, 0, 3, 1, 3, 1, 4, 3, 3, 2, 2, 3, 3, 0, 4, 2, 4, 3, 2, 4, 4, 4, 2,
        4, 0, 2, 2, 4, 3, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 3, 1, 2, 2, 0, 0, 1, 3,
        4, 1, 2, 4, 0, 3, 3, 3, 2, 2, 0, 3, 1, 4, 3, 3, 3, 4, 0, 3, 4, 0, 3, 2,
        3, 1, 2, 2, 0, 4, 3, 2, 4, 0, 1, 2, 0, 3, 1, 0, 2, 0, 4, 3, 1, 0, 0, 1,
        4, 2, 1, 1, 0, 1, 4, 0, 1, 3, 3, 1, 1, 3, 2, 1, 4, 3, 3, 0, 4, 2, 0, 1,
        0, 0, 0, 3, 4, 4, 3, 4, 4, 0, 4,

In [19]:
tld0.y

tensor([0, 3, 1, 2, 4, 3, 1, 3, 0, 3, 4, 2, 4, 3, 3, 3, 1, 3, 4, 2, 0, 4, 0, 1,
        3, 0, 1, 0, 2, 2, 1, 0, 4, 4, 1, 0, 3, 1, 2, 2, 1, 1, 1, 4, 2, 1, 4, 3,
        0, 4, 2, 1, 2, 2, 4, 2, 3, 2, 3, 3, 3, 0, 4, 0, 3, 2, 4, 1, 0, 1, 0, 0,
        0, 0, 0, 1, 1, 0, 2, 3, 3, 4, 3, 3, 2, 0, 0, 4, 2, 3, 4, 1, 0, 2, 4, 2,
        1, 4, 2, 4, 4, 1, 0, 2, 0, 4, 2, 2, 1, 0, 0, 2, 3, 4, 0, 2, 0, 0, 1, 3,
        4, 4, 2, 4, 0, 2, 2, 2, 0, 1, 4, 2, 4, 2, 2, 2, 1, 1, 3, 0, 1, 4, 3, 2,
        1, 0, 4, 2, 3, 0, 2, 0, 3, 4, 4, 2, 2, 4, 4, 2, 4, 1, 4, 1, 3, 3, 3, 3,
        3, 0, 0, 3, 1, 3, 1, 4, 3, 3, 2, 2, 3, 3, 0, 4, 2, 4, 3, 2, 4, 4, 4, 2,
        4, 0, 2, 2, 4, 3, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 3, 1, 2, 2, 0, 0, 1, 3,
        4, 1, 2, 4, 0, 3, 3, 3, 2, 2, 0, 3, 1, 4, 3, 3, 3, 4, 0, 3, 4, 0, 3, 2,
        3, 1, 2, 2, 0, 4, 3, 2, 4, 0, 1, 2, 0, 3, 1, 0, 2, 0, 4, 3, 1, 0, 0, 1,
        4, 2, 1, 1, 0, 1, 4, 0, 1, 3, 3, 1, 1, 3, 2, 1, 4, 3, 3, 0, 4, 2, 0, 1,
        0, 0, 0, 3, 4, 4, 3, 4, 4, 0, 4,

In [20]:
loss_fn(res0, tld0.y)

tensor(0.0643, grad_fn=<NllLossBackward0>)

In [21]:
L1Loss()(res0.argmax(axis = 1).to(torch.float), tld0.y.to(torch.float))

tensor(0.)

In [22]:
res1 = model(tld1.x, tld1.edge_index, tld1.batch)

In [23]:
res1.argmax(axis = 1)

tensor([1, 1, 1, 3, 1, 4, 0, 2, 2, 0, 2, 2, 0, 2, 2, 3, 4, 4, 2, 4, 3, 4, 4, 3,
        2, 2, 4, 3, 4, 4, 2, 2, 2, 1, 3, 0, 4, 2, 4, 4, 0, 3, 2, 2, 2, 1, 3, 3,
        3, 0, 0, 2, 4, 3, 2, 4, 2, 4, 1, 1, 2, 0, 3, 1, 0, 2, 4, 3, 3, 3, 0, 3,
        3, 2, 2, 2, 4, 4, 1, 3, 1, 1, 1, 0, 1, 0, 1, 4, 4, 1, 0, 0, 4, 2, 2, 1,
        2, 4, 3, 3, 2, 2, 4, 3, 3, 2, 3, 4, 4, 2, 4, 4, 1, 0, 0, 4, 4, 2, 3, 4,
        3, 4, 3, 2, 2, 1, 3, 2, 2, 0, 1, 1, 4, 3, 1, 1, 0, 3, 3, 2, 0, 1, 4, 4,
        0, 0, 3, 2, 3, 0, 2, 2, 3, 1, 3, 3, 0, 4, 4, 1, 4, 4, 1, 2, 0, 0, 3, 4,
        2, 4, 0, 3, 4, 1, 1, 0, 4, 3, 1, 2, 0, 0, 1, 4, 0, 2, 4, 0, 4, 2, 4, 2,
        2, 2, 4, 3, 3, 3, 2, 3, 2, 2, 4, 0, 3, 2, 1, 3, 2, 1, 2, 2, 0, 2, 0, 2,
        3, 3, 4, 1, 2, 4, 4, 0, 0, 2, 2, 2, 0, 4, 2, 4, 1, 2, 1, 3, 0, 3, 2, 4,
        4, 3, 0, 4, 4, 4, 1, 0, 4, 3, 4, 2, 0, 1, 0, 2])

In [24]:
tld1.y

tensor([3, 2, 3, 1, 2, 3, 0, 2, 4, 0, 2, 4, 0, 2, 4, 4, 3, 4, 1, 4, 4, 1, 2, 4,
        3, 1, 4, 2, 3, 4, 3, 2, 1, 1, 2, 0, 2, 2, 3, 3, 0, 4, 1, 4, 2, 3, 2, 3,
        2, 0, 0, 3, 4, 2, 4, 2, 1, 2, 1, 2, 4, 0, 3, 4, 0, 2, 2, 3, 1, 2, 0, 1,
        2, 4, 1, 1, 4, 3, 1, 3, 4, 2, 1, 1, 1, 0, 2, 4, 2, 1, 0, 0, 4, 4, 2, 4,
        2, 1, 3, 3, 3, 2, 4, 2, 4, 4, 4, 2, 4, 3, 4, 2, 1, 0, 0, 4, 3, 4, 4, 4,
        2, 3, 4, 1, 1, 1, 3, 1, 4, 0, 1, 4, 2, 4, 1, 1, 0, 4, 2, 2, 0, 1, 4, 4,
        0, 0, 3, 2, 3, 0, 1, 2, 4, 1, 2, 1, 0, 4, 1, 1, 4, 3, 2, 2, 1, 0, 4, 4,
        2, 1, 0, 2, 4, 1, 4, 0, 2, 4, 2, 3, 0, 0, 2, 4, 0, 4, 1, 0, 4, 1, 4, 3,
        1, 1, 2, 1, 2, 2, 2, 3, 1, 3, 2, 0, 2, 1, 1, 4, 3, 2, 2, 2, 0, 3, 0, 2,
        4, 3, 4, 3, 4, 3, 3, 0, 0, 3, 2, 4, 0, 4, 2, 2, 1, 2, 1, 2, 0, 4, 2, 3,
        2, 2, 0, 1, 4, 4, 2, 1, 4, 2, 1, 4, 0, 2, 0, 2])

In [25]:
for tld in test_loader:
    res = model(tld.x, tld.edge_index, tld.batch)
    print(res.argmax(axis = 1).cpu())

tensor([1, 1, 1, 3, 1, 4, 0, 2, 2, 0, 2, 2, 0, 2, 2, 3, 4, 4, 2, 4, 3, 4, 4, 3,
        2, 2, 4, 3, 4, 4, 2, 2, 2, 1, 3, 0, 4, 2, 4, 4, 0, 3, 2, 2, 2, 1, 3, 3,
        3, 0, 0, 2, 4, 3, 2, 4, 2, 4, 1, 1, 2, 0, 3, 1, 0, 2, 4, 3, 3, 3, 0, 3,
        3, 2, 2, 2, 4, 4, 1, 3, 1, 1, 1, 0, 1, 0, 1, 4, 4, 1, 0, 0, 4, 2, 2, 1,
        2, 4, 3, 3, 2, 2, 4, 3, 3, 2, 3, 4, 4, 2, 4, 4, 1, 0, 0, 4, 4, 2, 3, 4,
        3, 4, 3, 2, 2, 1, 3, 2, 2, 0, 1, 1, 4, 3, 1, 1, 0, 3, 3, 2, 0, 1, 4, 4,
        0, 0, 3, 2, 3, 0, 2, 2, 3, 1, 3, 3, 0, 4, 4, 1, 4, 4, 1, 2, 0, 0, 3, 4,
        2, 4, 0, 3, 4, 1, 1, 0, 4, 3, 1, 2, 0, 0, 1, 4, 0, 2, 4, 0, 4, 2, 4, 2,
        2, 2, 4, 3, 3, 3, 2, 3, 2, 2, 4, 0, 3, 2, 1, 3, 2, 1, 2, 2, 0, 2, 0, 2,
        3, 3, 4, 1, 2, 4, 4, 0, 0, 2, 2, 2, 0, 4, 2, 4, 1, 2, 1, 3, 0, 3, 2, 4,
        4, 3, 0, 4, 4, 4, 1, 0, 4, 3, 4, 2, 0, 1, 0, 2])
tensor([2, 0, 0, 3, 4, 3, 0, 4, 2, 4, 4, 4, 2, 2, 2, 2, 0, 2, 3, 4, 1, 0, 1, 2,
        3, 0, 3, 3, 1, 3, 1, 0, 0, 4, 3, 0, 0, 3, 1, 2, 1, 1, 3

In [26]:
len(dataset)

2000

In [27]:
loss_fn(res1, tld1.y)

tensor(1.5402, grad_fn=<NllLossBackward0>)

In [28]:
L1Loss()(res1.argmax(axis = 1).to(torch.float), tld1.y.to(torch.float))

tensor(0.7773)

In [29]:
train_y = [d.y.item() for d in train_dataset]

In [30]:
np.unique(train_y)

array([0, 1, 2, 3, 4])

In [31]:
test_y = [d.y.item() for d in test_dataset]

In [32]:
np.unique(test_y)

array([0, 1, 2, 3, 4])

In [33]:
np.unique([d.y.item() for d in val_dataset])

array([0, 1, 2, 3, 4])

In [34]:
data = train_dataset[0]

In [35]:
data = data.to(device)

In [36]:
data.x

tensor([[0, 2, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 1, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 1,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 0, 1, 0]])

In [37]:
model.node_emb

Embedding(10000, 2)

In [38]:
x = model.node_emb(data.x.squeeze()).view(-1, 80)

In [39]:
x

tensor([[ 1.0873,  0.1086,  0.9810,  ...,  0.1086,  1.0873,  0.1086],
        [ 1.0873,  0.1086,  1.0873,  ...,  0.1086,  1.0873,  0.1086],
        [ 1.0873,  0.1086,  1.0873,  ...,  1.1391,  1.0873,  0.1086],
        ...,
        [ 1.0873,  0.1086,  1.0873,  ...,  0.1086,  1.0873,  0.1086],
        [ 1.0873,  0.1086,  1.0873,  ...,  0.1086, -1.0730,  1.1391],
        [ 1.0873,  0.1086,  1.0873,  ...,  1.1391,  1.0873,  0.1086]],
       grad_fn=<ViewBackward0>)

In [40]:
x = model.convs[0](x, data.edge_index)

In [41]:
x

tensor([[-0.5863, -0.4789,  0.1791,  ...,  0.1434, -0.2350, -0.2029],
        [-0.5876, -0.4833,  0.1811,  ...,  0.1518, -0.2389, -0.2073],
        [-0.5984, -0.5648,  0.0596,  ...,  0.1285, -0.2463, -0.1648],
        ...,
        [-0.5901, -0.4803,  0.1876,  ...,  0.1478, -0.2438, -0.2027],
        [-0.5531, -0.4870,  0.1479,  ...,  0.2146, -0.1797, -0.2445],
        [-0.5807, -0.5874,  0.1187,  ...,  0.1512, -0.2544, -0.1558]],
       grad_fn=<AddBackward0>)

In [42]:
import numpy as np

In [43]:
train_y = [d.y.item() for d in train_dataset]

In [44]:
test_y = [d.y.item() for d in test_dataset]

In [45]:
np.unique(train_y)

array([0, 1, 2, 3, 4])

In [46]:
np.unique(test_y)

array([0, 1, 2, 3, 4])

In [47]:
np.unique([d.y.item() for d in val_dataset])

array([0, 1, 2, 3, 4])

In [48]:
a = np.histogram(train_y, np.arange(25))

In [49]:
b = np.histogram(test_y, np.arange(25))

In [50]:
dataset[1].x

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [2, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 2, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 0, 1, 0]])

In [51]:
tld0.x.squeeze().shape

torch.Size([20480, 40])

In [52]:
model.pre(tld0.x, tld0.edge_index)

AttributeError: 'G2Dist_GCNConv_Global' object has no attribute 'pre'

In [None]:
dataset[0].x.squeeze()

In [None]:
test(train_loader)

In [None]:
len(dataset)

In [None]:
train_loader.dataset