In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

from dataset.dataset import NoCDataset
from model.vanilla import VanillaModel

from tqdm import tqdm

In [3]:
dataset = NoCDataset()
print(f"#Samples = {len(dataset)}")
print(dataset[0])

# TODO: use advanced dgl.dataloader instead of manually dividing dataset
num_training = 0.9 * len(dataset)


#Samples = 980
Graph(num_nodes=31, num_edges=40,
      ndata_schemes={'delay': Scheme(shape=(), dtype=torch.float64), 'in_latency': Scheme(shape=(), dtype=torch.int64), 'out_latency': Scheme(shape=(), dtype=torch.int64), 'op_type': Scheme(shape=(4,), dtype=torch.int64)}
      edata_schemes={'size': Scheme(shape=(), dtype=torch.float64), 'cnt': Scheme(shape=(), dtype=torch.float64), 'route': Scheme(shape=(), dtype=torch.int64)})


In [4]:
device = "cuda:0"

model = VanillaModel(n_feats=5, e_feats=3, h_feats=100).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)

epoches = 20
for e in tqdm(range(epoches)):
    for i, g in enumerate(dataset):
        if i > num_training:
            break

        #TODO: merge feature extraction to NoCDataset
        g = g.to(device)
        nfeat = torch.concat([
            g.ndata["delay"].reshape(-1, 1).float(), 
            g.ndata["op_type"].float()
            ], dim=1)
        efeat = torch.concat([
            g.edata["size"].reshape(-1, 1).float(),
            g.edata["cnt"].reshape(-1, 1).float(),
            g.edata["route"].reshape(-1, 1).float(),
            ], dim=1)
        in_latency = g.ndata["in_latency"].float()

        pred = model(g, nfeat, efeat).squeeze()
        pred = pred * g.ndata["op_type"][:, 2]  # mask non-worker
        loss = F.mse_loss(pred, in_latency)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 50 == 0:
            print(f"iteration: {i}; loss = {loss}")
            # print(f"in_latency = {in_latency}")
            # print(f"pred = {pred}")


  0%|          | 0/20 [00:00<?, ?it/s]

iteration: 0; loss = 37109733785600.0
iteration: 50; loss = 6.909738231844045e+16
iteration: 100; loss = 4531420814901248.0
iteration: 150; loss = 6.514671815514653e+18
iteration: 200; loss = 3222119093633024.0
iteration: 250; loss = 2.1475583708102237e+22
iteration: 300; loss = 3.5374218164530315e+19
iteration: 350; loss = 19088753557504.0
iteration: 400; loss = 3.424808076317609e+19
iteration: 450; loss = 5.1390836756814234e+17
iteration: 500; loss = 4.4445379858949734e+17
iteration: 550; loss = 4.825674070936781e+17
iteration: 600; loss = 36623093760.0
iteration: 650; loss = 6.247359098325565e+17
iteration: 700; loss = 16848318889984.0
iteration: 750; loss = 2116576346112.0
iteration: 800; loss = 5.458869502974362e+16
iteration: 850; loss = 7835879473152.0


  5%|▌         | 1/20 [00:12<03:59, 12.60s/it]

iteration: 0; loss = 95993668304896.0
iteration: 50; loss = 3.465037665743667e+16
iteration: 100; loss = 1778385298653184.0
iteration: 150; loss = 6.366141538497462e+18
iteration: 200; loss = 2308832952844288.0
iteration: 250; loss = 2.389983085511913e+22
iteration: 300; loss = 3.4838155668454638e+19
iteration: 350; loss = 20359149518848.0
iteration: 400; loss = 3.073441543475744e+19
iteration: 450; loss = 5.085232344932024e+17
iteration: 500; loss = 4.415020221857792e+17
iteration: 550; loss = 4.791061444894392e+17
iteration: 600; loss = 35908726784.0
iteration: 650; loss = 4.680845368533975e+17
iteration: 700; loss = 6603489148928.0
iteration: 750; loss = 1025789263872.0
iteration: 800; loss = 2.6814580340555776e+16
iteration: 850; loss = 3413015789568.0


 10%|█         | 2/20 [00:24<03:36, 12.03s/it]

iteration: 0; loss = 45022682546176.0
iteration: 50; loss = 1.876416450789376e+16
iteration: 100; loss = 603643023720448.0
iteration: 150; loss = 3.552492382530306e+18
iteration: 200; loss = 590801675485184.0


 10%|█         | 2/20 [00:27<04:05, 13.62s/it]


KeyboardInterrupt: 

In [None]:
# test accuracy
# we use relative error to measure

for i, g in enumerate(dataset):
    if i <= num_training:
        continue


    g = g.to(device)
    nfeat = torch.concat([
        g.ndata["delay"].reshape(-1, 1).float(), 
        g.ndata["op_type"].float()
        ], dim=1).to(device)
    efeat = torch.concat([
        g.edata["size"].reshape(-1, 1).float(),
        g.edata["cnt"].reshape(-1, 1).float(),
        g.edata["route"].reshape(-1, 1).float(),
        ], dim=1).to(device)
    in_latency = torch.log(g.ndata["in_latency"].float() + 1).to(device)

    pred = model(g, nfeat, efeat).squeeze()
    print(sum(pred))
    print(sum(in_latency))
    # loss = F.mse_loss(pred, in_latency)
    # print(loss)

    pred = torch.exp(pred)
    # print(in_latency)
    # print(pred)


tensor(0.1151, device='cuda:0', grad_fn=<AddBackward0>)
tensor(36.4881, device='cuda:0')
tensor(0.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(56.5383, device='cuda:0')
tensor(0.0438, device='cuda:0', grad_fn=<AddBackward0>)
tensor(52.1333, device='cuda:0')
tensor(0.0438, device='cuda:0', grad_fn=<AddBackward0>)
tensor(24.1608, device='cuda:0')
tensor(0.1041, device='cuda:0', grad_fn=<AddBackward0>)
tensor(37.5766, device='cuda:0')
tensor(0.0548, device='cuda:0', grad_fn=<AddBackward0>)
tensor(41.8914, device='cuda:0')
tensor(0.0438, device='cuda:0', grad_fn=<AddBackward0>)
tensor(55.6862, device='cuda:0')
tensor(0.0986, device='cuda:0', grad_fn=<AddBackward0>)
tensor(88.2572, device='cuda:0')
tensor(0.0219, device='cuda:0', grad_fn=<AddBackward0>)
tensor(37.9032, device='cuda:0')
tensor(0.0384, device='cuda:0', grad_fn=<AddBackward0>)
tensor(43.5961, device='cuda:0')
tensor(0.0822, device='cuda:0', grad_fn=<AddBackward0>)
tensor(67.8489, device='cuda:0')
tensor(0.0329, device