In [1]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from circuit import Circuit
import numpy as np
import dgl.function as fn
import random
import networkx as nx
from utils import *
from dgl.data.utils import save_graphs, load_graphs
import time

Using backend: pytorch


In [2]:
# Build (or load from cache) the heterogeneous fault-site graph `hg` for one design.
# Every name assigned here (design, cir, n_patterns, dic, hg, ...) is a notebook global
# that later cells read, so this cell has to run first.
import os.path
preprocess_begin = time.time()  # start of the preprocessing timer reported after sample generation

design = "leon3_GNN"
path = design+"/heterograph_600.bin"  # cache file for the DGL heterograph

cir = Circuit(design)
# NOTE(review): absolute, user-specific path -- the notebook only runs on a machine that
# mounts this directory; consider making it configurable.
root = "/autofs/home/sh528/M3Ddesigns/"+design+"/"
# The two die netlists are parsed before the top-level netlist.
cir.parseHierVerilog(root+"die0.v")
cir.parseHierVerilog(root+"die1.v")
cir.parseTop(root+"top.v")
# cir.parseVerilog(design+"/"+design+".v")
# cir.parsePartition(design+"/die0.rpt")
stil = design+"/TDF_600.stil"

# if False:
if os.path.isfile(path):
    # Cached graph exists: the STIL file and backprop are still run (with different
    # arguments than in the build branch) to obtain n_patterns and dic, but the graph
    # itself is read from disk. The meaning of the extra -2 / False arguments is defined
    # by the project helpers -- TODO confirm.
    n_patterns = cir.parseSTIL(stil, -2)
    dic, topEdge = backprop(cir,False)
    hg = load_graphs(path)[0][0]
else:
    n_patterns = cir.parseSTIL(stil)
    edge = CreateGraphByFaultSite(cir)
    dic, topEdge = backprop(cir)
    

    # Two relations: topNode -> faultSite ('topEdge') and faultSite -> faultSite ('net').
    hg = dgl.heterograph({ ('topNode', 'topEdge', 'faultSite'): topEdge, ('faultSite', 'net', 'faultSite'): edge })
    # One feature row per entry of cir.Node, in cir.Node iteration order.
    feats = torch.tensor([cir.Node[n].net.feats for n in cir.Node])
    hg.nodes['faultSite'].data['feats'] = feats
    # Degree features are stored as float column vectors of shape (num_faultSite, 1).
    hg.nodes['faultSite'].data['in_degree'] = hg.in_degrees(etype='net').view(-1,1).float()
    hg.nodes['faultSite'].data['out_degree'] = hg.out_degrees(etype='net').view(-1,1).float()
    hg.nodes['faultSite'].data['top_degree'] = hg.in_degrees(etype='topEdge').view(-1,1).float()
    hg.nodes['faultSite'].data['level'] = getLevel(cir)
    hg.nodes['faultSite'].data['loc'] = getLocation(cir, hg.num_nodes('faultSite'))
    hg.nodes['faultSite'].data['more'] = addfeatures(cir, hg.num_nodes('faultSite'))
    # Persist the graph so the next run takes the load branch above.
    save_graphs(path, hg)
    
print(hg)

Start parsing verilog netlist
nodeID: 1086147
End parsing verilog netlist
CPU time: 12.77s

Start parsing verilog netlist
nodeID: 1948752
End parsing verilog netlist
CPU time: 11.95s

Start parsing top verilog netlist
End parsing verilog netlist
CPU time: 8.14s

Start parsing STIL patterns
Pass Pattern 0
Final Pat
End parsing STIL patterns
CPU time: 16.94s

Graph(num_nodes={'faultSite': 2715084, 'topNode': 108720},
      num_edges={('faultSite', 'net', 'faultSite'): 2844138, ('topNode', 'topEdge', 'faultSite'): 617775554},
      metagraph=[('faultSite', 'faultSite', 'net'), ('topNode', 'faultSite', 'topEdge')])


In [3]:
# Column-wise sum of the per-fault-site 'loc' feature (one total per location column).
loc_feature = hg.nodes['faultSite'].data['loc']
print(loc_feature.sum(dim=0))

tensor([1086147.,  862605.,  766332.])


In [4]:
# Obtain the list of (graph, label) samples: read them from the cached file when it
# exists, otherwise generate them from the failure logs.
sample_path = design+"/samples_600.bin"

if not os.path.isfile(sample_path):
    # Use the whole pattern range when generating samples.
    start_pat = 0
    end_pat = n_patterns

    preprocess_st = time.time()

    subgraphs, dstIDset = getDatasetfromLog(cir, design, dic, hg, n_patterns, 5000, start_pat, end_pat, True)
    # subgraphs = getSubgraphs(hg, dataset, dstIDset, True, start_pat, end_pat)
    print("Number of samples: {}".format(len(subgraphs)))
else:
    dataset, label_dict = load_graphs(sample_path)
    labels = label_dict["labels"]
    # Pair every stored graph with its label converted to a plain Python number.
    subgraphs = [(dataset[idx], labels[idx].item()) for idx in range(len(dataset))]

preprocess_end = time.time()

# preprocess_begin was recorded at the start of the graph-construction cell.
print("Total CPU time for preprocessing: {}".format(preprocess_end-preprocess_begin))

Total CPU time for preprocessing: 517.3026375770569


In [None]:
# Draw one sample subgraph with every node coloured by its one-hot 'loc' feature
# (column 0 -> blue, column 1 -> green, column 2 -> red).
# matplotlib is imported here because the notebook otherwise only imports it in a later
# cell, which would make this cell fail on a top-to-bottom run.
import matplotlib.pyplot as plt

sg = subgraphs[2][0]
g = dgl.to_homogeneous(sg)
G = dgl.to_networkx(g)

# Fetch the 'loc' rows of all sampled fault sites in one indexing operation instead of
# re-indexing the full-graph tensor for every node.
site_ids = sg.ndata[dgl.NID]['faultSite']
site_loc = hg.nodes['faultSite'].data['loc'][site_ids]

LOC_COLORS = ('blue', 'green', 'red')
UNKNOWN_COLOR = 'gray'  # used when no location flag is set, so the list stays aligned with the nodes

color_map = []
for i in range(g.num_nodes()):
    node_color = UNKNOWN_COLOR
    for col, candidate in enumerate(LOC_COLORS):
        if site_loc[i][col] == 1:
            node_color = candidate
            break
    color_map.append(node_color)

nx.draw(G, node_color=color_map, with_labels=True)
plt.show()

In [5]:
# Class balance of the samples: count how often each label occurs.
from collections import Counter
labels = [sample_label for _, sample_label in subgraphs]
label_counts = Counter(labels)
print(label_counts)

Counter({2: 2024, 0: 1851, 1: 1125})


In [6]:
from dgl.nn.pytorch import GraphConv
class GCN(nn.Module):
    """Two-layer graph-convolution classifier producing one prediction per graph.

    Node features go through two ``GraphConv`` layers (``in_feats -> h_feats -> 16``).
    The 16-dimensional node embeddings are max-pooled per graph, normalised with a
    softmax over the embedding dimension, concatenated with the per-graph mean of
    columns 3:6 of the ``'infeats'`` node feature, and mapped to ``num_classes``
    logits by a linear layer.

    Args:
        in_feats: size of the input node feature vectors.
        h_feats: hidden size of the first convolution.
        num_classes: number of output logits per graph.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, 16)
        # 16 pooled embedding values + 3 ratio values
        self.linear1 = nn.Linear(16+3, num_classes)

    def forward(self, g, in_feat):
        """Return logits of shape ``(batch_size, num_classes)``.

        Args:
            g: a (possibly batched) DGL graph whose nodes carry an ``'infeats'`` feature.
                Columns 3:6 are presumably the one-hot location flags -- verify against
                the code that builds the samples.
            in_feat: node feature matrix fed to the first convolution.
        """
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)

        # local_scope keeps the temporary 'h_final' feature off the caller's graph.
        with g.local_scope():
            g.ndata['h_final'] = h
            h_final = dgl.readout_nodes(g, 'h_final', op='max')
            # Per-graph mean of columns 3:6. For a single graph this equals
            # sum/num_nodes, and for a batch it yields one row per graph so the
            # concatenation below stays valid.
            ratio = dgl.readout_nodes(g, 'infeats', op='mean')[:, 3:6]

        # Normalise over the embedding dimension. Normalising over dim=0 (the batch
        # axis) turns every entry into 1.0 when the batch holds a single graph.
        h_final = F.softmax(h_final, dim=1)
        h_final = torch.cat([h_final, ratio], dim=1)
        return self.linear1(h_final)

In [7]:
# Split the samples 70 % / 15 % / 15 % into train / validation / test loaders.
# The shuffle uses a fixed seed and works on a copy, so re-running this cell (or
# restarting the kernel) reproduces the same split. Otherwise a checkpoint saved by an
# earlier run could be evaluated on graphs it was trained on.
SPLIT_SEED = 0

num_examples = len(subgraphs)
num_train = int(num_examples * 0.7)
num_val = int(num_examples * 0.15)

shuffled_subgraphs = list(subgraphs)
random.Random(SPLIT_SEED).shuffle(shuffled_subgraphs)

# Each sampler draws (in random order) from its own contiguous index range.
train_sampler = SubsetRandomSampler(torch.arange(num_train))
val_sampler = SubsetRandomSampler(torch.arange(num_train, num_train+num_val))
test_sampler = SubsetRandomSampler(torch.arange(num_train+num_val, num_examples))

train_dataloader = GraphDataLoader(
    shuffled_subgraphs, sampler=train_sampler, batch_size=1, drop_last=False)
val_dataloader = GraphDataLoader(
    shuffled_subgraphs, sampler=val_sampler, batch_size=1, drop_last=False)
test_dataloader = GraphDataLoader(
    shuffled_subgraphs, sampler=test_sampler, batch_size=1, drop_last=False)


In [None]:
# Train the GCN and keep the checkpoint with the best validation accuracy.
train_begin = time.time()
torch.cuda.empty_cache()

NUM_EPOCHS = 50
NUM_CLASSES = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _run_epoch(model, dataloader, device, num_classes, optimizer=None):
    """Run one pass over ``dataloader``; train when ``optimizer`` is given, else evaluate.

    Returns ``(avg_loss, avg_acc, confusion)`` where ``avg_loss`` is the sum of the
    per-batch losses divided by the number of samples, ``avg_acc`` the fraction of
    correct predictions, and ``confusion[predicted][actual]`` a count matrix.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    num_correct = 0
    num_samples = 0
    confusion = np.zeros((num_classes, num_classes))

    with torch.set_grad_enabled(training):
        for batched_graph, batch_labels in dataloader:
            batched_graph = batched_graph.to(device)
            batch_labels = batch_labels.to(device)

            pred = model(batched_graph, batched_graph.ndata['infeats'])
            loss = F.cross_entropy(pred, batch_labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            predicted = pred.argmax(1)
            num_correct += (predicted == batch_labels).sum().item()
            # .item() detaches the value; summing the tensor itself would keep every
            # batch's autograd history alive and store device tensors in the history lists.
            total_loss += loss.item()
            num_samples += len(batch_labels)
            for p, t in zip(predicted.tolist(), batch_labels.tolist()):
                confusion[p][t] += 1

    denom = max(num_samples, 1)  # avoid dividing by zero on an empty loader
    return total_loss / denom, num_correct / denom, confusion


model = GCN(11,128,3)
model = model.to(device)
PATH = design+"/saved_model"
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train_loss_values = []
val_loss_values = []
train_acc_values = []
val_acc_values = []

# Best validation accuracy so far. It must live outside the epoch loop; resetting it
# every epoch would overwrite the checkpoint whenever the accuracy is above zero.
max_acc = 0

for epoch in range(NUM_EPOCHS):
    print("\nEpoch %d:" %epoch)
    print("Training...")
    avg_loss, avg_acc, train_error = _run_epoch(model, train_dataloader, device, NUM_CLASSES, optimizer)
    train_loss_values.append(avg_loss)
    train_acc_values.append(avg_acc)

    print('In epoch {}, train loss: {:.3f}, train acc: {:.3f}'.format(epoch, avg_loss, avg_acc))
    print(train_error)

    print("Validation...")
    avg_loss, avg_acc, val_error = _run_epoch(model, val_dataloader, device, NUM_CLASSES)
    val_loss_values.append(avg_loss)
    val_acc_values.append(avg_acc)

    print('In epoch {}, val loss: {:.3f}, val acc: {:.3f}'.format(epoch, avg_loss, avg_acc))
    print(val_error)

    if avg_acc > max_acc:
        max_acc = avg_acc
        torch.save(model.state_dict(), PATH)

train_end = time.time()
print("\nTraining time for {} epochs: {}\n".format(epoch+1, train_end-train_begin))



Epoch 0:
Training...
In epoch 0, train loss: 1.078, train acc: 0.387
[[466. 292. 512.]
 [  0.   0.   0.]
 [836. 504. 890.]]
Validation...
In epoch 0, val loss: 1.072, val acc: 0.367
[[273. 160. 315.]
 [  0.   2.   0.]
 [  0.   0.   0.]]

Epoch 1:
Training...


In [None]:
# Plot the per-epoch training / validation loss and accuracy curves.
import matplotlib.pyplot as plt


def _as_floats(values):
    """Convert history entries (Python numbers or 0-d tensors on any device) to floats."""
    return [float(v) for v in values]


def _plot_history(train_values, val_values, title, ylabel):
    """Draw the train (blue) and validation (green) curve of one metric."""
    train_values = _as_floats(train_values)
    val_values = _as_floats(val_values)
    fig, ax = plt.subplots()
    ax.plot(np.arange(len(train_values)), train_values, 'b',
            np.arange(len(val_values)), val_values, 'g')
    ax.legend(["Train", "Validation"])
    ax.set_title(title)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(ylabel)
    plt.show()


_plot_history(train_loss_values, val_loss_values, "Loss", "Loss")
_plot_history(train_acc_values, val_acc_values, "Accuracy", "Accuracy")


In [None]:
# Evaluate the saved checkpoint on the test split.
# test_error[predicted][actual] counts predictions. test_dist[predicted][actual]
# accumulates |p(predicted) - p(actual)| from the softmax of the logits, so it is 0 for
# correct predictions.
model = GCN(11,128,3)
# Load onto the CPU first so the checkpoint can be read regardless of the saving device.
model.load_state_dict(torch.load(PATH, map_location='cpu'))
model = model.to('cuda')
model.eval()

test_acc = 0
test_loss = 0
num_tests = 0
test_error = np.zeros((3,3))
test_dist = np.zeros((3,3))

with torch.no_grad():
    for g, batch_labels in test_dataloader:
        g = g.to('cuda')
        labels = batch_labels.to('cuda')

        infeats = g.ndata['infeats']

        pred = model(g, infeats)
        loss = F.cross_entropy(pred, labels)
        # No optimizer call here: evaluation needs no gradients, and depending on the
        # optimizer would make this cell fail when only the checkpoint is available.

        test_acc += (pred.argmax(1) == labels).sum().item()
        test_loss += loss.item()
        num_tests += len(labels)

        probs = F.softmax(pred, dim=1)
        for i in range(len(labels)):
            predicted = pred[i].argmax(0).item()
            actual = labels[i].item()
            test_error[predicted][actual] += 1
            test_dist[predicted][actual] += abs(probs[i][predicted].item() - probs[i][actual].item())

avg_loss = test_loss/max(num_tests, 1)
avg_acc = test_acc/max(num_tests, 1)

print("test accuracy: {}".format(avg_acc))
print(test_error)
# Mean probability gap per confusion-matrix cell. Cells without any sample stay NaN
# instead of triggering a 0/0 runtime warning.
print(np.divide(test_dist, test_error, out=np.full_like(test_dist, np.nan), where=test_error > 0))

In [None]:
# Re-run the test split and collect the graphs the model predicted as class 2:
#   graphs / gl : wrongly predicted graphs and their true labels
#   g2          : correctly predicted graphs
# The confusion matrix test_error[predicted][actual] is filled here as well. The
# original cell printed it without ever updating it, together with an all-NaN ratio
# matrix.
model = model.to('cuda')
model.eval()
test_acc = 0
test_loss = 0
num_tests = 0
test_error = np.zeros((3,3))
graphs = []
g2 = []
gl = []

with torch.no_grad():
    for g, batch_labels in test_dataloader:
        g = g.to('cuda')
        labels = batch_labels.to('cuda')
        infeats = g.ndata['infeats']
        # Split the batch before the forward pass so each prediction maps to its graph.
        ubg = dgl.unbatch(g)

        pred = model(g, infeats)
        loss = F.cross_entropy(pred, labels)

        test_acc += (pred.argmax(1) == labels).sum().item()
        test_loss += loss.item()
        num_tests += len(labels)

        for i in range(len(labels)):
            ll = labels[i]
            predicted = pred[i].argmax(0).item()
            actual = ll.item()
            test_error[predicted][actual] += 1

            if predicted != 2:
                continue
            if predicted != actual:
                graphs.append(ubg[i].to('cpu'))
                gl.append(ll.to('cpu'))
            else:
                g2.append(ubg[i].to('cpu'))

avg_loss = test_loss/max(num_tests, 1)
avg_acc = test_acc/max(num_tests, 1)

print("test accuracy: {}".format(avg_acc))
print(test_error)

In [None]:
# Draw every graph wrongly predicted as class 2 and print its true label.
# Node colour follows columns 3, 4, 5 of 'infeats' (blue, green, red); a node with none
# of these flags set gets no colour entry.
for j, g1 in enumerate(graphs):
    G = dgl.to_networkx(g1)
    loc_flags = g1.ndata['infeats'][:, 3:6]
    color_map = []
    for i in range(g1.num_nodes()):
        for col, colour in enumerate(('blue', 'green', 'red')):
            if loc_flags[i][col] == 1:
                color_map.append(colour)
                break
    nx.draw(G, node_color=color_map, with_labels=True)
    print(gl[j])
    plt.show()

In [None]:
# Draw every graph correctly predicted as class 2.
# Node colour follows columns 3, 4, 5 of 'infeats' (blue, green, red); a node with none
# of these flags set gets no colour entry.
for j, g1 in enumerate(g2):
    G = dgl.to_networkx(g1)
    loc_flags = g1.ndata['infeats'][:, 3:6]
    color_map = []
    for i in range(g1.num_nodes()):
        for col, colour in enumerate(('blue', 'green', 'red')):
            if loc_flags[i][col] == 1:
                color_map.append(colour)
                break
    nx.draw(G, node_color=color_map, with_labels=True)
    plt.show()

In [None]:
# Cut the pattern range into three consecutive windows; the last window absorbs the
# remainder when n_patterns is not divisible by three.
# NOTE: getDatasetforVoting is defined in a later cell of this notebook, so that cell
# has to be executed before this one.
third = int(n_patterns/3)
partitions = [0, third, third*2, int(n_patterns)]
print("Partitions: {}".format(partitions))

gg = getDatasetforVoting(cir, design, dic, hg, n_patterns, partitions, 8000, 9000)

In [None]:
# Load the three per-partition classifiers used for majority voting.
VOTING_CHECKPOINTS = (
    "tate_GNN/model_0_to_197",
    "tate_GNN/model_197_to_394",
    "tate_GNN/model_394_to_591",
)


def _load_voting_model(checkpoint_path):
    """Build a GCN, load ``checkpoint_path`` into it and return it in eval mode on the GPU."""
    # GCN requires (in_feats, h_feats, num_classes); calling it without arguments
    # raises a TypeError. The sizes below are the ones used for every other model in
    # this notebook.
    # NOTE(review): assumes the checkpoints were trained with these sizes -- confirm;
    # load_state_dict raises a size-mismatch error otherwise.
    voting_model = GCN(11, 128, 3)
    voting_model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    voting_model.eval()
    return voting_model.to('cuda')


model1, model2, model3 = [_load_voting_model(p) for p in VOTING_CHECKPOINTS]

In [None]:

# Majority vote over the three per-partition models.
# gg[k][i] is the sample of partition k for fault i, or the placeholder (0, 0) when no
# subgraph could be extracted for that partition.
voting_models = (model1, model2, model3)

num_tests = 0
test_acc = 0
with torch.no_grad():
    for i in range(len(gg[0])):
        results = np.zeros(3)
        true_label = None
        for voting_model, partition_samples in zip(voting_models, gg):
            g, l = partition_samples[i]
            if isinstance(g, int):
                # Placeholder entry: it carries no graph and its label (0) is not the
                # true label, so it must not be used for scoring.
                continue
            true_label = l
            g = g.to('cuda')
            infeats = g.ndata['infeats']
            pred = voting_model(g, infeats)
            idx = pred.argmax(1).item()
            results[idx] += 1

        if np.sum(results) == 0:
            # No partition produced a subgraph for this fault.
            continue
        # argmax resolves ties in favour of the lowest class index.
        test_acc += int(results.argmax(0) == true_label)
        num_tests += 1

if num_tests:
    print("Test accuracy: {}".format(test_acc/num_tests))
else:
    print("Test accuracy: undefined (no sample produced a subgraph)")

In [None]:
def getDatasetforVoting(cir, design, dic, g, num_patterns, partitions, start_sample=1, end_sample=-1):
    """Build per-partition subgraph samples for majority voting from the failure logs.

    Reads the sample list ``<design>/unique.dat``. For every selected line it parses
    the matching log in ``<design>/Logs_w_MIV/``, marks which top node failed under
    which pattern, and lets ``getSubgraphsforVoting`` append one entry per partition.

    Args:
        cir: parsed circuit; ``cir.Node``, ``cir.scanchains`` and ``cir.sopin`` are used.
        design: design directory name (prefix of all file paths).
        dic: mapping from scan-cell name to top-node ID.
        g: heterograph with ``'faultSite'`` and ``'topNode'`` node types.
        num_patterns: unused; kept for interface compatibility.
        partitions: increasing pattern boundaries; ``partitions[k]..partitions[k+1]``
            is the k-th voting window.
        start_sample: index of the first line of ``unique.dat`` to use.
        end_sample: exclusive end index; ``-1`` means "through the last line".

    Returns:
        A list with ``len(partitions) - 1`` lists of samples, one list per window.
    """
    print("Start generating data for voting")
    with open(design+"/unique.dat", "r") as f1:
        l = f1.readlines()
    # -1 is the "no limit" sentinel. Passing it straight into the slice would silently
    # drop the last sample line.
    if end_sample == -1:
        l = l[start_sample:]
    else:
        l = l[start_sample:end_sample]

    subgraphs = [[] for _ in range(len(partitions)-1)]

    # The failure matrix always spans the full range covered by the partitions.
    start_pat = partitions[0]
    end_pat = partitions[-1]
    num_pat = end_pat-start_pat

    for line in l:
        words = line.split()
        name_parts = words[1].split("/")
        gname = name_parts[0]
        pname = name_parts[1]
        logname = design+"/Logs_w_MIV/"+gname+"_"+pname+"_st"+words[0]+".log"

        dstID = cir.Node[gname+"_"+pname].ID
        # Label = index of the set one-hot 'loc' column; anything else falls into class 2.
        loc_row = g.nodes['faultSite'].data['loc'][dstID]
        if loc_row[0] == 1:
            label = 0
        elif loc_row[1] == 1:
            label = 1
        else:
            label = 2

        with open(logname, "r") as f2:
            l2 = f2.readlines()[1:]  # the first line of the log is skipped

        # r[top_node, pattern] = 1.0 when that top node failed under that pattern
        r = np.zeros((g.num_nodes('topNode'), num_pat), dtype=np.dtype('float32'))
        success = True
        for fault in l2:
            w2 = fault.split()
            if len(w2) != 5:
                # Unexpected line format: discard the whole sample.
                success = False
                break
            pat = int(w2[0])-1  # the log counts patterns from 1

            if pat < start_pat:
                continue

            if pat >= end_pat:
                # Stop at the first pattern past the range (assumes the log is ordered
                # by pattern -- TODO confirm).
                break

            chname = w2[1]
            loc = int(w2[2])

            chain = cir.scanchains[cir.sopin.index(chname)]
            # The position is counted from the end of the chain.
            cell_name = chain[::-1][loc].name
            srcID = dic[cell_name]
            r[srcID][pat-start_pat] = 1.0

        if not success:
            continue

        getSubgraphsforVoting(g, r, label, dstID, partitions, subgraphs, True)

        if len(subgraphs[-1])%500 == 0:
            print(len(subgraphs[-1]))
    print("Finish generating data for voting")
    return subgraphs

In [None]:
def getSubgraphsforVoting(hg, d, l, dstID, partitions, subgraphs, debug=False):
    """Append one candidate subgraph per pattern window to ``subgraphs``.

    ``d`` (a NumPy array) becomes the ``'h'`` feature of the top nodes and is summed
    along the ``'topEdge'`` relation onto the fault sites. For each window
    ``partitions[k]..partitions[k+1]`` the fault sites whose aggregated value times
    their ``'feats'`` entry equals the column sum of ``d`` in every column of the
    window are kept. They form a homogeneous subgraph with self-loops and an
    ``'infeats'`` node feature, stored as ``(graph, l)`` in ``subgraphs[k]``. A window
    without any matching fault site contributes the placeholder ``(0, 0)``.

    Args:
        hg: heterograph with ``'topNode'`` / ``'faultSite'`` nodes and ``'topEdge'`` /
            ``'net'`` edges.
        d: NumPy array assigned as the top-node feature (presumably one row per top
            node and one column per pattern -- verify against the caller).
        l: label stored with every produced subgraph.
        dstID: fault-site ID that must be part of each subgraph when ``debug`` is set.
        partitions: increasing pattern boundaries.
        subgraphs: list of per-window lists, extended in place.
        debug: when true, assert that ``dstID`` survived the filtering.
    """
    windows = zip(partitions[:-1], partitions[1:])

    with hg.local_scope():
        h = torch.from_numpy(d)
        hg.nodes['topNode'].data['h'] = h
        hg['topEdge'].update_all(message_func=fn.copy_u('h','m'), reduce_func=fn.sum('m', 'h'), etype='topEdge')

        for part_idx, (start_pat, end_pat) in enumerate(windows):
            cols = slice(start_pat, end_pat)
            h_N = torch.mul(hg.nodes['faultSite'].data['h'][:, cols], hg.nodes['faultSite'].data['feats'][:, cols])
            # Per-pattern total over all top nodes
            result = torch.sum(h[:, cols], dim=0)
            t = torch.all(h_N == result, dim=1).float()
            nid = torch.nonzero(t, as_tuple=True)[0]
            if len(nid) == 0:
                subgraphs[part_idx].append((0,0))
                continue

            g = hg.subgraph({'faultSite': nid})

            if debug:
                assert(dstID in g.ndata[dgl.NID]['faultSite'])

            # Stored node features first, then the degrees measured inside the subgraph.
            site = g.nodes['faultSite'].data
            feature_columns = [
                site['in_degree'],
                site['out_degree'],
                site['top_degree'],
                site['loc'],
                site['level'],
                site['more'],
                g.in_degrees(etype='net').view(-1,1).float(),
                g.out_degrees(etype='net').view(-1,1).float(),
            ]
            infeats = torch.cat(feature_columns, dim=1)

            g = dgl.to_homogeneous(g)
            g.ndata['infeats'] = infeats
            g = dgl.add_self_loop(g)
            subgraphs[part_idx].append((g,l))

In [None]:
def getDatasetfromLog2(cir, design, dic, g, num_patterns, num_samples=-1, start_pat=0, end_pat=-1, shuffle=True):
    """Build (subgraph, label) samples by intersecting fan-in candidates of failing scan cells.

    For every line of the design's sample list the matching failure log is read. Each
    log entry names a pattern, a scan chain and a position in that chain; the fault
    sites reachable from the corresponding top node via 'topEdge' are intersected
    across all entries, keeping only sites whose 'feats' value for the pattern is 1.0.
    The surviving sites form a homogeneous subgraph (with reverse edges and
    self-loops) carrying an 'infeats' node feature.

    Args:
        cir: parsed circuit; ``cir.design``, ``cir.Node``, ``cir.scanchains`` and
            ``cir.sopin`` are used.
        design: design directory name (prefix of all file paths).
        dic: mapping from scan-cell name to top-node ID.
        g: heterograph with 'faultSite' / 'topNode' nodes and 'topEdge' edges.
        num_patterns: not used by this function.
        num_samples: stop after this many samples; with the default -1 the limit is
            never reached, so all lines are processed.
        start_pat: first pattern index (0-based) to consider.
        end_pat: exclusive end pattern index.
            NOTE(review): with the default -1 the check ``pat >= end_pat`` is true for
            the first in-range log entry, so every sample ends with no candidates;
            callers presumably have to pass an explicit value -- confirm.
        shuffle: shuffle the sample list before processing.

    Returns:
        ``(dataset, dstIDset)``: the list of ``(graph, label)`` tuples and the list of
        injected fault-site IDs, in the same order.
    """
    print("Start generating data")
    
    
    dataset = []
    dstIDset = []
#     f1 = open(design+"/"+design+"_inject_extra.dat", "r")
    # The sample list and the log directory depend on the design.
    if cir.design == "ldpc_GNN" or cir.design == "tate_GNN":
        f1 = open(design+"/unique.dat", "r")
    else:
        f1 = open(design+"/TDF_600_inject.dat", "r")
    l = f1.readlines()
    f1.close()
    l = l[1:]  # the first line of the sample list is skipped
    if shuffle:
        random.shuffle(l)

    for line in l:
        words = line.split()
        gname = words[1].split("/")[0]
        pname = words[1].split("/")[1]
        # The log file name is built from the pin name as written in the sample list,
        # i.e. before the pin-name rewriting below.
        if cir.design == "ldpc_GNN" or cir.design == "tate_GNN":
            logname = design+"/Logs_w_MIV/"+gname+"_"+pname+"_st"+words[0]+".log"
        else:
            logname = design+"/Logs_w_MIV_TDF_600/"+gname+"_"+pname+"_st"+words[0]+".log"
        
        # Rewrite pin names so that they match the keys of cir.Node.
        if words[1].split("/")[-1] == "nextstate":
            pname = "D"
        elif words[1].split("/")[-1] == "IQ":
            pname = "Q"
        
        if gname.startswith("MIV") and pname == "A":
            pname = "Q"

        dstID = cir.Node[gname+"_"+pname].ID
        # Label = index of the set one-hot 'loc' column; anything else falls into class 2.
        label = -1
        if g.nodes['faultSite'].data['loc'][dstID][0] == 1:
            label = 0
        elif g.nodes['faultSite'].data['loc'][dstID][1] == 1:
            label = 1
        else:
            label = 2
            
        # Samples without a log file are skipped silently.
        if not os.path.isfile(logname):
            continue
            
        print(logname)
        f2 = open(logname, "r")
        l2 = f2.readlines()[1:]  # the first line of the log is skipped
        f2.close()

        
        # NOTE(review): num_pat, cur_pat and cnt_per_pat are only referenced by the
        # commented-out code further down.
        num_pat = end_pat-start_pat
        cur_pat = -1
        cnt_per_pat = 0
        success = True
        subnodes = []
        srcIDlist = []
        patlist = []
        
        for fault in l2:
            w2 = fault.split()
            if len(w2) != 5:
#                 continue
                # Unexpected line format: discard the whole sample.
                success = False
                break
            pat = int(w2[0])-1  # the log counts patterns from 1

            if pat < start_pat:
                continue
            
            if pat >= end_pat:
                break
            
            chname = w2[1]
            loc = int(w2[2])


            chain = cir.scanchains[cir.sopin.index(chname)]
            # The position is counted from the end of the chain; gname is reused here
            # for the scan-cell name.
            gname = chain[::-1][loc].name
            srcID = dic[gname]
            srcIDlist.append(srcID)
            patlist.append(pat-start_pat)
            
            # Fault sites connected to the failing top node
            tmpnodes =  g.successors(srcID, etype=('topNode', 'topEdge', 'faultSite')).numpy()
        
            # Intersect with the candidates kept so far (skipped for the first entry,
            # where subnodes is still empty).
            if len(subnodes):
                tmpnodes = np.intersect1d(subnodes, tmpnodes)
            
     
            # Keep only candidates whose 'feats' entry for this pattern is 1.0.
            subnodes = np.array([idx for idx in tmpnodes if g.nodes['faultSite'].data['feats'][idx][pat-start_pat] == 1.0])

            # No candidate left: further entries cannot add any.
            if not len(subnodes):
                break

#             if cur_pat == -1 or cur_pat == pat:
#                 cnt_per_pat += 1
#                 cur_pat = pat
#                 continue
#             elif pat != cur_pat:
#                 tmp = []
#                 for idx in subnodes:
#                     valid = True
#                     cnt = 0
#                     for topidx in g.predecessors(idx, etype=('topNode', 'topEdge', 'faultSite')):
#                         if g.nodes['topNode'].data['feats'][topidx][cur_pat-start_pat] == 1.0:
#                             cnt += 1
#                     if cnt == cnt_per_pat:
#                         tmp.append(idx)
                
#                 subnodes = tmp
#                 print(tmp)
#                 if not len(tmp):
#                     break
                    
#                 cur_pat = pat
#                 cnt_per_pat = 0
            
        if not success:
            continue
        if not len(subnodes):
            print("No candidates!!!, {}".format(logname))
            continue
        
        print(subnodes)
        # Diagnostic output only: for every candidate, count the (top node, pattern)
        # pairs whose top-node 'feats' entry is 1.0 and print the difference to the
        # number of parsed log entries. The result is not used afterwards.
        # NOTE(review): this reads g.nodes['topNode'].data['feats'], which the
        # graph-construction cell of this notebook does not set -- confirm it exists.
        tot = len(patlist)
        patlist = np.unique(patlist)
        tmp = []
        for idx in subnodes:
            cnt = 0
            for topidx in g.predecessors(idx, etype=('topNode', 'topEdge', 'faultSite')):
                for p in patlist:
                    if g.nodes['topNode'].data['feats'][topidx][p] == 1.0:
                        cnt += 1
            tmp.append(cnt-tot)
        print(tmp)
#         print(np.unique(srcIDlist))
        with g.local_scope():
            sg = g.subgraph({'faultSite': subnodes})

            print("dstID: {}".format(dstID))
            # The injected fault site must be among the candidates.
            assert(dstID in sg.ndata[dgl.NID]['faultSite'])
#             print(sg.ndata[dgl.NID]['faultSite'])

            # Node features: in/out/top degree, 'loc' and 'level'.
            infeats = torch.cat([sg.nodes['faultSite'].data['in_degree'], sg.nodes['faultSite'].data['out_degree'], sg.nodes['faultSite'].data['top_degree']], dim=1)
            infeats = torch.cat([infeats, sg.nodes['faultSite'].data['loc']], dim=1)
            infeats = torch.cat([infeats, sg.nodes['faultSite'].data['level']], dim=1)
#             infeats = torch.cat([infeats, sg.nodes['faultSite'].data['more']], dim=1)
#             infeats = torch.cat([infeats, sg.in_degrees(etype='net').view(-1,1).float()], dim=1)
#             infeats = torch.cat([infeats, sg.out_degrees(etype='net').view(-1,1).float()], dim=1)

            sg = dgl.to_homogeneous(sg)
            sg = dgl.add_reverse_edges(sg)
            sg.ndata['infeats'] = infeats
            
        sg = dgl.add_self_loop(sg)
        
        dataset.append((sg, label))
        dstIDset.append(dstID)
        
        # Progress output every five samples
        if len(dataset)%5 == 0:
            print(len(dataset))
        
        if len(dataset) == num_samples:
            break
            
    print("Finish generating data")
    return dataset, dstIDset