In [1]:
import numpy as np
import torch
import dgl
import time


  from .autonotebook import tqdm as notebook_tqdm


In [43]:
def loss_csr(raw_ptr,raw_indice,lossNode,randomLoss=0.5,degreeCut=None,CutRatio=0.5):
    raw_ptr = raw_ptr.cuda()
    nodeNUM = raw_ptr.shape[0] - 1
    ptr_diff = torch.diff(raw_ptr)  # 0.01s

    # 0.2s
    # 裁剪点
    # length = lossNode.size(0)
    # indices_permuted = torch.randperm(length)
    # selected_indices = indices_permuted[:int(length * randomLoss)]
    # lossNode = lossNode[selected_indices]
    mask = torch.ones(nodeNUM, dtype=torch.bool).cuda()
    mask[lossNode.to(torch.int64)] = False
    ptr_diff[lossNode.to(torch.int64)] = 0
    
    # 裁剪边
    if degreeCut != None:
        condition = ptr_diff >= degreeCut
        ptr_diff[condition] = (ptr_diff[condition] * CutRatio).to(torch.int32) 

    allTime = time.time()
    new_ptr = torch.cat((torch.zeros(1).to(torch.int32).to(ptr_diff.device),torch.cumsum(ptr_diff,dim = 0).to(torch.int32)))
    id2featMap = mask.cumsum(dim=0).to(torch.int32)
    id2featMap -= 1
    id2featMap[lossNode.to(torch.int64)] = -1
    ptr_diff,mask = None,None
    print(f"ptr_diff using time :{time.time()-allTime:.3f}s")
    # indice
    
    allTime = time.time()
    new_indice = torch.zeros(new_ptr[-1].item(),dtype=torch.int32)
    print(f"clone time :{time.time()-allTime:.3f}s")
    allTime = time.time()
    raw_indice,new_indice = raw_indice.cuda(),new_indice.cuda()
    dgl.loss_csr(raw_ptr,new_ptr,raw_indice,new_indice)
    raw_ptr,raw_indice = None,None
    print(f"loss_csr func using time :{time.time()-allTime:.3f}s")
    return new_ptr,new_indice,id2featMap

In [33]:
ptr = torch.as_tensor(np.fromfile("/home/bear/workspace/single-gnn/data/partition/FR/part0/indptr.bin",dtype=np.int32))
indice = torch.as_tensor(np.fromfile("/home/bear/workspace/single-gnn/data/partition/FR/part0/indices.bin",dtype=np.int32))
sortIds = torch.as_tensor(np.fromfile("/home/bear/workspace/single-gnn/data/partition/FR/part0/sortIds.bin",dtype=np.int32))
trainIds = torch.as_tensor(np.fromfile("/home/bear/workspace/single-gnn/data/partition/FR/part0/trainIds.bin",dtype=np.int64))
saveNode = sortIds[:int(len(sortIds) * 0.8)]
lossNode = sortIds[int(len(sortIds) * 0.8):]

In [45]:
allTime = time.time()
ptr,indice,lossNode = ptr.cuda(),indice.cpu(),lossNode.cuda()
new_ptr,new_indice,id2featMap = loss_csr(ptr,indice,lossNode,randomLoss=0.5,degreeCut=40,CutRatio=0.5)
print(f"loss_csr using time :{time.time()-allTime:.3f}s")

ptr_diff using time :0.001s
clone time :0.118s
loss_csr func using time :1.147s
loss_csr using time :1.277s


In [44]:
def streamLossGraph(raw_ptr,raw_indice,lossNode,sliceNUM=1,randomLoss=0.5,degreeCut=None,CutRatio=0.5):
    # ptr始终位于GPU中，indice同样位于GPU中，raw流式传入
    raw_ptr = raw_ptr.cuda()
    raw_indice = raw_indice.cpu()
    nodeNUM = raw_ptr.shape[0] - 1
    ptr_diff = torch.diff(raw_ptr)  # 0.01s

    # 裁剪点 0.2s
    # length = lossNode.size(0)
    # selected_indices = torch.randperm(length)[:int(length * randomLoss)]
    # lossNode = lossNode[selected_indices]
    mask = torch.ones(nodeNUM, dtype=torch.bool).cuda()
    mask[lossNode.to(torch.int64)] = False
    ptr_diff[lossNode.to(torch.int64)] = 0
    
    # 裁剪边
    if degreeCut != None:
        condition = ptr_diff >= degreeCut
        ptr_diff[condition] = (ptr_diff[condition] * CutRatio).to(torch.int32) 

    allTime = time.time()
    new_ptr = torch.cat((torch.zeros(1).to(torch.int32).to(ptr_diff.device),torch.cumsum(ptr_diff,dim = 0).to(torch.int32)))
    id2featMap = mask.cumsum(dim=0).to(torch.int32)
    id2featMap -= 1
    id2featMap[lossNode.to(torch.int64)] = -1
    ptr_diff,mask = None,None
    print(f"ptr_diff using time :{time.time()-allTime:.3f}s")
    # indice

    blockSize = (nodeNUM - 1) // sliceNUM + 1
    bound = []
    lastIdx = 0
    for i in range(sliceNUM):
        nextSlice = min((i+1)*blockSize,nodeNUM)
        bound.append([lastIdx,nextSlice])
        lastIdx = nextSlice

    new_indice = torch.zeros(new_ptr[-1].item()-1,dtype=torch.int32,device="cuda:0")
    allTime = time.time()
    for left,right in bound:
        raw_off = raw_ptr[left:right+1]-raw_ptr[left].item()
        new_off = new_ptr[left:right+1]-new_ptr[left].item()
        rawIndiceOff = raw_indice[raw_ptr[left].item():raw_ptr[right].item()].cuda()
        newIndiceOff = new_indice[new_ptr[left].item():new_ptr[right].item()]
        dgl.loss_csr(raw_off,new_off,rawIndiceOff,newIndiceOff)
    print(f"loss_csr func using time :{time.time()-allTime:.3f}s")
    raw_ptr,raw_indice = None,None
    return new_ptr,new_indice,id2featMap

In [46]:
allTime = time.time()
ptr,indice,lossNode = ptr.cuda(),indice.cpu(),lossNode.cuda()
s_new_ptr,s_new_indice,s_id2featMap = loss_csr(ptr,indice,lossNode,randomLoss=0.5,degreeCut=40,CutRatio=0.5)
print(f"loss_csr using time :{time.time()-allTime:.3f}s")

ptr_diff using time :0.001s
clone time :0.115s
loss_csr func using time :0.970s
loss_csr using time :1.094s


In [None]:
torch.equal(new_indice,s_new_indice)