In [2]:
import numpy as np
import torch
import dgl
import time


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def coo2csc_sort(row,col,num_nodes=None):  # src,dst
    """Convert a COO edge list to CSC by sorting the edges on their column ids.

    Args:
        row: 1-D integer tensor of source node ids, one entry per edge.
        col: 1-D integer tensor of destination node ids, aligned with ``row``.
        num_nodes: optional total node count. When given, the pointer array is
            padded to at least ``num_nodes + 1`` entries so that trailing nodes
            without incoming edges are still represented. When ``None`` the
            pointer array has ``max(col) + 2`` entries (the previous behaviour).

    Returns:
        ``(inptr, indice)`` where ``inptr`` is the int64 column-pointer array
        (starting with 0) and ``indice`` holds the ``row`` values reordered so
        that edges sharing a destination are contiguous.
    """
    # A stable sort keeps the original edge order inside every column, which
    # makes the neighbour order deterministic across runs and devices.
    sort_col, order = torch.sort(col, dim=0, stable=True)
    indice = row[order]
    min_bins = 0 if num_nodes is None else num_nodes
    in_degree = torch.bincount(sort_col, minlength=min_bins)
    # Build the leading zero on the same device as the counts so the
    # concatenation also works for CUDA inputs.
    leading_zero = torch.zeros(1, dtype=in_degree.dtype, device=in_degree.device)
    inptr = torch.cat([leading_zero, torch.cumsum(in_degree, dim=0)])
    return inptr,indice

def coo2csc_dgl(srcs,dsts):
    """Reference COO -> CSC conversion that delegates the work to DGL.

    Args:
        srcs: tensor of source node ids, one entry per edge.
        dsts: tensor of destination node ids, aligned with ``srcs``.

    Returns:
        ``(indptr, indices)`` as returned by ``adj_sparse(fmt='csc')``.
    """
    # Build the graph and keep only its CSC representation, i.e. the edges
    # grouped (compressed) by destination node.
    csc_graph = dgl.graph((srcs, dsts)).formats('csc')
    # adj_sparse yields three values; the third one is not needed here.
    indptr, indices, _unused = csc_graph.adj_sparse(fmt='csc')
    return indptr, indices



In [4]:
# Tiny hand-checkable edge list (append .cuda() to the tensors to try the GPU).
dst = torch.tensor([0,0,0,1,1,3,3], dtype=torch.int32)
src = torch.tensor([1,2,3,4,5,6,7], dtype=torch.int32)
# Convert once as given, then once more with the roles of src and dst swapped.
for pass_no, (rows, cols) in enumerate([(src, dst), (dst, src)]):
    if pass_no:
        print('-'*10)
    indptr, indices = coo2csc_sort(rows, cols)
    print("ptr: ",indptr)
    print("indices: ",indices)

ptr:  tensor([0, 3, 5, 5, 7])
indices:  tensor([1, 2, 3, 4, 5, 6, 7], dtype=torch.int32)
----------
ptr:  tensor([0, 0, 1, 2, 3, 4, 5, 6, 7])
indices:  tensor([0, 0, 0, 1, 1, 3, 3], dtype=torch.int32)


In [5]:
# Same toy graph through the DGL reference path: as given, then with src/dst swapped.
for pass_no, (rows, cols) in enumerate([(src, dst), (dst, src)]):
    if pass_no:
        print('-'*10)
    indptr, indices = coo2csc_dgl(rows, cols)
    print("ptr: ",indptr)
    print("indices: ",indices)

ptr:  tensor([0, 3, 5, 5, 7, 7, 7, 7, 7], dtype=torch.int32)
indices:  tensor([1, 2, 3, 4, 5, 6, 7], dtype=torch.int32)
----------
ptr:  tensor([0, 0, 1, 2, 3, 4, 5, 6, 7], dtype=torch.int32)
indices:  tensor([0, 0, 0, 1, 1, 3, 3], dtype=torch.int32)


In [16]:
def cooTocsc(srcList,dstList,sliceNUM=1,device=torch.device('cpu')):
    """Convert a COO edge list to CSC on the GPU through ``dgl.cooTocsr``.

    The edge list is fed to the kernel in ``sliceNUM`` chunks so that only one
    chunk of ``srcList``/``dstList`` has to be moved to the GPU per kernel call.

    Args:
        srcList: 1-D integer tensor of source node ids, one entry per edge.
        dstList: 1-D integer tensor of destination node ids, aligned with
            ``srcList``.
        sliceNUM: number of chunks the edge list is split into; values below 1
            are treated as 1 (a single kernel call over the whole list).
        device: accepted for backward compatibility but not used; the results
            are always returned on the CPU.
            NOTE(review): confirm whether this was meant to select the output
            or the compute device.

    Returns:
        ``(inptr, indice)`` as int32 CPU tensors: the column-pointer array and
        the index array filled in by ``dgl.cooTocsr``.

    Raises:
        ValueError: if there are more edges than an int32 pointer can address.
    """
    if dstList.numel() > torch.iinfo(torch.int32).max:
        # The pointer array is stored as int32; a larger edge count would wrap
        # around silently in the cast below.
        raise ValueError("cooTocsc: edge count exceeds the int32 range of the pointer array")

    # Column pointers = [0] + running sum of the per-destination edge counts.
    dst_gpu = dstList.cuda()
    degree_sum = torch.cumsum(torch.bincount(dst_gpu), dim=0)
    leading_zero = torch.zeros(1, dtype=degree_sum.dtype, device=degree_sum.device)
    inptr = torch.cat([leading_zero, degree_sum]).to(torch.int32)
    # Drop the local references to the full GPU copies before allocating the output.
    del dst_gpu, degree_sum

    indice = torch.zeros_like(srcList,dtype=torch.int32,device="cuda")
    # Starts as the first slot of every column; presumably advanced by the
    # kernel as edges are written — TODO confirm against the kernel source.
    addr = inptr[:-1].clone()

    # torch.chunk with a single chunk returns the whole tensor, so one loop
    # covers both the unsliced and the sliced case. Both operands are moved to
    # the GPU for every call, matching what the batched path always did.
    num_slices = max(int(sliceNUM), 1)
    src_batches = torch.chunk(srcList, num_slices, dim=0)
    dst_batches = torch.chunk(dstList, num_slices, dim=0)
    for src_batch, dst_batch in zip(src_batches, dst_batches):
        dgl.cooTocsr(inptr,indice,addr,dst_batch.cuda(),src_batch.cuda()) # compact dst save src

    return inptr.cpu(),indice.cpu()

In [7]:
# Toy graph through the GPU kernel path: as given, then with src/dst swapped.
for pass_no, (rows, cols) in enumerate([(src, dst), (dst, src)]):
    indptr, indices = cooTocsc(rows, cols, sliceNUM=1, device=torch.device('cpu'))
    if pass_no:
        print('-'*10)
    print("ptr: ",indptr)
    print("indices: ",indices)

ptr:  tensor([0, 3, 5, 5, 7], dtype=torch.int32)
indices:  tensor([1, 2, 3, 4, 5, 6, 7], dtype=torch.int32)
----------
ptr:  tensor([0, 0, 1, 2, 3, 4, 5, 6, 7], dtype=torch.int32)
indices:  tensor([0, 0, 0, 1, 1, 3, 3], dtype=torch.int32)


In [8]:
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory.
RAW_GRAPH_PATH = "/home/bear/workspace/single-gnn/data/partition/FR/part0/raw_G.bin"
# The file is read as one flat int32 array and split into alternating
# (src, dst) entries below.
graph = torch.as_tensor(np.fromfile(RAW_GRAPH_PATH,dtype=np.int32))
if graph.numel() % 2 != 0:
    # An odd element count would give src one more entry than dst.
    raise ValueError("raw graph file holds an odd number of int32 values; expected (src, dst) pairs")
src = graph[::2]
dst = graph[1::2]

In [17]:
# Convert the loaded graph with the GPU kernel path, feeding the edges in four slices.
indptr, indices = cooTocsc(src, dst, sliceNUM=4, device=torch.device('cpu'))
for label, value in (("ptr: ", indptr), ("indices: ", indices)):
    print(label, value)

ptr:  tensor([         0,          0,          1,  ..., 1082153921, 1082153921,
        1082153929], dtype=torch.int32)
indices:  tensor([       0,        0,        0,  ..., 16416514,  3076097,  8290834],
       dtype=torch.int32)


In [21]:
# Show the first 20 column pointers returned by cooTocsc.
indptr[:20]

tensor([ 0,  0,  1,  2,  3,  4,  5,  7, 10, 12, 13, 15, 16, 22, 29, 30, 30, 31,
        33, 35], dtype=torch.int32)

In [20]:
# Show the first 40 index entries returned by cooTocsc.
indices[:40]

tensor([   0,    0,    0,    0,    0,    0,  230,    0,    6,    7,    0,  927,
           0,    0,    1,    0,    0,    6,    7, 1082,  230, 1084,    0,    6,
           7, 1082,  230, 1084,   12,    0,    0,    0,    4,    0, 1092, 1085,
           0,    6,    7,   12], dtype=torch.int32)

In [10]:
# Reference result for the same graph, computed by DGL.
dgl_csc = coo2csc_dgl(src, dst)
dgl_indptr, dgl_indices = dgl_csc
print("ptr: ",dgl_indptr)
print("indices: ",dgl_indices)

ptr:  tensor([         0,          0,          1,  ..., 1082153921, 1082153921,
        1082153929], dtype=torch.int32)
indices:  tensor([       0,        0,        0,  ..., 16416514,  3076097,  8290834],
       dtype=torch.int32)


In [14]:
# Show the first 40 index entries of the DGL reference result for comparison.
dgl_indices[:40]

tensor([   0,    0,    0,    0,    0,    0,  230,    7,    0,    6,    0,  927,
           0,    0,    1,    0,    0,  230,    6,    7, 1082, 1084,    0,  230,
           6,    7, 1082, 1084,   12,    0,    0,    0,    4,    0, 1092,    0,
           6,    7, 1264,   12], dtype=torch.int32)

In [11]:
# Second reference result for the same graph, computed by the sort-based conversion.
sort_csc = coo2csc_sort(src, dst)
sort_indptr, sort_indices = sort_csc
print("ptr: ",sort_indptr)
print("indices: ",sort_indices)

ptr:  tensor([         0,          0,          1,  ..., 1082153921, 1082153921,
        1082153929])
indices:  tensor([      0,       0,       0,  ...,  310685, 1356970,  510434],
       dtype=torch.int32)


In [18]:
# Compare the column pointers against the DGL reference.
# Only the pointer arrays are compared here; the index arrays are not checked.
torch.equal(indptr,dgl_indptr)

True

In [19]:
# Compare the column pointers against the sort-based reference, after casting
# it to indptr's dtype so both operands share a dtype.
# Only the pointer arrays are compared here; the index arrays are not checked.
torch.equal(indptr,sort_indptr.to(indptr.dtype))

True