In [51]:
import dgl
import numpy as np
import torch
import torch.nn as nn
from scipy import sparse
import networkx as nx
from scipy.sparse import diags
import torch.functional as F
from pathlib import Path

import json
import time
from pathlib import Path
from networkx.readwrite import json_graph, read_gpickle
from networkx.linalg.laplacianmatrix import laplacian_matrix
from scipy.io import mmwrite, mmread
from scipy.sparse import csr_matrix
import sklearn

# Utils Functions

In [130]:
# a = np.array([1, 0, 3])
def onehot(a): 
    b = np.zeros((a.size, a.max()+1))
    b[np.arange(a.size),a] = 1
    return b
# onehot(a)

def load_matrix(graph):
    adj_matrix = nx.adj_matrix(graph)
    degree_vec = adj_matrix.sum(axis=1).astype(np.float)
    with np.errstate(divide='ignore'):
        d_inv_sqrt = np.squeeze(np.asarray(np.power(degree_vec, -1)))
    d_inv_sqrt[np.isinf(d_inv_sqrt) | np.isnan(d_inv_sqrt)] = 0
    degree_matrix = diags(d_inv_sqrt, 0)
    return adj_matrix, degree_matrix

def lpa(adj_matrix,degree_matrix, labels,train_mask,iteration=10):
    influence=labels.copy()
    influence[np.arange(train_mask.size,labels.shape[0])]=0  # remove invisible_nodes
    for _ in range(iteration):
        influence = degree_matrix@adj_matrix@influence
        influence[train_mask]=labels[train_mask]
    pred=influence.argmax(1)
    labels=labels.argmax(1)
    border_nodes = (pred!=labels).nonzero()[0]
    acc = (pred==labels).sum()/labels.size
    return influence, acc, border_nodes

# influence, acc =lpa(adj_matrix,degree_matrix,onehot_labels,train_mask,iteration=20)

## Load Data

In [3]:
from dgl.data import citation_graph as citegrh
cora = citegrh.load_cora()
citeseer = citegrh.load_citeseer()
pubmed = citegrh.load_pubmed()

Downloading /data/data0/yushi/.dgl//citeseer.zip from https://data.dgl.ai/dataset/citeseer.zip...
Extracting file to /data/data0/yushi/.dgl//citeseer


  r_inv = np.power(rowsum, -1).flatten()


Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Downloading /data/data0/yushi/.dgl//pubmed.zip from https://data.dgl.ai/dataset/pubmed.zip...
Extracting file to /data/data0/yushi/.dgl//pubmed
Finished data loading and preprocessing.
  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000


In [206]:
data=cora
graph=data.graph
labels=data.labels
onehot_labels=onehot(data.labels)
train_mask=data.train_mask.astype(np.int).nonzero()[0]
train_labels = labels[train_mask]

## Run LPA

In [207]:
adj_matrix, degree_matrix=load_matrix(graph)
influence, acc, border_nodes =lpa(adj_matrix,degree_matrix,onehot_labels,train_mask,iteration=20)

## Coarsen Analysis

In [208]:
dataset='cora'
levels = 2
reduce_results = f"graphzoom/reduction_results/{dataset}/fusion/"
from graphzoom.utils import construct_proj_laplacian
def check_agg(nodes, projection):
    agg_sum=0
    for node in nodes:
        seed=projection[:,node].nonzero()[0][0]
        agg_nodes = projection[seed].nonzero()[1]
        if agg_nodes.size>1:
            agg_sum+=1
    return agg_sum, len(nodes)

In [209]:
projections, coarse_adj = construct_proj_laplacian(adj_matrix, levels, reduce_results)

In [210]:
check_agg(border_nodes,projections[0])

(696, 785)

In [234]:
def add_border_nodes_to_sparse_matrix(projection, nodes):
    # convert to csc
    if type(projection) is list:
        projection.reverse()
        proj=projection[0]
        for i in range(1,len(projection)):
            proj=proj@projection[i]
    else:
        proj=projection.tocsc()
    next_level_size=proj.shape[0]+1
    for node in nodes:
        proj.indices[node]=next_level_size
        next_level_size+=1
    return sparse.csc_matrix((proj.data, proj.indices,proj.indptr), dtype=np.float)

In [230]:
projection=projections
projection.reverse()
proj=projection[0]
for i in range(1,len(projection)):
    proj=proj@projection[i]

In [231]:
proj

<519x2708 sparse matrix of type '<class 'numpy.longlong'>'
	with 2708 stored elements in Compressed Sparse Row format>

In [214]:
# projections[0].shape
# csc_proj_mat = sparse.csc_matrix((csc_proj.data, csc_proj.indices,csc_proj.indptr))

(1169, 2708)

In [235]:
# csc_proj_mat.shape
cora_new_proj=add_border_nodes_to_sparse_matrix(projections, border_nodes)

In [236]:
cora_new_proj

<2708x519 sparse matrix of type '<class 'numpy.float64'>'
	with 2708 stored elements in Compressed Sparse Column format>

In [180]:
mat_csc.indices

In [182]:
mat_csc.indptr

array([0, 1, 2, 4, 6, 8], dtype=int32)