In [2]:
import dgl
import numpy as np
import torch
import torch.nn as nn
from scipy import sparse
import networkx as nx
from scipy.sparse import diags
import torch.functional as F
from pathlib import Path

import json
import time
from pathlib import Path
from networkx.readwrite import json_graph, read_gpickle
from networkx.linalg.laplacianmatrix import laplacian_matrix
from scipy.io import mmwrite, mmread
from scipy.sparse import csr_matrix
import sklearn
from scipy.io import mmread, mmwrite # spare save/load

# Utility Functions

In [3]:
# a = np.array([1, 0, 3])
def onehot(a):
    """Return the one-hot encoding of a 1-D array of non-negative integer ids.

    Row ``i`` of the result is zero everywhere except for a 1 in column
    ``a[i]``. The result has ``a.size`` rows and ``a.max() + 1`` columns.
    """
    n_samples, n_classes = a.size, a.max() + 1
    encoded = np.zeros((n_samples, n_classes))
    row_ids = np.arange(n_samples)
    encoded[row_ids, a] = 1
    return encoded
# onehot(a)

def load_matrix(graph):
    """Build the adjacency matrix of ``graph`` and its inverse-degree matrix.

    Args:
        graph: a graph object accepted by ``networkx.adjacency_matrix``.

    Returns:
        A pair ``(adj_matrix, degree_matrix)``. ``adj_matrix`` is the sparse
        adjacency matrix A. ``degree_matrix`` is the sparse diagonal matrix
        D^-1 holding the reciprocal of each row sum of A, so that
        ``degree_matrix @ adj_matrix`` is the row-normalised adjacency.
        Rows whose sum is zero get a 0 on the diagonal instead of inf.
    """
    # `nx.adj_matrix` was an alias that NetworkX 3.0 removed; the canonical
    # name below is available in every NetworkX release.
    adj_matrix = nx.adjacency_matrix(graph)
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the same
    # dtype. ravel() (rather than squeeze) keeps a 1-D vector even for a
    # single-node graph.
    degree_vec = np.asarray(adj_matrix.sum(axis=1), dtype=float).ravel()
    with np.errstate(divide='ignore'):
        d_inv = np.power(degree_vec, -1)
    # Isolated nodes have degree 0 and therefore 1/0 = inf: treat them as 0.
    d_inv[~np.isfinite(d_inv)] = 0
    degree_matrix = diags(d_inv, 0)
    return adj_matrix, degree_matrix

def lpa(adj_matrix,degree_matrix, labels,train_mask,iteration=10):
    """Run label propagation seeded by the training nodes.

    Args:
        adj_matrix: (N, N) adjacency matrix (sparse or dense).
        degree_matrix: (N, N) matrix applied on the left of ``adj_matrix``
            at every step (the inverse-degree matrix D^-1 in this notebook).
        labels: (N, C) numpy array of one-hot ground-truth labels. It is
            not modified.
        train_mask: integer indices (or a boolean mask) of the nodes whose
            labels are visible to the propagation.
        iteration: number of propagation steps.

    Returns:
        A tuple ``(influence, acc, border_nodes)``. ``influence`` is the
        (N, C) score matrix after the last step. ``acc`` is the fraction of
        *all* nodes (training nodes included) whose arg-max score equals the
        true class. ``border_nodes`` holds the indices of the nodes whose
        prediction differs from the true class.
    """
    # Only training nodes may seed the propagation. Start from zeros and copy
    # in the training rows explicitly: zeroing "every row after the first
    # train_mask.size rows" is only correct when the training nodes happen to
    # be exactly the first rows, and otherwise leaks hidden labels.
    influence = np.zeros(labels.shape, dtype=float)
    influence[train_mask] = labels[train_mask]

    # The normalised adjacency does not change between steps; build it once.
    transition = degree_matrix @ adj_matrix
    for _ in range(iteration):
        influence = transition @ influence
        # Clamp the training nodes back to their known labels after each step.
        influence[train_mask] = labels[train_mask]

    pred = influence.argmax(1)
    true_classes = labels.argmax(1)
    border_nodes = (pred != true_classes).nonzero()[0]
    acc = (pred == true_classes).sum() / true_classes.size
    return influence, acc, border_nodes

# influence, acc =lpa(adj_matrix,degree_matrix,onehot_labels,train_mask,iteration=20)

## Load Data

In [4]:
# Load the Cora, Citeseer and PubMed citation datasets through DGL's
# citation_graph loaders; each call prints its own loading summary.
# NOTE(review): this import sits outside the notebook's top import cell —
# consider moving it there so all dependencies are visible in one place.
from dgl.data import citation_graph as citegrh
cora = citegrh.load_cora()
citeseer = citegrh.load_citeseer()
pubmed = citegrh.load_pubmed()

  r_inv = np.power(rowsum, -1).flatten()


Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Finished data loading and preprocessing.
  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000


In [5]:
# Select the dataset under study and extract the pieces used by LPA.
data = cora
graph = data.graph
labels = data.labels
onehot_labels = onehot(data.labels)
# Integer indices of the training nodes. `np.int` was removed in NumPy 1.24;
# the builtin `int` is the identical dtype, so the result is unchanged.
train_mask = data.train_mask.astype(int).nonzero()[0]
train_labels = labels[train_mask]

## Run LPA

In [6]:
# Build the adjacency matrix and the inverse-degree matrix of the selected
# graph, then run 20 label-propagation steps seeded by the training labels.
# `border_nodes` are the nodes whose propagated prediction differs from the
# true class.
adj_matrix, degree_matrix=load_matrix(graph)
influence, acc, border_nodes =lpa(adj_matrix,degree_matrix,onehot_labels,train_mask,iteration=20)

## Coarsen Analysis

In [121]:
# Configuration for the coarsening analysis.
# NOTE(review): `dataset` is hard-coded separately from the `data=cora`
# selection made earlier in the notebook; keep the two in sync when switching datasets.
dataset='cora'
levels = 4  # passed to construct_proj_laplacian
reduce_results = f"graphzoom/reduction_results/{dataset}/fusion/"  # path handed to construct_proj_laplacian
# NOTE(review): this import sits outside the notebook's top import cell.
from graphzoom.utils import construct_proj_laplacian
def check_agg(nodes, projection):
    """Return the entries of ``nodes`` that share their coarse node with others.

    ``projection`` is a sparse (coarse x fine) matrix. For each node, the
    first row with a non-zero in that node's column is taken as its coarse
    node; the node is kept when that row has more than one non-zero column.
    The input order of ``nodes`` is preserved.
    """
    def _is_merged(node):
        # Row index of the coarse node this fine node projects onto.
        coarse_row = projection[:, node].nonzero()[0][0]
        # All fine nodes collapsed into that same coarse node.
        members = projection[coarse_row].nonzero()[1]
        return members.size > 1

    return [node for node in nodes if _is_merged(node)]

In [122]:
# NOTE(review): construct_proj_laplacian lives in graphzoom.utils and is not
# shown here; judging by the names, it returns the per-level projection
# matrices and a coarsened adjacency built from `reduce_results` — confirm.
projections, coarse_adj = construct_proj_laplacian(adj_matrix, levels, reduce_results)

In [125]:
# Of the nodes that LPA mislabels, keep those that the first-level projection
# merges with at least one other node.
coarse_nodes = check_agg(border_nodes,projections[0])
# Prints: number of merged border nodes, then total number of border nodes.
print(len(coarse_nodes), len(border_nodes))

696 785


In [127]:
# Display the list of per-level projection matrices (shapes and formats).
projections

[<1169x2708 sparse matrix of type '<class 'numpy.longlong'>'
 	with 2708 stored elements in Compressed Sparse Row format>,
 <519x1169 sparse matrix of type '<class 'numpy.longlong'>'
 	with 1169 stored elements in Compressed Sparse Row format>,
 <218x519 sparse matrix of type '<class 'numpy.longlong'>'
 	with 519 stored elements in Compressed Sparse Row format>,
 <100x218 sparse matrix of type '<class 'numpy.longlong'>'
 	with 218 stored elements in Compressed Sparse Row format>]

In [130]:
def add_border_nodes_to_proj_matrix(projection, nodes):
    """Give each node in ``nodes`` its own coarse node at every level.

    Args:
        projection: one sparse (coarse x fine) projection matrix, or a
            list/tuple of them ordered from the finest level upwards. Each
            column of the first matrix is assumed to hold exactly one stored
            entry (every fine node maps to one coarse node).
        nodes: fine-level node indices to detach from their coarse node.
            With ``k = len(nodes)``, the j-th node is re-routed to a new
            coarse row ``n_coarse + j``.

    Returns:
        A list with one matrix per input level. The first is a float CSC
        matrix of shape ``(n_coarse + k, n_fine)``. Every later level is a
        float COO matrix with all stored values set to 1, enlarged by ``k``
        rows and ``k`` columns that map the new nodes onto themselves.
        The input matrices are left untouched.
    """
    if not isinstance(projection, (list, tuple)):
        projection = [projection]
    num_border = len(nodes)

    # First level: redirect the stored entry of each border node's column.
    # copy=True matters when the input is already CSC: tocsc() would then
    # return the very same object and the caller's matrix would be modified.
    first = projection[0].tocsc(copy=True)
    num_coarse, num_fine = first.shape
    for offset, node in enumerate(nodes):
        # indptr[node] is the position of column `node`'s entry in `indices`;
        # it equals `node` only when every earlier column has one entry.
        first.indices[first.indptr[node]] = num_coarse + offset
    # Pass the shape explicitly: inferring it from the largest row index
    # shrinks the matrix when `nodes` is empty or trailing rows are empty.
    # `np.float` was removed in NumPy 1.24; builtin `float` is the same dtype.
    projs = [sparse.csc_matrix(
        (first.data, first.indices, first.indptr),
        shape=(num_coarse + num_border, num_fine),
        dtype=float,
    )]

    # Higher levels: append an identity block so that every new coarse node
    # is carried through unchanged.
    extra = np.arange(num_border)
    for level_proj in projection[1:]:
        coo = level_proj.tocoo()
        n_rows, n_cols = coo.shape
        # One value per stored entry plus one per new node.
        data = np.ones(coo.nnz + num_border)
        row = np.hstack((coo.row, n_rows + extra))
        col = np.hstack((coo.col, n_cols + extra))
        projs.append(sparse.coo_matrix(
            (data, (row, col)),
            shape=(n_rows + num_border, n_cols + num_border),
        ))

    return projs

In [133]:
# Detach every merged border node into its own coarse node across the first
# three projection levels, then display the resulting matrices.
border_proj=add_border_nodes_to_proj_matrix(projections[:3], coarse_nodes)
border_proj

(1169, 2708)


[<1865x2708 sparse matrix of type '<class 'numpy.float64'>'
 	with 2708 stored elements in Compressed Sparse Column format>,
 <1215x1865 sparse matrix of type '<class 'numpy.float64'>'
 	with 1865 stored elements in COOrdinate format>,
 <914x1215 sparse matrix of type '<class 'numpy.float64'>'
 	with 1215 stored elements in COOrdinate format>]

In [150]:
# Write the modified projections as Matrix Market files
# Projection_1.mtx, Projection_2.mtx, ... under the dataset's border/ folder.
prefix = Path(f"graphzoom/reduction_results/{dataset}/border/")
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate exists() test.
prefix.mkdir(parents=True, exist_ok=True)
for i, border_matrix in enumerate(border_proj):
    mmwrite(str(prefix.joinpath(f'Projection_{i+1}.mtx')), border_matrix)

In [151]:
# prefix.joinpath('123').name

'123'

In [110]:
# proj

<1215x1865 sparse matrix of type '<class 'numpy.float64'>'
	with 1865 stored elements in COOrdinate format>

In [104]:
# 519+len(coarse_nodes)