In [7]:
import numpy as np
from scipy import io
import torch
import dgl
from sklearn.preprocessing import label_binarize
from scipy.linalg import eigh
from dglnode import *
import scipy
import time

In [2]:
def load_fb100(filename, data_dir='data'):
    """Load one Facebook100 school graph from a MATLAB ``.mat`` file.

    Args:
        filename: School file name, with or without the ``.mat`` extension,
            e.g. ``'Penn94'``, ``'Rutgers89'``, ``'Cornell5'``,
            ``'Wisconsin87'`` or ``'Amherst41.mat'``.
        data_dir: Directory that holds the ``.mat`` files. Defaults to
            ``'data'``, the location this notebook has always read from.

    Returns:
        A tuple ``(A, metadata)`` holding the ``'A'`` and ``'local_info'``
        entries of the file. Per the original notes, the metadata columns
        are: student/faculty, gender, major, second major/minor, dorm/house,
        year/ high school; 0 denotes a missing entry.
    """
    import os  # local import keeps this cell independent of the import cell

    # The argument used to be ignored in favour of a hard-coded Penn94 path;
    # build the path from it so other schools can actually be loaded.
    if not filename.endswith('.mat'):
        filename = filename + '.mat'
    mat = io.loadmat(os.path.join(data_dir, filename))
    A = mat['A']
    metadata = mat['local_info']
    return A, metadata


def load_fb100_dataset(filename='Penn94.mat'):
    """Load a Facebook100 graph and turn its metadata into features and labels.

    Args:
        filename: File name forwarded to ``load_fb100``. Defaults to
            ``'Penn94.mat'``, the value this function previously hard-coded.

    Returns:
        A tuple ``(num_nodes, edge_index, node_feat, label)``:
        ``num_nodes`` is the number of metadata rows; ``edge_index`` is a
        ``torch.long`` tensor whose two rows are the row and column indices
        of the non-zero adjacency entries; ``node_feat`` is a ``torch.float``
        tensor of the encoded attributes; ``label`` is a NumPy integer array
        holding metadata column 1 minus one (gender, -1 means unlabeled).
    """
    A, metadata = load_fb100(filename)

    # Stack the (row, col) index arrays into one ndarray before handing them
    # to torch: building a tensor from a tuple of ndarrays is slow and makes
    # PyTorch emit a UserWarning.
    edge_index = torch.as_tensor(np.vstack(A.nonzero()), dtype=torch.long)
    metadata = metadata.astype(int)
    label = metadata[:, 1] - 1  # gender label, -1 means unlabeled

    # Every metadata column except column 1 (the label) becomes a feature.
    feature_vals = np.hstack((metadata[:, :1], metadata[:, 2:]))

    # Encode each attribute column, then concatenate once instead of growing
    # the matrix with a fresh np.hstack copy per column.
    encoded_cols = [
        label_binarize(feature_vals[:, col], classes=np.unique(feature_vals[:, col]))
        for col in range(feature_vals.shape[1])
    ]
    # The empty float seed keeps the result float-typed and well-defined even
    # when there are no attribute columns, exactly as before.
    features = np.hstack([np.empty((A.shape[0], 0))] + encoded_cols)

    node_feat = torch.tensor(features, dtype=torch.float)
    num_nodes = metadata.shape[0]

    return num_nodes, edge_index, node_feat, label

In [3]:
# Unpack (num_nodes, edge_index, node_feat, label) as returned by load_fb100_dataset.
# NOTE(review): `edeg` looks like a typo for `edge`; the name is kept because a later cell reads it.
n, edeg, x, y = load_fb100_dataset()

  from ipykernel import kernelapp as app


In [4]:
# Build the DGL graph from the two rows of the edge index (row 0 = sources,
# row 1 = destinations), add the reverse of every edge, then collapse
# duplicate edges so each connection is stored once per direction.
src_nodes, dst_nodes = edeg[0], edeg[1]
g = dgl.to_simple(
    dgl.add_reverse_edges(
        dgl.graph((src_nodes, dst_nodes), num_nodes=n)
    )
)

In [5]:
# Adjacency matrix of g as a SciPy CSR matrix.
A = g.adj(scipy_fmt='csr')
# Per-node degree taken as the column sums of the adjacency matrix.
deg = np.array(A.sum(axis=0)).flatten()
# Diagonal of D^{-1/2}. Zero-degree nodes get 0 instead of inf, which avoids
# the divide-by-zero RuntimeWarning and keeps non-finite values out of the
# scaling matrix.
inv_sqrt_deg = np.zeros(deg.shape, dtype=float)
has_edges = deg > 0
inv_sqrt_deg[has_edges] = deg[has_edges] ** -0.5
D_ = scipy.sparse.diags(inv_sqrt_deg)

In [6]:
# Scale the adjacency on both sides by D_ (right-hand product first, as
# before), then subtract the result from the identity to form L_.
identity = scipy.sparse.eye(g.num_nodes())
A_ = D_ @ (A @ D_)
L_ = identity - A_

In [16]:
# Wall-clock the eigensolver: k=500 eigenpairs of L_ with which='SM'
# (smallest-magnitude eigenvalues).
# NOTE(review): tol=1 is a very loose tolerance compared with the 1e-5 used in
# the ogbn-arxiv cells — confirm this is intentional before trusting e/u.
# NOTE(review): t1 and t2 are recorded, but no visible cell reports t2 - t1.
t1 = time.time()
e, u = scipy.sparse.linalg.eigsh(L_, k=500, which='SM', tol=1)
t2 = time.time()

In [4]:
# Read `e` and `u` back from disk; this replaces any values of the same names
# computed earlier in the session.
# NOTE(review): no visible cell writes data/penn_e.pt or data/penn_u.pt —
# presumably they were saved by an earlier run of this notebook; confirm.
e = torch.load('data/penn_e.pt')
u = torch.load('data/penn_u.pt')

In [5]:
# Normalise dtypes before saving: float32 for e, u and x, int64 for y.
# torch.as_tensor accepts NumPy arrays as well as tensors of any dtype, while
# the legacy torch.FloatTensor(...) / torch.LongTensor(...) constructors can
# raise when handed a tensor of a different dtype.
# Note: as_tensor may share memory with its input when no conversion is needed.
e_tensor = torch.as_tensor(e, dtype=torch.float32)
u_tensor = torch.as_tensor(u, dtype=torch.float32)
x_tensor = torch.as_tensor(x, dtype=torch.float32)
y_tensor = torch.as_tensor(y, dtype=torch.long)

In [6]:
# Persist the four tensors together, as one list, in data/penn.pt.
torch.save([e_tensor, u_tensor, x_tensor, y_tensor], 'data/penn.pt')

In [5]:
# NOTE(review): DglNodePropPredDataset is only in scope through the wildcard
# `from dglnode import *`; presumably a local copy of the OGB loader — confirm.
dataset = DglNodePropPredDataset('ogbn-arxiv')

In [6]:
# Take the first element of the first dataset entry as the graph (presumably
# a (graph, labels) pair — confirm), add the reverse of every edge, then
# collapse duplicate edges.
arxiv_graph = dataset[0][0]
g = dgl.to_simple(dgl.add_reverse_edges(arxiv_graph))

In [7]:
# Adjacency matrix of g as a SciPy CSR matrix.
A = g.adj(scipy_fmt='csr')
# Per-node degree taken as the column sums of the adjacency matrix.
deg = np.array(A.sum(axis=0)).flatten()
# Diagonal of D^{-1/2}. Zero-degree nodes get 0 instead of inf, which avoids
# the divide-by-zero RuntimeWarning and keeps non-finite values out of the
# scaling matrix.
inv_sqrt_deg = np.zeros(deg.shape, dtype=float)
has_edges = deg > 0
inv_sqrt_deg[has_edges] = deg[has_edges] ** -0.5
D_ = scipy.sparse.diags(inv_sqrt_deg)

In [8]:
# Scale the adjacency on both sides by D_ (right-hand product first, as
# before), then subtract the result from the identity to form L_.
identity = scipy.sparse.eye(g.num_nodes())
A_ = D_ @ (A @ D_)
L_ = identity - A_

In [6]:
# k=5000 eigenpairs of L_ with which='SM' (smallest-magnitude eigenvalues);
# e1/u1 are the pair that is later converted and saved.
# NOTE(review): this is likely a very long-running cell — consider caching the
# result or guarding it so Restart & Run All stays feasible.
e1, u1 = scipy.sparse.linalg.eigsh(L_, k=5000, which='SM', tol=1e-5)

In [19]:
# k=100 eigenpairs of L_ with which='BE' (half from each end of the spectrum).
# NOTE(review): e2/u2 are not used by any visible cell — confirm they are needed.
e2, u2 = scipy.sparse.linalg.eigsh(L_, k=100, which='BE', tol=1e-5)

In [10]:
# k=100 eigenpairs of L_ with which='LM' (largest-magnitude eigenvalues).
# NOTE(review): e3/u3 are not used by any visible cell — confirm they are needed.
e3, u3 = scipy.sparse.linalg.eigsh(L_, k=100, which='LM', tol=1e-5)

In [10]:
# Convert the which='SM' eigenpairs (e1, u1) to float32 tensors for saving.
# torch.as_tensor with an explicit dtype replaces the legacy
# torch.FloatTensor(...) constructor and also accepts tensors of any dtype.
e = torch.as_tensor(e1, dtype=torch.float32)
u = torch.as_tensor(u1, dtype=torch.float32)

In [12]:
# Persist `e` and `u` as separate files under data/.
torch.save(e, 'data/arxiv_e.pt')
torch.save(u, 'data/arxiv_u.pt')