In [40]:
import pandas as pd
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
from sklearn.preprocessing import normalize
import scipy.sparse as sp

In [10]:
def make_graph(edge_df):
    """Build a NetworkX graph from an edge table.

    Args:
        edge_df: table exposing ``cust_id`` and ``opp_id`` columns; row i
            contributes the edge ``(cust_id[i], opp_id[i])``.

    Returns:
        The graph that ``nx.from_edgelist`` builds from those pairs.
    """
    sources = edge_df['cust_id']
    targets = edge_df['opp_id']
    edge_pairs = list(zip(sources, targets))
    return nx.from_edgelist(edge_pairs)

In [25]:
# edge_df = pd.read_csv('/Users/shuaihengxiao/Desktop/DANMF/DANMF-master/input/chameleon_edges.csv',names=['cust_id','opp_id'])

In [30]:
# Put the parent directory on the import path so the project-local
# graphSAGE_v0 package can be imported from this notebook.
import sys
sys.path.append('..')
from graphSAGE_v0.random_graph import random_graph_gcn
# random_graph_gcn returns a (node_df, edge_df) pair; presumably the two
# arguments are the node and edge counts — TODO confirm against its definition.
node_df, edge_df = random_graph_gcn(1000,3000)

In [31]:
# Build the NetworkX graph from the generated edge table.
graph = make_graph(edge_df)

In [106]:
class EdgeSampler(torch.utils.data.Dataset):
    """Sample edges and non-edges uniformly from a graph.

    Item ``key`` is a full batch drawn from a random stream seeded with
    ``key``, so the same key always yields the same batch.

    Args:
        A: square adjacency matrix (scipy sparse matrix/array or dense
            ``numpy.ndarray``); any non-zero entry counts as an edge.
        num_pos: number of edges per batch (drawn with replacement).
        num_neg: number of non-edges per batch.

    Raises:
        ValueError: if ``num_neg > 0`` but the graph has fewer than two nodes,
            in which case no non-edge ``(i, j)`` with ``i != j`` exists.

    Note:
        Non-edges are found by rejection sampling. On a graph with (almost) no
        missing edges the sampling loop can take very long or never finish.
    """

    def __init__(self, A, num_pos=1000, num_neg=1000):
        self.num_pos = num_pos
        self.num_neg = num_neg
        self.A = A
        # (num_edges, 2) array of (row, col) index pairs of non-zero entries.
        self.edges = np.transpose(A.nonzero())
        self.num_nodes = A.shape[0]
        self.num_edges = self.edges.shape[0]
        if num_neg > 0 and self.num_nodes < 2:
            raise ValueError(
                "Cannot sample non-edges from a graph with fewer than 2 nodes."
            )

    def _is_nonedge(self, rows, cols):
        """Return a 1-D boolean mask: True where (rows[i], cols[i]) is a valid non-edge."""
        vals = self.A[rows, cols]
        # Depending on the container this is an np.matrix (scipy spmatrix),
        # an ndarray (dense input / scipy sparray) or a sparse object.
        if hasattr(vals, "toarray"):
            vals = vals.toarray()
        has_edge = np.asarray(vals).ravel() != 0
        # Self-pairs are never returned as non-edges.
        return ~has_edge & (rows != cols)

    def __getitem__(self, key):
        """Return ``(edges, nonedges)`` as LongTensors of shape (num_pos, 2) and (num_neg, 2)."""
        # A private RandomState yields the same stream as np.random.seed(key)
        # without clobbering the caller's global NumPy RNG state.
        rng = np.random.RandomState(key)
        edges_idx = rng.randint(0, self.num_edges, size=self.num_pos, dtype=np.int64)
        next_edges = self.edges[edges_idx, :]

        # Select num_neg non-edges: oversample 2x and retry until enough survive.
        while True:
            candidate_ne = rng.randint(0, self.num_nodes, size=(2 * self.num_neg, 2), dtype=np.int64)
            cne1, cne2 = candidate_ne[:, 0], candidate_ne[:, 1]
            to_keep = self._is_nonedge(cne1, cne2)
            if to_keep.sum() >= self.num_neg:
                next_nonedges = candidate_ne[to_keep][:self.num_neg]
                break

        return (torch.as_tensor(next_edges, dtype=torch.long),
                torch.as_tensor(next_nonedges, dtype=torch.long))

    def __len__(self):
        # Keys double as RNG seeds, which must fit in 32 bits.
        return 2**32

def collate_fn(batch):
    """Unwrap the single sample the DataLoader hands over.

    Args:
        batch: sequence whose first element is an ``(edges, nonedges)`` pair;
            any further elements are ignored.

    Returns:
        The ``(edges, nonedges)`` tuple taken from ``batch[0]``.
    """
    first_sample = batch[0]
    pos_pairs, neg_pairs = first_sample
    return pos_pairs, neg_pairs

def get_edge_sampler(A, num_pos=1000, num_neg=1000, num_workers=2):
    """Create a DataLoader that yields ``(edges, nonedges)`` batches.

    Args:
        A: adjacency matrix handed to ``EdgeSampler``.
        num_pos: number of edges per batch.
        num_neg: number of non-edges per batch.
        num_workers: number of DataLoader worker processes.

    Returns:
        A ``torch.utils.data.DataLoader`` over an ``EdgeSampler``, using
        ``collate_fn`` to unwrap each sample.
    """
    dataset = EdgeSampler(A, num_pos, num_neg)
    loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=num_workers,
        collate_fn=collate_fn,
    )
    return loader


In [107]:
# Wrap the graph's adjacency matrix in an edge sampler, using the default
# batch sizes and worker count of get_edge_sampler.
sampler = get_edge_sampler(nx.adjacency_matrix(graph))

2


In [163]:
# Inspect the concrete container type that nx.adjacency_matrix returns.
type(nx.adjacency_matrix(graph))

scipy.sparse.csr.csr_matrix

In [80]:
# Dense NumPy copy of the adjacency matrix for the manual sampling steps.
A = nx.adjacency_matrix(graph).toarray()

In [92]:
# Seed NumPy's global RNG so the random draws that follow are repeatable.
np.random.seed(12)

In [93]:
# One (row, col) index pair per non-zero adjacency entry.
edges = np.transpose(A.nonzero())

In [94]:
# Draw 500 row positions into `edges` uniformly at random (repeats allowed).
edges_idx = np.random.randint(0, edges.shape[0], size=500, dtype=np.int64)

In [95]:
# Node-index pairs of the sampled edges.
next_edges = edges[edges_idx, :]

In [96]:
# Draw 1000 random node-index pairs as candidate non-edges, then split them
# into their first and second endpoints.
candidate_ne = np.random.randint(0, graph.number_of_nodes(), size=(2*500, 2), dtype=np.int64)
cne1, cne2 = candidate_ne[:, 0], candidate_ne[:, 1]

In [138]:
# True where the candidate pair's adjacency entry differs from 1.
# np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
a = (1 - A[cne1, cne2]).astype(bool)

In [139]:
# True where the candidate pair is not a self-pair.
# np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
b = (cne1 != cne2).astype(bool)

In [144]:
# Keep a candidate only if its adjacency entry differs from 1 and it is not a
# self-pair. np.bool was removed in NumPy 1.24, so the builtin bool is used.
to_keep = (1 - A[cne1, cne2]).astype(bool) & (cne1 != cne2)

In [145]:
# Display the keep-mask computed for the candidate pairs.
to_keep

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [154]:
# Check the shape of the keep-mask.
to_keep.shape

(1000,)

In [153]:
# Keep the first 500 candidate pairs that passed the mask.
next_nonedges = candidate_ne[to_keep][:500]

In [58]:
# Number of nodes actually present in the graph.
graph.number_of_nodes()

998

AttributeError: 'numpy.ndarray' object has no attribute 'nnz'

In [37]:
# Normalize the node table with sklearn.preprocessing.normalize using its
# default settings.
x_norm = normalize(node_df)

In [49]:
def to_sparse_tensor(matrix,
                     cuda = False):
    """Convert a scipy sparse matrix or a dense torch tensor to a torch sparse tensor.

    Args:
        matrix: 2-D scipy sparse matrix, or 2-D dense ``torch.Tensor``.
        cuda: Whether to move the resulting tensor to GPU.

    Returns:
        sparse_tensor: Coalesced float32 sparse COO tensor (on CPU or on GPU)
        with the same shape as ``matrix``.

    Raises:
        ValueError: if ``matrix`` is neither a scipy sparse matrix nor a torch tensor.

    """
    if sp.issparse(matrix):
        coo = matrix.tocoo()
        indices = torch.as_tensor(np.vstack([coo.row, coo.col]), dtype=torch.long)
        values = torch.as_tensor(coo.data, dtype=torch.float32)
        shape = torch.Size(coo.shape)
    elif torch.is_tensor(matrix):
        row, col = matrix.nonzero().t()
        indices = torch.stack([row, col])
        # Cast explicitly: inputs such as float64 tensors built from NumPy
        # arrays must still produce a float32 sparse tensor.
        values = matrix[row, col].to(torch.float32)
        shape = torch.Size(matrix.shape)
    else:
        raise ValueError(f"matrix must be scipy.sparse or torch.Tensor (got {type(matrix)} instead).")
    # torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor constructor.
    sparse_tensor = torch.sparse_coo_tensor(indices, values, shape)
    if cuda:
        sparse_tensor = sparse_tensor.cuda()
    return sparse_tensor.coalesce()

In [51]:
# Rebuild from node_df instead of from x_norm itself, so re-running this cell
# never feeds an already-converted sparse tensor back into the conversion.
x_norm = to_sparse_tensor(torch.tensor(normalize(node_df)))