In [2]:
import numpy as np
import sys
import pickle as pkl
import networkx as nx
import scipy.sparse as sp


def parse_index_file(filename):
    """Read a text file holding one integer per line.

    Args:
        filename: Path of the index file to read.

    Returns:
        list: The integers parsed from each line, in file order. An empty
        file yields an empty list.

    Raises:
        ValueError: If a line, after stripping surrounding whitespace, is not
            a valid integer literal (this includes blank lines).
        OSError: If the file cannot be opened.
    """
    # A context manager closes the handle deterministically; the bare
    # `open()` used previously left it open until garbage collection.
    with open(filename) as index_file:
        return [int(line.strip()) for line in index_file]


def load_data(dataset, data_dir="../gae/data"):
    """Load the adjacency matrix and the feature matrix for a dataset.

    Reads the pickled files ``ind.<dataset>.x``, ``ind.<dataset>.tx``,
    ``ind.<dataset>.allx`` and ``ind.<dataset>.graph`` plus the text file
    ``ind.<dataset>.test.index`` from ``data_dir``.

    Args:
        dataset: Dataset name used to build the file names (e.g. 'citeseer').
        data_dir: Directory containing the ``ind.*`` files. The default keeps
            the location that was previously hard-coded.

    Returns:
        tuple: ``(adj, features)`` where ``adj`` is the result of
        ``nx.adjacency_matrix`` on the graph built from the pickled
        dict-of-lists, and ``features`` is a LIL sparse matrix made by
        stacking ``allx`` on top of ``tx`` with the test rows reordered.

    Raises:
        OSError: If any of the expected files cannot be opened.
    """
    # load the data: x, tx, allx, graph
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for name in names:
        with open("{}/ind.{}.{}".format(data_dir, dataset, name), 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code; only load
            # files from a trusted source.
            if sys.version_info > (3, 0):
                # latin1 is needed to read Python 2 pickles under Python 3.
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "{}/ind.{}.test.index".format(data_dir, dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        # Rows whose index is absent from the test index stay all-zero.
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    # Move the test rows from sorted order into the order given by the index
    # file.
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features

In [3]:
# Load the citeseer graph: a sparse adjacency matrix and a sparse feature matrix.
# NOTE(review): `adjascency_matrix` is a misspelling of "adjacency"; renaming it
# also requires updating the later cell that displays this variable.
adjascency_matrix, features = load_data('citeseer')

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):


In [4]:
# Show the sparse-matrix summary (shape, dtype, number of stored elements).
adjascency_matrix

<3327x3327 sparse matrix of type '<class 'numpy.int64'>'
	with 9228 stored elements in Compressed Sparse Row format>

In [5]:
# Shape of the first feature row; row indexing on the sparse matrix keeps it 2-D.
features[0].shape

(1, 3703)

In [9]:
# Print the stored entries of the sparse feature matrix as "(row, col) value".
# NOTE(review): this prints the whole matrix; a small slice such as
# `features[:2]` would keep the output short.
print(features)

  (0, 184)	1.0
  (0, 257)	1.0
  (0, 362)	1.0
  (0, 560)	1.0
  (0, 565)	1.0
  (0, 597)	1.0
  (0, 600)	1.0
  (0, 601)	1.0
  (0, 637)	1.0
  (0, 729)	1.0
  (0, 805)	1.0
  (0, 816)	1.0
  (0, 942)	1.0
  (0, 1116)	1.0
  (0, 1435)	1.0
  (0, 1545)	1.0
  (0, 1623)	1.0
  (0, 1635)	1.0
  (0, 1846)	1.0
  (0, 2085)	1.0
  (0, 2338)	1.0
  (0, 2343)	1.0
  (0, 2565)	1.0
  (0, 2604)	1.0
  (0, 2696)	1.0
  (0, 2741)	1.0
  (0, 2918)	1.0
  (0, 2970)	1.0
  (0, 3502)	1.0
  (0, 3548)	1.0
  (0, 3647)	1.0
  (1, 82)	1.0
  (1, 102)	1.0
  (1, 114)	1.0
  (1, 417)	1.0
  (1, 653)	1.0
  (1, 797)	1.0
  (1, 805)	1.0
  (1, 831)	1.0
  (1, 849)	1.0
  (1, 892)	1.0
  (1, 1073)	1.0
  (1, 1083)	1.0
  (1, 1165)	1.0
  (1, 1288)	1.0
  (1, 1953)	1.0
  (1, 2436)	1.0
  (1, 2510)	1.0
  (1, 2733)	1.0
  (1, 2741)	1.0
  (1, 2879)	1.0
  (1, 2909)	1.0
  (1, 2930)	1.0
  (1, 3016)	1.0
  (1, 3126)	1.0
  (1, 3160)	1.0
  (1, 3228)	1.0
  (1, 3254)	1.0
  (1, 3330)	1.0
  (1, 3364)	1.0
  (1, 3447)	1.0
  (1, 3461)	1.0
  (1, 3639)	1.0
  (1, 3640)	1.0


In [6]:
# Convert a slice of the sparse feature matrix to a dense matrix.
# NOTE(review): `features[0]` is already a single-row matrix, so the following
# `[0:50]` slices rows of that one row and has no effect (the resulting shape
# is (1, 3703)). If the first 50 rows were intended, use `features[0:50]`; if
# the first 50 columns of row 0 were intended, use `features[0, 0:50]`.
# TODO confirm the intent.
dense_features = features[0][0:50].todense()

In [8]:
# Check the shape of the densified slice.
dense_features.shape

(1, 3703)