In [7]:
import numpy as np
import scipy.sparse as sp
import torch

import dgl
from dgl.data import CiteseerGraphDataset, PubmedGraphDataset
from dgl import AddSelfLoop
from dgl import to_networkx
import numpy as np
import scipy.sparse as sp
from scipy.sparse import coo_matrix, save_npz,  load_npz
import torch


In [8]:
# Load CiteSeer through DGL; the transform adds a self-loop to every node.
transform = AddSelfLoop()  # Add self-loops
data = CiteseerGraphDataset(transform=transform)
g = data[0]  # Get the first graph object from the dataset

features = g.ndata['feat']  # Node features


labels = g.ndata['label']  # Node labels


# torch.nonzero(..., as_tuple=False) returns one row per selected node with a
# single column (assuming the masks are 1-D per-node masks). Squeezing only
# that column keeps the result 1-D even when a split holds exactly one node;
# a bare .squeeze() would collapse it to a 0-d tensor.
idx_train = torch.nonzero(g.ndata['train_mask'], as_tuple=False).squeeze(1)
idx_val = torch.nonzero(g.ndata['val_mask'], as_tuple=False).squeeze(1)
idx_test = torch.nonzero(g.ndata['test_mask'], as_tuple=False).squeeze(1)

src, dst = g.edges()
num_nodes = g.num_nodes()

# One unit-weight entry per edge (src -> dst). The data vector is built with
# numpy so that every input handed to scipy is a numpy array.
adj_sparse = sp.coo_matrix((np.ones(src.shape[0], dtype=np.float32), (src.numpy(), dst.numpy())),
                           shape=(num_nodes, num_nodes),
                           dtype=np.float32)

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [10]:
import os

# torch.save / save_npz do not create missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
os.makedirs('citeseer_dgl', exist_ok=True)

# Persist the tensors and the sparse adjacency produced by the DGL loading cell.
torch.save(labels, 'citeseer_dgl/labels.pt')
torch.save(features, 'citeseer_dgl/features.pt')
torch.save(idx_train, 'citeseer_dgl/idx_train.pt')
torch.save(idx_val, 'citeseer_dgl/idx_val.pt')
torch.save(idx_test, 'citeseer_dgl/idx_test.pt')
save_npz('citeseer_dgl/adj_sparse.npz', adj_sparse)

In [3]:
def encode_onehot(labels):
    """Turn a sequence of class labels into an int32 one-hot matrix.

    The distinct labels are sorted first, so the column assigned to each class
    is stable across calls: the smallest label always maps to column 0.

    Args:
        labels: iterable of hashable, mutually comparable class labels.

    Returns:
        np.ndarray of dtype int32 with one row per input label and one column
        per distinct class.
    """
    ordered = sorted(set(labels))
    eye = np.identity(len(ordered))
    row_for = {cls: eye[pos, :] for pos, cls in enumerate(ordered)}
    encoded = [row_for.get(lbl) for lbl in labels]
    return np.array(encoded, dtype=np.int32)

def normalize_adj(mx):
    """Symmetrically normalize a sparse adjacency matrix.

    With D = diag(row sums of mx), this returns D^{-1/2} · mx^T · D^{-1/2}.
    For a symmetric ``mx`` that equals the usual D^{-1/2} · mx · D^{-1/2};
    note that this is *not* a row normalization.

    Args:
        mx: scipy sparse matrix (square), typically adjacency + self-loops.

    Returns:
        scipy sparse matrix with the same shape as ``mx``.
    """
    rowsum = np.array(mx.sum(1))
    # Rows that sum to zero produce inf here; that is expected and handled on
    # the next line, so silence numpy's divide-by-zero warning for this step.
    with np.errstate(divide='ignore'):
        r_inv_sqrt = np.power(rowsum, -0.5).flatten()
    r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.
    r_mat_inv_sqrt = sp.diags(r_inv_sqrt)
    # (mx · D^{-1/2})^T · D^{-1/2}  ==  D^{-1/2} · mx^T · D^{-1/2}
    return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt)


def normalize_features(mx):
    """Row-normalize a sparse matrix so that every non-empty row sums to 1.

    Rows whose sum is zero are left as all zeros.

    Args:
        mx: scipy sparse matrix of features (one row per sample).

    Returns:
        scipy sparse matrix D^{-1} · mx, where D = diag(row sums of mx).
    """
    rowsum = np.array(mx.sum(1))
    # numpy refuses to raise an integer array to a negative integer power
    # ("Integers to negative integer powers are not allowed"), so promote
    # non-floating row sums before inverting. Floating inputs are untouched.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Zero row sums give inf here; they are reset to 0 on the next line, so the
    # divide-by-zero warning is only noise.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx


def accuracy(output, labels):
    """Fraction of rows in ``output`` whose arg-max column equals the label.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of integer class ids, one per sample.

    Returns:
        0-d double tensor: number of correct predictions divided by
        ``len(labels)``.
    """
    predicted = output.max(dim=1).indices.type_as(labels)
    hits = (predicted == labels).double().sum()
    return hits / len(labels)

In [13]:
# Location and base name of the raw files; the reads below open
# "<path><dataset>.content" (and later cells "<path><dataset>.cites").
# NOTE(review): the directory is called "cora" while the dataset is "citeseer" —
# confirm this is where the CiteSeer files actually live.
path="data/cora/"
dataset="citeseer"

print('Loading {} dataset...'.format(dataset))
# Read every field as a string: one row per node.
idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), dtype=np.dtype(str))
# All columns except the first and the last are parsed as float32 features
# and stored sparsely.
features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
# The last column holds the class name; convert it to one-hot rows.
labels = encode_onehot(idx_features_labels[:, -1])





Loading citeseer dataset...


In [9]:
# Build the graph assuming every node id parses as an int32.
# NOTE(review): the idx_features_labels displayed elsewhere in this notebook
# contains non-numeric ids (e.g. 'zhang99towards'), for which this int32
# conversion cannot succeed; a later cell rebuilds the graph with string ids.
idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
# Map each node id to its row position in the .content file.
idx_map = {j: i for i, j in enumerate(idx)}
# Each row of the .cites file is one edge given as a pair of node ids.
edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), dtype=np.int32)
# Translate both endpoints of every edge from node id to row position.
edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32).reshape(edges_unordered.shape)
# Sparse adjacency with a 1 at (edges[:, 0], edges[:, 1]) for every listed edge.
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)


In [12]:
edges

array([[ 163,  402],
       [ 163,  659],
       [ 163, 1696],
       ...,
       [1887, 2258],
       [1902, 1887],
       [ 837, 1686]], dtype=int32)

In [11]:
print(adj)

  (163, 402)	1.0
  (163, 659)	1.0
  (163, 1696)	1.0
  (163, 2295)	1.0
  (163, 1274)	1.0
  (163, 1286)	1.0
  (163, 1544)	1.0
  (163, 2600)	1.0
  (163, 2363)	1.0
  (163, 1905)	1.0
  (163, 1611)	1.0
  (163, 141)	1.0
  (163, 1807)	1.0
  (163, 1110)	1.0
  (163, 174)	1.0
  (163, 2521)	1.0
  (163, 1792)	1.0
  (163, 1675)	1.0
  (163, 1334)	1.0
  (163, 813)	1.0
  (163, 1799)	1.0
  (163, 1943)	1.0
  (163, 2077)	1.0
  (163, 765)	1.0
  (163, 769)	1.0
  :	:
  (2228, 1093)	1.0
  (2228, 1094)	1.0
  (2228, 2068)	1.0
  (2228, 2085)	1.0
  (2694, 2331)	1.0
  (617, 226)	1.0
  (422, 1691)	1.0
  (2142, 2096)	1.0
  (1477, 1252)	1.0
  (1485, 1252)	1.0
  (2185, 2109)	1.0
  (2117, 2639)	1.0
  (1211, 1247)	1.0
  (1884, 745)	1.0
  (1884, 1886)	1.0
  (1884, 1902)	1.0
  (1885, 745)	1.0
  (1885, 1884)	1.0
  (1885, 1886)	1.0
  (1885, 1902)	1.0
  (1886, 745)	1.0
  (1886, 1902)	1.0
  (1887, 2258)	1.0
  (1902, 1887)	1.0
  (837, 1686)	1.0


In [133]:
idx_features_labels

array([['100157', '0', '0', ..., '0', '0', 'Agents'],
       ['100598', '0', '0', ..., '0', '0', 'IR'],
       ['105684', '0', '1', ..., '0', '0', 'Agents'],
       ...,
       ['zhang99towards', '0', '0', ..., '0', '0', 'IR'],
       ['zhou00implementation', '0', '0', ..., '0', '0', 'DB'],
       ['455346', '0', '0', ..., '0', '0', 'ML']], dtype='<U33')

In [34]:
# build graph
# Node ids are kept as strings because some of them are not numeric
# (e.g. 'zhang99sdcc'), so they cannot be parsed as integers.
idx = idx_features_labels[:, 0]
# Map each node id to its row position in the .content file.
idx_map = {j: i for i, j in enumerate(idx)}
edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),  dtype=np.dtype(str))
# Translate every endpoint to its row position; ids that do not appear in the
# .content file are marked with -1 directly, instead of round-tripping the
# indices through a string array and matching the text 'None'.
edges_temp = np.array([idx_map.get(k, -1) for k in edges_unordered.flatten()], dtype=np.int32).reshape(edges_unordered.shape)
# Keep only the edges whose two endpoints are both known nodes.
rows_without_none = ~np.any(edges_temp == -1, axis=1)
edges = edges_temp[rows_without_none]
# Sparse adjacency with a 1 at (edges[:, 0], edges[:, 1]) for every kept edge.
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)

In [37]:
labels.shape[0]

3312

In [38]:
len(idx)

3312

In [15]:
print(adj)

  (0, 0)	1.0
  (0, 99)	1.0
  (0, 111)	1.0
  (0, 381)	1.0
  (0, 415)	1.0
  (0, 514)	1.0
  (0, 585)	1.0
  (0, 690)	1.0
  (0, 691)	1.0
  (0, 783)	1.0
  (0, 784)	1.0
  (0, 954)	1.0
  (1, 153)	1.0
  (1, 732)	1.0
  (1, 1937)	1.0
  (2177, 2903)	1.0
  (1011, 1034)	1.0
  (1011, 2028)	1.0
  (1011, 2029)	1.0
  (2179, 2200)	1.0
  (2, 962)	1.0
  (1012, 2181)	1.0
  (1012, 2031)	1.0
  (1013, 1417)	1.0
  (1013, 1427)	1.0
  :	:
  (1005, 1336)	1.0
  (1005, 481)	1.0
  (1005, 1586)	1.0
  (1005, 2860)	1.0
  (1005, 680)	1.0
  (1005, 920)	1.0
  (1006, 779)	1.0
  (2166, 1170)	1.0
  (3303, 2399)	1.0
  (3305, 1529)	1.0
  (3305, 2654)	1.0
  (3305, 2166)	1.0
  (2168, 1801)	1.0
  (2168, 2168)	1.0
  (3306, 3302)	1.0
  (2169, 1133)	1.0
  (2169, 1170)	1.0
  (2169, 1529)	1.0
  (2169, 2166)	1.0
  (3308, 3309)	1.0
  (2171, 2172)	1.0
  (2174, 1385)	1.0
  (2174, 2173)	1.0
  (1008, 455)	1.0
  (2175, 2122)	1.0


In [16]:
edges

array([[   0,    0],
       [   0,   99],
       [   0,  111],
       ...,
       [2174, 2173],
       [1008,  455],
       [2175, 2122]], dtype=int32)

In [78]:
edges[edges == 'None'] = '0'  
edges = edges.astype(np.int32)

In [97]:
unique_values = np.unique(edges)
num_unique_values = len(unique_values)
num_unique_values

3313

In [101]:
len(idx_map)

3312

In [29]:
edges_temp

array([[   0,    0],
       [   0,   99],
       [   0,  111],
       ...,
       [2174, 2173],
       [1008,  455],
       [2175, 2122]], dtype=int32)

In [30]:
edges

array([[   0,    0],
       [   0,   99],
       [   0,  111],
       ...,
       [2174, 2173],
       [1008,  455],
       [2175, 2122]], dtype=int32)

In [25]:
print(edges_unordered[-10])
print(edges[-10])

['zhang99sdcc' '298012']
[2169 1133]


In [26]:
idx_map['zhang99sdcc']

2169

In [27]:
idx_map

{'100157': 0,
 '100598': 1,
 '105684': 2,
 '11099': 3,
 '114091': 4,
 '11510': 5,
 '115971': 6,
 '117999': 7,
 '120432': 8,
 '126894': 9,
 '128239': 10,
 '130387': 11,
 '157253': 12,
 '164846': 13,
 '164953': 14,
 '165504': 15,
 '172324': 16,
 '17752': 17,
 '184682': 18,
 '186486': 19,
 '187087': 20,
 '18915': 21,
 '1894': 22,
 '19199': 23,
 '192612': 24,
 '196686': 25,
 '196762': 26,
 '198191': 27,
 '206655': 28,
 '210': 29,
 '2102': 30,
 '21158': 31,
 '21655': 32,
 '226296': 33,
 '22863': 34,
 '233063': 35,
 '23381': 36,
 '236095': 37,
 '239748': 38,
 '241799': 39,
 '242172': 40,
 '243680': 41,
 '243755': 42,
 '243827': 43,
 '250204': 44,
 '250815': 45,
 '257383': 46,
 '267501': 47,
 '267965': 48,
 '270678': 49,
 '271013': 50,
 '271585': 51,
 '273596': 52,
 '276915': 53,
 '279508': 54,
 '282608': 55,
 '28307': 56,
 '28315': 57,
 '284454': 58,
 '284772': 59,
 '286829': 60,
 '292223': 61,
 '292524': 62,
 '29551': 63,
 '295535': 64,
 '296568': 65,
 '298502': 66,
 '298782': 67,
 '300584'

In [87]:
none_indices = np.where(edges == 'None')

print("Indices of 'None' values:", none_indices)
for i, j in zip(*none_indices):
    print(f"None found at position ({i}, {j}) with value '{edges[i, j]}'")

Indices of 'None' values: (array([ 231,  493,  662, 1167, 1455, 2387, 2975, 3049, 3050, 3904, 3905,
       3907, 4177, 4428, 4527, 4529, 4545]), array([0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0]))
None found at position (231, 0) with value 'None'
None found at position (493, 1) with value 'None'
None found at position (662, 0) with value 'None'
None found at position (1167, 0) with value 'None'
None found at position (1455, 1) with value 'None'
None found at position (2387, 0) with value 'None'
None found at position (2975, 0) with value 'None'
None found at position (3049, 1) with value 'None'
None found at position (3050, 0) with value 'None'
None found at position (3904, 0) with value 'None'
None found at position (3905, 0) with value 'None'
None found at position (3907, 1) with value 'None'
None found at position (4177, 1) with value 'None'
None found at position (4428, 0) with value 'None'
None found at position (4527, 1) with value 'None'
None found at position (4529, 1) 

In [35]:
none_indices = np.where(edges_temp == -1)
for i, j in zip(*none_indices):
    print(f"None found at position ({i}, {j}) with value '{edges_temp[i, j]}'")

None found at position (231, 0) with value '-1'
None found at position (493, 1) with value '-1'
None found at position (662, 0) with value '-1'
None found at position (1167, 0) with value '-1'
None found at position (1455, 1) with value '-1'
None found at position (2387, 0) with value '-1'
None found at position (2975, 0) with value '-1'
None found at position (3049, 1) with value '-1'
None found at position (3050, 0) with value '-1'
None found at position (3904, 0) with value '-1'
None found at position (3905, 0) with value '-1'
None found at position (3907, 1) with value '-1'
None found at position (4177, 1) with value '-1'
None found at position (4428, 0) with value '-1'
None found at position (4527, 1) with value '-1'
None found at position (4529, 1) with value '-1'
None found at position (4545, 0) with value '-1'


In [36]:
none_indices = np.where(edges == -1)
for i, j in zip(*none_indices):
    print(f"None found at position ({i}, {j}) with value '{edges[i, j]}'")

In [63]:
idx = idx_features_labels[:, 0]
idx_map = {j: i for i, j in enumerate(idx)}

len(idx)

idx

print(idx_map) #['zhou00implementation']

{'100157': 0, '100598': 1, '105684': 2, '11099': 3, '114091': 4, '11510': 5, '115971': 6, '117999': 7, '120432': 8, '126894': 9, '128239': 10, '130387': 11, '157253': 12, '164846': 13, '164953': 14, '165504': 15, '172324': 16, '17752': 17, '184682': 18, '186486': 19, '187087': 20, '18915': 21, '1894': 22, '19199': 23, '192612': 24, '196686': 25, '196762': 26, '198191': 27, '206655': 28, '210': 29, '2102': 30, '21158': 31, '21655': 32, '226296': 33, '22863': 34, '233063': 35, '23381': 36, '236095': 37, '239748': 38, '241799': 39, '242172': 40, '243680': 41, '243755': 42, '243827': 43, '250204': 44, '250815': 45, '257383': 46, '267501': 47, '267965': 48, '270678': 49, '271013': 50, '271585': 51, '273596': 52, '276915': 53, '279508': 54, '282608': 55, '28307': 56, '28315': 57, '284454': 58, '284772': 59, '286829': 60, '292223': 61, '292524': 62, '29551': 63, '295535': 64, '296568': 65, '298502': 66, '298782': 67, '300584': 68, '300852': 69, '301461': 70, '302575': 71, '302729': 72, '30362

In [60]:
np.genfromtxt("{}{}.cites".format(path, dataset),  dtype=np.dtype(str))

array([['100157', '100157'],
       ['100157', '364207'],
       ['100157', '38848'],
       ...,
       ['zheng98stochastic', 'zheng98integrating'],
       ['zhu00incorporating', 'clarke01exploiting'],
       ['zhu00segmenting', 'vonhardenberg01barehand']], dtype='<U33')

In [71]:
edges_unordered#.flatten()

array([['100157', '100157'],
       ['100157', '364207'],
       ['100157', '38848'],
       ...,
       ['zheng98stochastic', 'zheng98integrating'],
       ['zhu00incorporating', 'clarke01exploiting'],
       ['zhu00segmenting', 'vonhardenberg01barehand']], dtype='<U33')

In [65]:
print(list(map(idx_map.get, edges_unordered.flatten())))

[0, 0, 0, 99, 0, 111, 0, 381, 0, 415, 0, 514, 0, 585, 0, 690, 0, 691, 0, 783, 0, 784, 0, 954, 1, 153, 1, 732, 1, 1937, 2177, 2903, 1011, 1034, 1011, 2028, 1011, 2029, 2179, 2200, 2, 962, 1012, 2181, 1012, 2031, 1013, 1417, 1013, 1427, 1013, 1545, 1013, 2024, 1014, 1059, 1014, 1365, 1014, 1378, 1015, 1236, 1015, 1439, 1015, 1440, 1015, 2093, 2183, 2677, 2183, 845, 2183, 3156, 1016, 2138, 3, 634, 2184, 2306, 2184, 2356, 2185, 2539, 1017, 1264, 1017, 1549, 1017, 591, 1018, 1524, 4, 588, 2186, 3139, 1019, 609, 1019, 2064, 2187, 2666, 5, 36, 5, 63, 5, 97, 5, 603, 5, 949, 1021, 1176, 6, 64, 6, 682, 1022, 1907, 1023, 1875, 2188, 3300, 1024, 1527, 2189, 2303, 2189, 2634, 2190, 3145, 8, 868, 8, 1986, 1029, 1100, 1029, 1109, 1029, 1248, 2192, 2488, 1030, 1244, 1031, 1487, 1031, 1488, 1031, 1645, 1031, 1872, 1032, 2117, 9, 158, 9, 202, 9, 260, 9, 559, 9, 820, 9, 970, 11, 387, 11, 945, 1033, 1759, 1033, 2014, 1033, 2015, 1034, 1665, 1034, 1666, 1035, 1491, 1036, 1485, 1037, 1042, 1037, 1309, 1037,

In [35]:
idx

array([  31336, 1061127, 1106406, ..., 1128978,  117328,   24043],
      dtype=int32)

In [42]:
idx_features_labels

array([['100157', '0', '0', ..., '0', '0', 'Agents'],
       ['100598', '0', '0', ..., '0', '0', 'IR'],
       ['105684', '0', '1', ..., '0', '0', 'Agents'],
       ...,
       ['zhang99towards', '0', '0', ..., '0', '0', 'IR'],
       ['zhou00implementation', '0', '0', ..., '0', '0', 'DB'],
       ['455346', '0', '0', ..., '0', '0', 'ML']], dtype='<U33')

In [28]:
idx_map

{31336: 0,
 1061127: 1,
 1106406: 2,
 13195: 3,
 37879: 4,
 1126012: 5,
 1107140: 6,
 1102850: 7,
 31349: 8,
 1106418: 9,
 1123188: 10,
 1128990: 11,
 109323: 12,
 217139: 13,
 31353: 14,
 32083: 15,
 1126029: 16,
 1118017: 17,
 49482: 18,
 753265: 19,
 249858: 20,
 1113739: 21,
 48766: 22,
 646195: 23,
 1126050: 24,
 59626: 25,
 340299: 26,
 354004: 27,
 242637: 28,
 1106492: 29,
 74975: 30,
 1152272: 31,
 100701: 32,
 66982: 33,
 13960: 34,
 13966: 35,
 66990: 36,
 182093: 37,
 182094: 38,
 13972: 39,
 13982: 40,
 16819: 41,
 273152: 42,
 237521: 43,
 1153703: 44,
 32872: 45,
 284025: 46,
 218666: 47,
 16843: 48,
 1153724: 49,
 1153728: 50,
 158098: 51,
 8699: 52,
 1134865: 53,
 28456: 54,
 248425: 55,
 1112319: 56,
 28471: 57,
 175548: 58,
 696345: 59,
 28485: 60,
 1139195: 61,
 35778: 62,
 28491: 63,
 310530: 64,
 1153784: 65,
 1481: 66,
 1153786: 67,
 13212: 68,
 1111614: 69,
 5055: 70,
 4329: 71,
 330148: 72,
 1105062: 73,
 4330: 74,
 5062: 75,
 4335: 76,
 158812: 77,
 40124: 78,

In [23]:
print(edges_unordered)

array([[     35,    1033],
       [     35,  103482],
       [     35,  103515],
       ...,
       [ 853118, 1140289],
       [ 853155,  853118],
       [ 954315, 1155073]], dtype=int32)

In [24]:
print(edges)

[[ 163  402]
 [ 163  659]
 [ 163 1696]
 ...
 [1887 2258]
 [1902 1887]
 [ 837 1686]]


In [None]:

# build symmetric adjacency matrix
# Wherever adj.T is larger than adj, add the adj.T entry and remove the adj
# entry, i.e. take the element-wise maximum of adj and adj.T.
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
# Scale every feature row to sum to 1 (see normalize_features).
features = normalize_features(features)
# Add the identity (self-loops) before normalizing the adjacency.
adj = normalize_adj(adj + sp.eye(adj.shape[0]))
# Fixed positional train / validation / test splits.
# NOTE(review): these hard-coded ranges are unrelated to the DGL masks used in
# the loading cell at the top of the notebook — confirm they are intended here.
idx_train = range(140)
idx_val = range(200, 500)
idx_test = range(500, 1500)
# Densify and convert to torch tensors; the adjacency becomes a full
# (num_nodes x num_nodes) dense matrix.
adj = torch.FloatTensor(np.array(adj.todense()))
features = torch.FloatTensor(np.array(features.todense()))
# Column index of the non-zero entry of each one-hot row gives the class id.
labels = torch.LongTensor(np.where(labels)[1])
idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

In [20]:
print(adj, features, labels, idx_train, idx_val, idx_test)

tensor([[0.1667, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.2000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.2000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.2500]]) tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) tensor([2, 5, 4,  ..., 1, 0, 2]) tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
   

In [10]:
# import networkx as nx
# import numpy as np
# import pickle as p
# from matplotlib import pyplot as plt
# %matplotlib inline

# data_loc = 'data/cora/'  # 'citeseer.cites', 'citeseer.content'

# graph_file = open(data_loc+'citeseer.cites', 'r')
# for _ in range(5): print(repr(graph_file.readline()))

# graph_file.seek(0)
# iid = {}  # Integer id conversion dict
# idx = 0
# citeseer_edgelist = []
# for line in graph_file.readlines():
#     i, j = line.split()
#     if i not in iid:
#         iid[i] = idx
#         idx += 1
#     if j not in iid:
#         iid[j] = idx
#         idx += 1
#     citeseer_edgelist.append((iid[j],iid[i]))  # Correct direction of links

# print("Number of edges:", len(citeseer_edgelist))
# citeseer = nx.DiGraph(citeseer_edgelist)
# print("Number of nodes:", len(citeseer))
# graph_file.close()

# citeseer_labels = np.ndarray(shape=(len(iid)), dtype=int)
# citeseer_features = np.ndarray(shape=(len(iid), 3703), dtype=int)
# labels = {'Agents': 0, 'AI': 1, 'DB': 2, 'IR': 3, 'ML': 4, 'HCI': 5}
# no_labels = set(citeseer.nodes())

# # Read data
# with open(data_loc+'citeseer.content', 'r') as f:
#     for line in f.readlines():
#         oid, *data, label = line.split()
#         citeseer_labels[iid[oid]] = labels[label]
#         citeseer_features[iid[oid],:] = list(map(int, data))
#         no_labels.remove(iid[oid])
        
# for i in no_labels:
#     citeseer_labels[i] = -1
#     citeseer_features[i,:] = np.zeros(3703)
    
# # Validation
# with open(data_loc+'citeseer.content', 'r') as f:
#     for line in f.readlines():
#         oid, *data, label = line.split()
#         assert citeseer_labels[iid[oid]] == labels[label]
#         assert citeseer_labels[iid[oid]] < 6
#         assert sum(citeseer_features[iid[oid]]) == sum(map(int, data))
#     print("Validation for `citeseer_labels` and `citeseer_features` passes.")

# print("Feature shape: ", citeseer_features.shape)
# print("Label shape: ", citeseer_labels.shape)