### Loading Data From File

In [1]:
import torch 
import networkx as nx
import numpy as np
import scipy.sparse as sp
import dgl
import random

# Seeds
# Seed every imported library that owns a random number generator with one
# shared value, so a fresh "Restart & Run All" reproduces the same results.
SEED = 42
dgl.seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
# torch is imported above but was previously left unseeded.
torch.manual_seed(SEED)

In [2]:
PATH_TO_GRAPH_FILE = "Cit-HepTh.txt"

# Parse the edge list: each data line holds two integer ids, "<src> <dst>".
# The result is a list of [src, dst] pairs in file order.
graphAdjList = []
with open(PATH_TO_GRAPH_FILE, 'r') as f:
    # Iterate the file lazily rather than materialising every line up front.
    for line_ in f:
        # Everything after "#" is treated as a comment; this also covers
        # whole-line comments and leaves an empty string for them.
        line_ = line_.partition("#")[0].strip()
        if not line_:
            # Blank or comment-only line: nothing to parse.
            continue
        # Split on any whitespace so tab- and space-separated files both work.
        src, dst = map(int, line_.split())
        graphAdjList.append([src, dst])

### Loading Data into NetworkX

In [3]:
# Build a directed graph whose nodes are the raw ids read from the edge list.
nx_g = nx.DiGraph()
nx_g.add_edges_from(graphAdjList)

# Two-way lookup between raw ids and their 0-based rank in ascending id order.
sorted_papers = sorted(nx_g.nodes())
paper_to_node = dict(zip(sorted_papers, range(len(sorted_papers))))
node_to_paper = dict(enumerate(sorted_papers))

### Loading Data Into DGL

In [4]:
# Report whether CUDA is available, then pick the matching torch device.
cuda_available = torch.cuda.is_available()
print(cuda_available)
device = torch.device("cuda" if cuda_available else "cpu")
print("Using device:", device)

False
Using device: cpu


In [50]:
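# Setup note: DGL must be installed before the import cell at the top will run.
# Install it once (in a terminal, or with %pip inside the notebook):
# pip install dgl dglgo -f https://data.dgl.ai/wheels/repo.html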

In [6]:
# Convert the NetworkX directed graph into a DGL graph.
# NOTE(review): DGL is documented as relabelling nodes to consecutive integers
# starting from zero; paper_to_node / node_to_paper assume that relabelling
# follows ascending raw-id order — confirm against the installed DGL version.
g = dgl.from_networkx(nx_g)

### Adding Node Features
- https://docs.dgl.ai/en/0.7.x/guide/graph-feature.html#guide-graph-feature

In [7]:
# Width of the per-node feature vector.
EMB_SIZE = 20

# Give every node the same constant feature: a length-EMB_SIZE vector of ones,
# stored under the node-data key 'x'.
feature_shape = (g.num_nodes(), EMB_SIZE)
g.ndata['x'] = torch.ones(feature_shape)

### Train/Test Splits
- https://docs.dgl.ai/en/0.7.x/tutorials/blitz/4_link_predict.html#prepare-training-and-testing-sets

In [8]:
# Split edge set for training and testing
TEST_FRACTION = 0.1  # share of the edges held out as the test set

u, v = g.edges()
num_edges = g.num_edges()

# Shuffle the edge ids; the first `test_size` shuffled ids become the test set
# and the remainder the training set.
eids = np.arange(num_edges)
eids = np.random.permutation(eids)
test_size = int(len(eids) * TEST_FRACTION)
train_size = num_edges - test_size
test_eids, train_eids = eids[:test_size], eids[test_size:]
test_pos_u, test_pos_v = u[test_eids], v[test_eids]
train_pos_u, train_pos_v = u[train_eids], v[train_eids]
# The two positive splits must partition the full edge set.
assert len(test_pos_u) + len(train_pos_u) == num_edges

# Sample (rather than enumerate) as many negative edges as there are positive
# edges, then split them with the same test/train sizes.
neg_u, neg_v = dgl.sampling.global_uniform_negative_sampling(g, num_edges)
# NOTE(review): the sampler is documented as occasionally returning fewer pairs
# than requested; surface that instead of silently shrinking the splits.
if len(neg_u) < num_edges:
    print("Warning: requested", num_edges, "negative edges but sampled only", len(neg_u))
test_neg_u, test_neg_v = neg_u[:test_size], neg_v[:test_size]
train_neg_u, train_neg_v = neg_u[test_size:], neg_v[test_size:]

In [9]:
# Build the training graph: a copy of g without the held-out test edges
# (the first `test_size` shuffled edge ids).
held_out_eids = eids[:test_size]
train_g = dgl.remove_edges(g, held_out_eids)