In [47]:
import numpy as np
import pandas as pd
import scipy.sparse as sp

import dgl
import networkx as nx
import numpy as np
import torch
import torch.nn.functional as F
from dgl.dataloading import GraphDataLoader
from sklearn.model_selection import train_test_split
from time import time
from tqdm import tqdm

In [21]:
# Raw dataset files. np.loadtxt parses every value as float by default, so the
# ids and labels below are float arrays as well.

# One label per graph.
labels = np.loadtxt(fname="./AIDS/AIDS.graph_labels")
# One entry per node: the id of the graph that node belongs to.
graph_idx = np.loadtxt(fname="./AIDS/AIDS.graph_idx")
# One row per edge, holding the ids of its two endpoint nodes.
edges_list = np.loadtxt(fname="./AIDS/AIDS.edges", delimiter=',')
# One row of attributes per node.
nodes_features = np.loadtxt(fname="./AIDS/AIDS.node_attrs", delimiter=',')

In [25]:
# Sanity check: expect one row per edge and two columns (the endpoint node ids).
edges_list.shape

(64780, 2)

In [43]:
def load_data(graph_indicator, edges, x):
    """
    Split a collection of graphs stored as flat arrays into per-graph pieces.

    Parameters
    ----------
    graph_indicator : np.ndarray, shape (n_nodes,)
        Graph id of every node. Nodes of the same graph are assumed to be
        stored contiguously and in increasing graph-id order; the slicing
        below relies on it.
    edges : np.ndarray, shape (n_edges, 2)
        Global, 1-based ids of the two endpoint nodes of every edge. Every
        edge is assumed to connect two nodes of the same graph.
    x : np.ndarray, shape (n_nodes, n_features)
        Node feature matrix, one row per node.

    Returns
    -------
    adj : list of scipy.sparse CSR matrices
        Symmetric adjacency matrix of each graph.
    features : list of np.ndarray
        Rows of ``x`` belonging to each graph.
    edges_couples : list of np.ndarray, shape (k, 2)
        Edge list of each graph with both directions present, expressed in
        0-based node ids local to the graph: id ``j`` refers to row ``j`` of
        the matching ``features`` entry. Graphs without edges get an empty
        ``(0, 2)`` array.
    """
    n_nodes = graph_indicator.size
    _, graph_size = np.unique(graph_indicator, return_counts=True)

    # Node ids are integral even when the file was parsed as floats; cast them
    # and shift from 1-based to 0-based.
    edges = np.asarray(edges).astype(np.int64) - 1
    # Add the reversed copy of every edge, then let np.unique drop duplicates
    # and sort the rows lexicographically. With contiguous node ids per graph
    # this also groups the rows graph by graph.
    edges = np.unique(np.vstack((edges, edges[:, ::-1])), axis=0)

    A = sp.csr_matrix(
        (np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
        shape=(n_nodes, n_nodes),
    )

    adj = []
    features = []
    edges_couples = []
    idx_n = 0  # global id of the first node of the current graph
    idx_m = 0  # row of the first edge of the current graph in `edges`
    for size in graph_size:
        block = A[idx_n:idx_n + size, idx_n:idx_n + size]
        n_edges = block.nnz
        # Make ids local by removing the graph's node offset. Using the
        # smallest id found in the edges instead would fail on a graph with no
        # edges and mislabel nodes when the graph's first node is isolated.
        couples = edges[idx_m:idx_m + n_edges, :] - idx_n

        adj.append(block)
        edges_couples.append(couples)
        features.append(x[idx_n:idx_n + size, :])

        idx_n += size
        idx_m += n_edges

    return adj, features, edges_couples

In [44]:
# Split the flat dataset arrays into per-graph adjacency matrices, feature
# matrices and edge lists.
adj, features, edges_couples = load_data(
    graph_indicator=graph_idx,
    edges=edges_list,
    x=nodes_features,
)

In [48]:
def net_graph(features, couples):
    """
    Build an undirected networkx graph for a single graph of the dataset.

    Parameters
    ----------
    features : sequence of np.arrays
        Feature vector of every node; position ``j`` holds the features of
        node ``j``.
    couples : sequence of 2-element pairs
        Endpoint node ids of every edge.

    Returns
    -------
    networkx.Graph
        Graph containing every edge of ``couples`` and one node per entry of
        ``features``, each carrying its vector in the ``feature`` attribute.
    """
    graph = nx.Graph()
    # Edges first: add_edge creates any endpoint that does not exist yet.
    for src, dst in couples:
        graph.add_edge(src, dst)
    # Then attach the features; this also creates nodes that have no edge.
    for node_id, node_feature in enumerate(features):
        graph.add_node(node_id, feature=node_feature)
    return graph

In [54]:
# Build one DGL graph per entry of `features`, carrying the node features
# under the 'feature' key.
graphs = [
    dgl.from_networkx(net_graph(features[k], edges_couples[k]),
                      node_attrs=['feature'])
    for k in tqdm(range(len(features)))
]

# Pair every graph with its label, making sure each node has a self loop.
dataset = [
    (dgl.add_self_loop(graph), torch.tensor(labels[i]))
    for i, graph in enumerate(graphs)
]

100%|██████████| 2000/2000 [00:00<00:00, 2380.46it/s]


In [55]:
# Fixed seed so that the train/test partition is the same on every re-run of
# the notebook; without it the reported metrics are not reproducible.
SPLIT_SEED = 42
train_dataset, test_dataset = train_test_split(
    dataset, test_size=0.25, random_state=SPLIT_SEED
)

# NOTE(review): no `shuffle` is passed, so batches are served in the same order
# on every epoch; pass shuffle=True to the training loader if per-epoch
# reshuffling is wanted.
train_dataloader = GraphDataLoader(train_dataset, batch_size=16, drop_last=False)
test_dataloader = GraphDataLoader(test_dataset, batch_size=16, drop_last=False)