In [3]:
import networkx as nx
import torch_geometric
import xgi
import torch_geometric.datasets as geom_datasets
# Step 1: load the Planetoid "Cora" dataset rooted at data/cora and take its
# first (index 0) graph object.
dataset = geom_datasets.Planetoid(root="data/cora", name="Cora")
data = dataset[0]

# Step 2: turn the PyG graph into an undirected NetworkX graph.
# Cliques describe symmetric relationships, so edge direction is dropped.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Step 3: "reverse" clique expansion.
# Working assumption: every graph edge came from expanding some hyperedge into
# a clique, so each maximal clique is taken as one reconstructed hyperedge.
# nx.find_cliques yields the maximal cliques lazily; materialise them as a list.
cliques = list(nx.find_cliques(G))

# Step 4: build the XGI hypergraph whose hyperedges are those cliques.
H = xgi.Hypergraph(cliques)

In [None]:
# TODO: unfinished cell. The stray token `impo` (apparently a truncated
# `import`) was removed because evaluating it raises NameError and breaks
# "Restart Kernel -> Run All".

In [96]:
import pickle
import os.path as osp
import torch
import numpy as np
import xgi

# Adapted from the AllSet dataset-loading code.
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    # SECURITY: unpickling can execute arbitrary code -- only point this at
    # trusted dataset files.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _add_self_loop_hyperedges(hypergraph, num_nodes):
    """Return a copy of ``hypergraph`` plus one singleton hyperedge per node."""
    # Number the new hyperedges strictly after the largest existing id so no
    # loaded hyperedge is overwritten; an empty dict starts numbering at 0.
    first_new_id = max(hypergraph, default=-1) + 1
    augmented = dict(hypergraph)
    augmented.update({first_new_id + node: {node} for node in range(num_nodes)})
    return augmented


def getCora(coraPath="data/cocitation/cora/"):
    """Load the hyperGCN co-citation Cora dataset as an XGI hypergraph.

    Reads ``features.pickle``, ``labels.pickle`` and ``hypergraph.pickle``
    from ``coraPath``.

    Args:
        coraPath: Directory holding the three pickle files. Defaults to the
            location the notebook originally hard-coded.

    Returns:
        A tuple ``(H, features, labels)`` where ``H`` is an ``xgi.Hypergraph``
        built from the loaded hyperedges plus one singleton hyperedge per
        node, ``features`` is a ``torch.FloatTensor`` and ``labels`` is a
        ``torch.LongTensor``.

    Raises:
        AssertionError: If the number of feature rows differs from the number
            of labels.
    """
    # Report the path being read rather than an unrelated notebook global.
    print(f'Loading hypergraph dataset from hyperGCN: {coraPath}')

    # Node features: the pickled object must expose .todense()
    # (presumably a SciPy sparse matrix -- TODO confirm).
    features = _load_pickle(osp.join(coraPath, 'features.pickle'))
    features = np.asarray(features.todense())

    labels = _load_pickle(osp.join(coraPath, 'labels.pickle'))

    num_nodes, feature_dim = features.shape
    assert num_nodes == len(labels), 'features and labels disagree on node count'
    print(f'number of nodes:{num_nodes}, feature dimension: {feature_dim}')

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    # Finally, load the hypergraph. hyperGCN stores it as a dictionary:
    # { hyperedge: [list of nodes in the he], ...}
    hypergraph = _load_pickle(osp.join(coraPath, 'hypergraph.pickle'))

    print(f'number of hyperedges: {len(hypergraph)}')

    # Exploration: give every node its own single-node hyperedge.
    H = xgi.Hypergraph(_add_self_loop_hyperedges(hypergraph, num_nodes))

    return H, features, labels
    # edge_idx = num_nodes
    # node_list = []
    # edge_list = []
    # for he in hypergraph.keys():
    #     cur_he = hypergraph[he]
    #     cur_size = len(cur_he)

    #     node_list += list(cur_he)
    #     edge_list += [edge_idx] * cur_size

    #     edge_idx += 1

    # edge_index = np.array([ node_list + edge_list,
    #                         edge_list + node_list], dtype = int)
    # edge_index = torch.LongTensor(edge_index)

    # print(edge_index.shape)

    # data = Data(x = features,
    #             edge_index = edge_index,
    #             y = labels)

    # # data.coalesce()
    # # There might be errors if edge_index.max() != num_nodes.
    # # used user function to override the default function.
    # # the following will also sort the edge_index and remove duplicates. 
    # total_num_node_id_he_id = edge_index.max() + 1
    # data.edge_index, data.edge_attr = coalesce(data.edge_index, 
    #         None, 
    #         total_num_node_id_he_id, 
    #         total_num_node_id_he_id)
getCora()           

#     n_x = num_nodes
# #     n_x = n_expanded
#     num_class = len(np.unique(labels.numpy()))
#     val_lb = int(n_x * train_percent)
#     percls_trn = int(round(train_percent * n_x / num_class))
#     # data = random_planetoid_splits(data, num_class, percls_trn, val_lb)
#     data.n_x = n_x
#     # add parameters to attribute
    
#     data.train_percent = train_percent
#     data.num_hyperedges = len(hypergraph)
    
    # return data


Loading hypergraph dataset from hyperGCN: Cora()
number of nodes:2708, feature dimension: 1433
number of hyperedges: 1579


  features = pickle.load(f)


(<xgi.core.hypergraph.Hypergraph at 0x17fc4b090>,
 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([3, 2, 6,  ..., 0, 4, 3]))

In [97]:
getCora()

Loading hypergraph dataset from hyperGCN: Cora()
number of nodes:2708, feature dimension: 1433
number of hyperedges: 1579


  features = pickle.load(f)


(<xgi.core.hypergraph.Hypergraph at 0x17f3322d0>,
 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([3, 2, 6,  ..., 0, 4, 3]))

In [98]:
import scipy
def coo2Sparse(coo) -> torch.Tensor:
    """Convert a SciPy sparse container into a torch sparse COO tensor.

    Args:
        coo: A ``scipy.sparse.coo_array``, or any object offering ``.tocoo()``
            that yields ``row``, ``col``, ``data`` and ``shape`` attributes.

    Returns:
        A sparse COO tensor of the same shape, built from a ``LongTensor`` of
        indices and a ``FloatTensor`` of values.
    """
    # Anything that is not already a coo_array goes through .tocoo() first.
    coo = coo if isinstance(coo, scipy.sparse.coo_array) else coo.tocoo()

    # Stack row and column indices into the 2 x nnz layout torch expects.
    index_tensor = torch.LongTensor(np.vstack((coo.row, coo.col)))
    value_tensor = torch.FloatTensor(coo.data)
    return torch.sparse_coo_tensor(index_tensor, value_tensor, torch.Size(coo.shape))

In [124]:
# Load the Cora hypergraph together with its node features (x) and labels (y).
H, x, y = getCora()

# Tidy the hypergraph with the flags passed explicitly below.
# NOTE(review): `singletons` is left at its default here; if that default
# removes single-node hyperedges, the per-node self-loop hyperedges added in
# getCora are discarded by this call -- confirm this is intended.
H.cleanup(isolates=True, connected=False, relabel=False)

# Build the sparse incidence matrix, then convert it to a torch sparse tensor.
incidence_scipy = xgi.convert.to_incidence_matrix(H, sparse=True)
incidence_1 = coo2Sparse(incidence_scipy)

Loading hypergraph dataset from hyperGCN: Cora()
number of nodes:2708, feature dimension: 1433
number of hyperedges: 1579


  features = pickle.load(f)


In [125]:
x.shape

torch.Size([2708, 1433])