# Examples and calculations for real hypergraphs

In [1]:
import copy
import numpy as np
import xgi
import pandas
from itertools import permutations
from itertools import combinations
import matplotlib.pyplot as plt

from hyperfunctions import *

In [5]:
# Build the hypergraph from the tags-ask-ubuntu `nverts` and `simplices` files.
H = benson_datasets_to_hypergraphs(
    "Datasets/tags-ask-ubuntu/tags-ask-ubuntu-nverts.txt",
    "Datasets/tags-ask-ubuntu/tags-ask-ubuntu-simplices.txt",
)

In [6]:
# Raw dataset summary: number of nodes, number of hyperedges, largest hyperedge size.
len(H.nodes), len(H.edges), H.edges.size.max()

(3029, 271233, 5)

In [7]:
# Drop hyperedges that contain a single node (original author's estimate: ~5000 of them).
H.remove_edges_from(H.edges.singletons())
# Drop the nodes left without any hyperedge after that removal (original author's count: 6).
H.remove_nodes_from(H.nodes.isolates())
# Relabel all nodes to consecutive integers starting from zero.
H = xgi.convert_labels_to_integers(H)

# Per the original author, cleanup() repeats the three steps above and additionally removes
# multi-edges; `connected=False` is passed explicitly.
# NOTE(review): if that holds, the three calls above are redundant -- confirm against the xgi docs.
H.cleanup(connected=False)

In [8]:
# Same summary after cleaning: number of nodes, number of hyperedges, largest hyperedge size.
len(H.nodes), len(H.edges), H.edges.size.max()

(3021, 145053, 5)

In [9]:
# Check that the cleaned hypergraph is connected.
xgi.is_connected(H)

True

## UPHEC at different orders

### Order 2: CEC-like

In [10]:
# Order-2 tensor of H, input to the UPHEC computation in the next cell.
# (Helper is not defined in this notebook; presumably provided by `hyperfunctions`.)
T2 = uniform_adjacency_combinatorial_tensor(
    H,
    m=2,
    math_notation=True,
)

In [11]:
# Run the iterative solver on the order-2 tensor.
# The result is a 2-tuple: (array of scores, boolean -- presumably a convergence flag).
cent2 = HEC_ours(
    T2,
    m=2,
    niter=100_000,
    tol=1e-6,
    verbose=True,
)

Finished in 13 iterations.


In [12]:
# Inspect the raw solver output for order 2.
cent2

(array([8.30017464e-04, 1.42259173e-03, 2.05392467e-03, ...,
        2.31678743e-05, 3.12382473e-05, 1.27003754e-03]),
 True)

In [13]:
# Sanity check: the score array should have one entry per node of H.
len(cent2[0])

3021

In [14]:
# Results table: one row per entry of the score array (presumably row i <-> node i),
# one column per centrality measure.
# The frame is created unconditionally: later cells assign columns to `df` without any
# guard, so creating it only on success would turn a failed order-2 run into a
# NameError further down. Only the UPHEC-2 column itself depends on the solver flag.
df = pandas.DataFrame(index=pandas.RangeIndex(len(cent2[0])))
if cent2[1]:
    df["UPHEC-2"] = cent2[0]

### Order 3

In [15]:
# Order-3 tensor of H, input to the UPHEC computation in the next cell.
# (Helper is not defined in this notebook; presumably provided by `hyperfunctions`.)
T3 = uniform_adjacency_combinatorial_tensor(
    H,
    m=3,
    math_notation=True,
)

In [16]:
# Run the iterative solver on the order-3 tensor.
# The result is a 2-tuple: (array of scores, boolean -- presumably a convergence flag).
cent3 = HEC_ours(
    T3,
    m=3,
    niter=100_000,
    tol=1e-6,
    verbose=True,
)

Finished in 12 iterations.


In [17]:
# Inspect the raw solver output for order 3.
cent3

(array([6.96678151e-04, 1.03193071e-03, 1.28160286e-03, ...,
        4.00850241e-05, 8.40228642e-05, 2.17184840e-03]),
 True)

In [18]:
# Sanity check: the score array should have one entry per node of H.
len(cent3[0])

3021

In [134]:
# Store the order-3 scores as a column, only when the second item of the solver
# result is truthy (presumably the convergence flag).
if cent3[1]:
    df["UPHEC-3"] = cent3[0]

### Order 4

In [20]:
# Order-4 tensor of H, input to the UPHEC computation in the next cell.
# (Helper is not defined in this notebook; presumably provided by `hyperfunctions`.)
T4 = uniform_adjacency_combinatorial_tensor(
    H,
    m=4,
    math_notation=True,
)

In [21]:
# Run the iterative solver on the order-4 tensor.
# The result is a 2-tuple: (array of scores, boolean -- presumably a convergence flag).
cent4 = HEC_ours(
    T4,
    m=4,
    niter=100_000,
    tol=1e-6,
    verbose=True,
)

Finished in 14 iterations.


In [22]:
# Inspect the raw solver output for order 4.
cent4

(array([0.00058355, 0.00081595, 0.000963  , ..., 0.00010992, 0.00011242,
        0.00182581]),
 True)

In [23]:
# Sanity check: the score array should have one entry per node of H.
len(cent4[0])

3021

In [133]:
# Store the order-4 scores as a column, only when the second item of the solver
# result is truthy (presumably the convergence flag).
if cent4[1]:
    df["UPHEC-4"] = cent4[0]

### Order 5

In [116]:
# Order-5 tensor of H (5 is the largest hyperedge size in the cleaned data),
# input to the UPHEC computation in the next cell.
# (Helper is not defined in this notebook; presumably provided by `hyperfunctions`.)
T5 = uniform_adjacency_combinatorial_tensor(
    H,
    m=5,
    math_notation=True,
)

In [117]:
# Run the iterative solver on the order-5 tensor.
# The result is a 2-tuple: (array of scores, boolean -- presumably a convergence flag).
# NOTE(review): the recorded run printed a warning pointing at
# `s = np.divide(y, np.power(x, m))` -- possibly a zero entry in the iterate; verify.
cent5 = HEC_ours(
    T5,
    m=5,
    niter=100_000,
    tol=1e-6,
    verbose=True,
)

  s = np.divide(y, np.power(x, m))


Finished in 17 iterations.


In [118]:
# Inspect the raw solver output for order 5.
cent5

(array([0.00053088, 0.00068204, 0.0007915 , ..., 0.0001278 , 0.        ,
        0.0014535 ]),
 True)

In [121]:
# Unlike orders 2-4, the order-5 score array has one entry more than H has nodes
# (presumably an auxiliary node introduced when building the tensor -- confirm in the helper).
len(cent5[0])

3022

In [135]:
# Drop the last entry of the order-5 scores (the array has one entry more than H has
# nodes) and renormalise the remaining entries so they sum to 1.
# Computed out-of-place on purpose: `cent5[0][:-1]` is a NumPy view, so an in-place
# `/=` on it would silently rescale the solver output stored in `cent5` as well.
# NOTE(review): in the recorded output the tail of cent5[0] is
# (..., 0.0001278, 0., 0.0014535): the entry dropped here is non-zero while the last
# retained entry is exactly 0 -- confirm that the extra node really is the last index.
cent5real = cent5[0][:-1] / np.sum(cent5[0][:-1])
len(cent5real)

3021

In [136]:
# Store the trimmed, renormalised order-5 scores as a column, only when the second
# item of the solver result is truthy (presumably the convergence flag).
if cent5[1]:
    df["UPHEC-5"] = cent5real

## Uniform HEC at each order (as in Benson's work)

In [31]:
# Hyperedge id -> member nodes of H; the cells below use it to select edges by size.
edgedict = H.edges.members(dtype=dict)

### Order 2

In [77]:
# 2-uniform sub-hypergraph: copy H, then delete every hyperedge whose size is not 2.
Hu2 = H.copy()
for edge in (eid for eid, nodes in edgedict.items() if len(nodes) != 2):
    Hu2.remove_edge(edge)

In [78]:
# Check whether the 2-uniform sub-hypergraph is connected.
xgi.is_connected(Hu2)

False

In [79]:
# Restrict Hu2 to its largest connected component by removing every node outside it.
Hu2.remove_nodes_from(Hu2.nodes - xgi.algorithms.largest_connected_component(Hu2))

In [80]:
# Re-check connectivity after restricting to the largest component.
xgi.is_connected(Hu2)

True

In [81]:
# xgi's H-eigenvector centrality on the connected 2-uniform hypergraph (library defaults).
# The result is used below as a mapping keyed by node label.
cent2unif = xgi.algorithms.h_eigenvector_centrality(Hu2)

In [82]:
# Number of nodes that received a score (only those kept in Hu2).
len(cent2unif)

2714

In [83]:
# Give a score of 0 to every node of H that has no entry yet (relies on the node
# labels of H being the integers 0..len(H.nodes)-1).
for node_id in range(len(H.nodes)):
    cent2unif.setdefault(node_id, 0)

In [86]:
# Wrap the mapping in a Series so the values are matched to rows by node label
# (dict key -> row index). The zero-filled entries were appended to the dict out of
# node order, so the alignment must not depend on insertion order or on how pandas
# treats a raw dict on column assignment.
df["HEC-2unif"] = pandas.Series(cent2unif)

### Order 3

In [87]:
# 3-uniform sub-hypergraph: copy H, then delete every hyperedge whose size is not 3.
Hu3 = H.copy()
for edge in (eid for eid, nodes in edgedict.items() if len(nodes) != 3):
    Hu3.remove_edge(edge)

In [88]:
# Check whether the 3-uniform sub-hypergraph is connected.
xgi.is_connected(Hu3)

False

In [89]:
# Restrict Hu3 to its largest connected component by removing every node outside it.
Hu3.remove_nodes_from(Hu3.nodes - xgi.algorithms.largest_connected_component(Hu3))

In [90]:
# Re-check connectivity after restricting to the largest component.
xgi.is_connected(Hu3)

True

In [91]:
# xgi's H-eigenvector centrality on the connected 3-uniform hypergraph (library defaults).
# The result is used below as a mapping keyed by node label.
cent3unif = xgi.algorithms.h_eigenvector_centrality(Hu3)

In [92]:
# Number of nodes that received a score (only those kept in Hu3).
len(cent3unif)

2821

In [93]:
# Give a score of 0 to every node of H that has no entry yet (relies on the node
# labels of H being the integers 0..len(H.nodes)-1).
for node_id in range(len(H.nodes)):
    cent3unif.setdefault(node_id, 0)

In [95]:
# Wrap the mapping in a Series so the values are matched to rows by node label
# (dict key -> row index). The zero-filled entries were appended to the dict out of
# node order, so the alignment must not depend on insertion order or on how pandas
# treats a raw dict on column assignment.
df["HEC-3unif"] = pandas.Series(cent3unif)

### Order 4

In [96]:
# 4-uniform sub-hypergraph: copy H, then delete every hyperedge whose size is not 4.
Hu4 = H.copy()
for edge in (eid for eid, nodes in edgedict.items() if len(nodes) != 4):
    Hu4.remove_edge(edge)

In [97]:
# Check whether the 4-uniform sub-hypergraph is connected.
xgi.is_connected(Hu4)

False

In [98]:
# Restrict Hu4 to its largest connected component by removing every node outside it.
Hu4.remove_nodes_from(Hu4.nodes - xgi.algorithms.largest_connected_component(Hu4))

In [99]:
# Re-check connectivity after restricting to the largest component.
xgi.is_connected(Hu4)

True

In [100]:
# xgi's H-eigenvector centrality on the connected 4-uniform hypergraph (library defaults).
# The result is used below as a mapping keyed by node label.
cent4unif = xgi.algorithms.h_eigenvector_centrality(Hu4)

In [101]:
# Number of nodes that received a score (only those kept in Hu4).
len(cent4unif)

2722

In [102]:
# Give a score of 0 to every node of H that has no entry yet (relies on the node
# labels of H being the integers 0..len(H.nodes)-1).
for node_id in range(len(H.nodes)):
    cent4unif.setdefault(node_id, 0)

In [103]:
# Wrap the mapping in a Series so the values are matched to rows by node label
# (dict key -> row index). The zero-filled entries were appended to the dict out of
# node order, so the alignment must not depend on insertion order or on how pandas
# treats a raw dict on column assignment.
df["HEC-4unif"] = pandas.Series(cent4unif)

### Order 5

In [113]:
# 5-uniform sub-hypergraph: copy H, then delete every hyperedge whose size is not 5.
Hu5 = H.copy()
for edge in (eid for eid, nodes in edgedict.items() if len(nodes) != 5):
    Hu5.remove_edge(edge)

In [114]:
# Check whether the 5-uniform sub-hypergraph is connected.
xgi.is_connected(Hu5)

False

In [115]:
# Restrict Hu5 to its largest connected component by removing every node outside it.
Hu5.remove_nodes_from(Hu5.nodes - xgi.algorithms.largest_connected_component(Hu5))

In [107]:
# Re-check connectivity after restricting to the largest component.
xgi.is_connected(Hu5)

True

In [108]:
# xgi's H-eigenvector centrality on the connected 5-uniform hypergraph (library defaults).
# The result is used below as a mapping keyed by node label.
cent5unif = xgi.algorithms.h_eigenvector_centrality(Hu5)

In [109]:
# Number of nodes that received a score (only those kept in Hu5).
len(cent5unif)

2564

In [110]:
# Give a score of 0 to every node of H that has no entry yet (relies on the node
# labels of H being the integers 0..len(H.nodes)-1).
for node_id in range(len(H.nodes)):
    cent5unif.setdefault(node_id, 0)

In [111]:
# Wrap the mapping in a Series so the values are matched to rows by node label
# (dict key -> row index). The zero-filled entries were appended to the dict out of
# node order, so the alignment must not depend on insertion order or on how pandas
# treats a raw dict on column assignment.
df["HEC-5unif"] = pandas.Series(cent5unif)

## Save everything

In [137]:
# Write all centrality columns to CSV. `index=False` omits the row index, so the node
# label is only implicit in the row order of the file.
# NOTE(review): the `Output/` directory is not created anywhere in this notebook.
df.to_csv("Output/tags_ask_ubuntu_dataframe.csv", index=False)

In [138]:
# Display the final table (pandas truncates the rendering of long frames).
df

Unnamed: 0,UPHEC-2,UPHEC-3,UPHEC-4,HEC-2unif,HEC-3unif,HEC-4unif,HEC-5unif,UPHEC-5
0,0.000830,0.000697,0.000584,0.001732,0.001297,0.001102,0.001049,0.000532
1,0.001423,0.001032,0.000816,0.001214,0.000900,0.000622,0.000605,0.000683
2,0.002054,0.001282,0.000963,0.002811,0.001778,0.001147,0.000838,0.000793
3,0.000357,0.000391,0.000405,0.004494,0.002335,0.001402,0.001039,0.000379
4,0.002115,0.001289,0.001037,0.000262,0.000387,0.000422,0.000357,0.000936
...,...,...,...,...,...,...,...,...
3016,0.000025,0.000040,0.000105,0.000012,0.000061,0.000000,0.000000,0.000132
3017,0.000028,0.000081,0.000109,0.000000,0.000000,0.000073,0.000000,0.000122
3018,0.000023,0.000040,0.000110,0.000000,0.000030,0.000000,0.000000,0.000130
3019,0.000031,0.000084,0.000112,0.000000,0.000000,0.000074,0.000000,0.000128
