In [1]:
import hypernetx as hnx
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import sys

## Load tables

In [2]:
# Load the main table from its pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
dff = pd.read_pickle("biggerTrans.pkl")

In [3]:
# Variant of the table; the file name suggests the SARS-2 data were left out — TODO confirm.
dffwos2 = pd.read_pickle("biggerTransNoSars2.pkl")

In [4]:
# Variant of the table; the file name suggests the SARS data were left out — TODO confirm.
dffwos = pd.read_pickle("biggerTransNoSars.pkl")

In [5]:
# Variant of the table; the file name suggests the coronavirus data were left out — TODO confirm.
dffwoc = pd.read_pickle("biggerTransNoCov.pkl")

## Build the hypergraph

In [6]:
# Display the (rows, columns) size of the loaded table before building the hypergraph.
dff.shape

(9524, 169)

In [7]:
# Build the hypergraph straight from the dataframe. from_dataframe() handles the
# dataframe manipulation itself, so only some (not all) of its arguments are set.
Hf = hnx.Hypergraph.from_dataframe(
    dff,              # the whole dataframe, b and p columns
    # columns=human_b_cols,  # enable to choose specific columns
    zsc='columns',    # the other option is 'rows'
    absolute=True,    # take the absolute value after the z-score
    lower_thresh=2,   # apply the > 2 threshold after z-score and absolute value
)

# Arguments left at their defaults:
# transpose = False: transposes the dataframe after z-score and absolute value,
#     essentially creating the dual hypergraph. Here the dual is taken after the
#     fact instead (below).
# name = None (string): gives the resulting hypergraph a "name" attribute. Not necessary.
# key = None (function which evaluates True or False): for more complicated
#     thresholding. Not needed when only applying z-score > some threshold.
# rows = None (list of row names): use only a subset of the rows. This is applied
#     before the z-score, so the z-score is relative only to the rows chosen.
# upper_thresh = None (number): optional maximum value for the z-score. It can be
#     combined with lower_thresh.

In [8]:
# Running the example here for a large s value because it finishes relatively quickly.
# Small s values take a long time on these large hypergraphs!
# NOTE(review): HfD is only assigned in a later cell (Hf.dual()), so these lines
# cannot be re-enabled at this position without moving them below that cell.
#betcen30 = hnx.s_betweenness_centrality(HfD, s=30)
#clocen30 = hnx.s_harmonic_closeness_centrality(HfD, s=30)

In [9]:
# Size of the thresholded hypergraph; in the recorded outputs both counts are
# smaller than those of dff.shape.
Hf.shape

(8010, 159)

In [10]:
# Take the dual of the hypergraph; the recorded shapes show the two counts swapped
# relative to Hf.shape.
HfD = Hf.dual()
HfD.shape

(159, 8010)

In [12]:
# Alias for the dual hypergraph; it is the default `graph` argument of the remote
# betweenness/closeness functions and sets the upper bound of the s range.
graph = HfD

In [13]:
import ray

In [14]:
# Start a local Ray runtime limited to 7 CPUs for the parallel centrality runs.
# ignore_reinit_error=True keeps this cell re-runnable in a live kernel: without it,
# calling ray.init() a second time raises instead of reusing the running instance.
ray.init(num_cpus=7, ignore_reinit_error=True)

2020-06-03 15:47:37,049	INFO resource_spec.py:204 -- Starting Ray with 9.81 GiB memory available for workers and up to 4.92 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-06-03 15:47:38,173	INFO services.py:563 -- Failed to connect to the redis server, retrying.
2020-06-03 15:47:39,526	INFO services.py:1168 -- View the Ray dashboard at localhost:8265


{'node_ip_address': '130.20.211.225',
 'raylet_ip_address': '130.20.211.225',
 'redis_address': '130.20.211.225:60048',
 'object_store_address': '/tmp/ray/session_2020-06-03_15-47-37_031702_3119/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-06-03_15-47-37_031702_3119/sockets/raylet',
 'webui_url': 'localhost:8265',
 'session_dir': '/tmp/ray/session_2020-06-03_15-47-37_031702_3119'}

In [24]:
@ray.remote
def betweenness(s, graph = graph):
    """Compute s-betweenness centrality as a Ray task and checkpoint the result.

    Parameters
    ----------
    s : int
        The s value forwarded to hnx.s_betweenness_centrality.
    graph : optional
        Hypergraph to analyse. Defaults to the notebook-level `graph` that is
        bound when this cell is executed.

    Returns
    -------
    pandas.Series
        The centrality values, with the Series name set to `s` so the caller
        can tell which s a result belongs to.

    Side effects
    ------------
    Writes the Series to "intermediate/<s>-betweenness.pkl".
    """
    import os  # local import so the block does not rely on a notebook-level import

    # Create the checkpoint directory up front: to_pickle() fails when the parent
    # directory is missing, which would throw away an expensive computation.
    # exist_ok=True keeps this safe when several workers race to create it.
    os.makedirs("intermediate", exist_ok=True)

    sbt = pd.Series(hnx.s_betweenness_centrality(graph, s=s))
    sbt.to_pickle("intermediate/" + str(s) + "-betweenness.pkl")
    return sbt.rename(s)

@ray.remote
def closeness(s, graph = graph):
    """Compute s-harmonic-closeness centrality as a Ray task and checkpoint the result.

    Parameters
    ----------
    s : int
        The s value forwarded to hnx.s_harmonic_closeness_centrality.
    graph : optional
        Hypergraph to analyse. Defaults to the notebook-level `graph` that is
        bound when this cell is executed.

    Returns
    -------
    pandas.Series
        The centrality values, with the Series name set to `s` so the caller
        can tell which s a result belongs to.

    Side effects
    ------------
    Writes the Series to "intermediate/<s>-closeness.pkl".
    """
    import os  # local import so the block does not rely on a notebook-level import

    # Create the checkpoint directory up front: to_pickle() fails when the parent
    # directory is missing, which would throw away an expensive computation.
    # exist_ok=True keeps this safe when several workers race to create it.
    os.makedirs("intermediate", exist_ok=True)

    scl = pd.Series(hnx.s_harmonic_closeness_centrality(graph, s=s))
    scl.to_pickle("intermediate/" + str(s) + "-closeness.pkl")
    return scl.rename(s)

In [25]:
# Run both centralities for every s from 51 up to (but excluding) graph.shape[0].
# All tasks are submitted before any result is collected, so the closeness tasks
# can start as soon as workers become free instead of waiting for the whole
# betweenness batch to finish. Results come back in submission order.
s_values = range(51, graph.shape[0])
betweenness_refs = [betweenness.remote(s) for s in s_values]
closeness_refs = [closeness.remote(s) for s in s_values]

HfDsbt2 = ray.get(betweenness_refs)
HfDscl2 = ray.get(closeness_refs)

In [33]:
# Position in HfDsbt2 of the last betweenness result that is not empty.
max(pos for pos, series in enumerate(HfDsbt2) if len(series) > 0)

29

In [27]:
# Series name (the s value set by betweenness()) of the result at index 29,
# the index reported by the last-non-empty check on HfDsbt2.
HfDsbt2[29].name

80

In [32]:
# Position in HfDscl2 of the last closeness result that is not empty.
max(pos for pos, series in enumerate(HfDscl2) if len(series) > 0)

59

In [31]:
# Series name (the s value set by closeness()) of the result at index 59,
# the index reported by the last-non-empty check on HfDscl2.
HfDscl2[59].name

110

In [34]:
# Stop the Ray runtime started by ray.init() now that the results are collected.
ray.shutdown()