In [8]:
import networkx as nx
import powerlaw
import numpy as np
import pandas as pd
import multiprocessing as mp

import sys
sys.path.append('..')
import network_utils as ne

In [9]:
# Number of digits for rounding; not referenced in this cell
# (presumably used when reporting values further down — TODO confirm).
ROUND_DIG = 4

# Open the store read-only. The default mode ('a') opens the file for
# read/write and creates an empty file when the path does not exist, which
# turns a mistyped path into a confusing KeyError plus a stray .h5 file.
with pd.HDFStore('../data/gene_network_data.h5', mode='r') as store:
    tec = store['TEC']

# Element-wise absolute value of the TEC table as a plain ndarray.
# np.abs always allocates a new array, so `tec` is never modified and there is
# no need to force an additional copy inside to_numpy().
np_tec_abs = np.abs(tec.to_numpy())

In [10]:
thresholds = [0.9, 0.85]
# NOTE: running the full sweep below takes multiple hours to finish.
# thresholds = [0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]

# One row per threshold: [threshold, average clustering, average shortest path].
results = []
for thresh in thresholds:
    # Build the network for this threshold from the absolute TEC matrix.
    curr_al = ne.threshold_weighted_adjacency_list(np_tec_abs, thresh)
    curr_graph = ne.construct_network(curr_al, "TEC", tec.columns)

    # Mean of the per-node clustering coefficients, computed on the whole
    # graph (before restricting to the largest component).
    clustering = nx.average_clustering(curr_graph)

    # average_shortest_path_length needs a connected graph, so keep only the
    # largest connected component. max() picks the same component as sorting
    # by size and taking the first element, without sorting every component.
    largest = max(nx.connected_components(curr_graph), key=len)

    # Materialise the component as a standalone graph. The first positional
    # argument of Graph.copy is `as_view`, so `.copy(True)` would only wrap the
    # filtered subgraph view in another view; every neighbour lookup during
    # the all-pairs path computation would then go through the node filter.
    curr_graph = curr_graph.subgraph(largest).copy()

    shortest_p = nx.average_shortest_path_length(curr_graph)

    results.append([thresh, clustering, shortest_p])

In [11]:
# Tabulate the metrics gathered in `results`: one row per threshold, columns
# in the same order the values were appended.
# NOTE(review): the clustering values are produced by nx.average_clustering
# (mean of local coefficients); nx.transitivity would be the global
# coefficient — confirm that the column label is the intended one.
column_names = ['Threshold', 'Global clustering coefficient', 'Average shortest path']
result_df = pd.DataFrame(data=results, columns=column_names)
result_df.head()

Unnamed: 0,Threshold,Global clustering coefficient,Average shortest path
0,0.9,0.47681,3.830646
1,0.85,0.297614,4.168725
