In [3]:
from os import listdir
from os.path import isfile, join
import networkx as nx
import collections
import matplotlib.pyplot as plt
import numpy as np
import random
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import math
from scipy.sparse.linalg import eigsh
import json
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to plain Python values.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        NumPy integers, floats and booleans become ``int``, ``float`` and
        ``bool``; arrays become (nested) lists. Anything else is delegated to
        the base class, which raises ``TypeError``.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # np.bool_ is neither np.integer nor a Python bool, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
def gen_graph(cur_n, g_type,seed=None):
    """Generate a random networkx graph of the requested family.

    The family-specific parameters are drawn from the ``random`` module, which
    is re-seeded with ``seed`` on every call (a global side effect); the same
    ``seed`` is also forwarded to the networkx generator.

    Parameters
    ----------
    cur_n : number of nodes.
    g_type : one of ``'erdos_renyi'``, ``'powerlaw'``, ``'small-world'``,
        ``'barabasi_albert'`` or ``'geometric'``.
    seed : seed for both the parameter draw and the generator.

    Returns
    -------
    The generated graph.

    Raises
    ------
    ValueError
        If ``g_type`` is not one of the supported families.
    """
    random.seed(seed)
    if g_type == 'erdos_renyi':
        g = nx.erdos_renyi_graph(n=cur_n, p=random.uniform(0.15,0.20),seed = seed)
    elif g_type == 'powerlaw':
        g = nx.powerlaw_cluster_graph(n=cur_n, m=random.randint(2,4), p=random.uniform(0.1,0.5),seed = seed)
    elif g_type == 'small-world':
        g = nx.newman_watts_strogatz_graph(n=cur_n, k=random.randint(2,5), p=random.uniform(0.1,0.2),seed = seed)
    elif g_type == 'barabasi_albert':
        g = nx.barabasi_albert_graph(n=cur_n, m=random.randint(2,5),seed = seed)
    elif g_type == 'geometric':
        g = nx.random_geometric_graph(cur_n, random.uniform(0.1,0.4),seed = seed)
    else:
        # Previously an unknown type fell through to `return g` and surfaced as
        # an UnboundLocalError; fail with an explicit message instead.
        raise ValueError(
            "Unknown graph type %r; expected one of 'erdos_renyi', 'powerlaw', "
            "'small-world', 'barabasi_albert', 'geometric'" % (g_type,)
        )
    return g
    
def get_from_json(file_path):
    """Open ``file_path``, parse its contents as JSON and return the result."""
    with open(file_path) as handle:
        return json.load(handle)
def gen_new_graphs(graph_type,seed = None, number_nodes = 30):
    """Generate one random graph whose nodes all carry ``active = 1``.

    Parameters
    ----------
    graph_type : sequence of graph-family names accepted by ``gen_graph``.
        With a single entry that family is used directly; otherwise one is
        picked with ``np.random.choice``.
    seed : seed applied to both ``random`` and ``np.random`` (global side
        effect) and forwarded to ``gen_graph``.
    number_nodes : number of nodes of the generated graph (defaults to 30,
        the value that used to be hard-coded).

    Returns
    -------
    The generated graph with the node attribute ``"active"`` set to 1.
    """
    random.seed(seed)
    np.random.seed(seed)
    chosen_type = np.random.choice(graph_type) if len(graph_type) != 1 else graph_type[0]
    graph = gen_graph(number_nodes, chosen_type, seed)
    # Super-node augmentation is currently disabled:
    #graph =add_super_node(graph)
    nx.set_node_attributes(graph, 1, "active")
    return graph
def input_graph(graph_path,file):
    """Load ``<graph_path><file>.txt`` as an edge list and normalise the graph.

    The path is built by plain string concatenation, so ``graph_path`` must
    already end with a path separator.

    Parameters
    ----------
    graph_path : directory prefix of the edge-list file.
    file : file name without the ``.txt`` extension (converted with ``str``).

    Returns
    -------
    (GRAPH, node_map) : the graph with nodes relabelled to consecutive integers
        (in the iteration order of the loaded graph's nodes) and self-loops
        removed, plus the dict mapping each original label to its new integer.
    """
    # The context manager closes the file even if parsing the edge list fails.
    with open(graph_path+str(file)+".txt", "rb") as fh:
        GRAPH = nx.read_edgelist(fh)
    # `node_map` rather than `map`, to avoid shadowing the builtin.
    node_map = {n: i for i, n in enumerate(GRAPH.nodes())}
    GRAPH = nx.relabel_nodes(GRAPH, node_map)
    # Materialise the self-loops first so the graph is not mutated while the
    # self-loop iterator is still being consumed.
    GRAPH.remove_edges_from(list(nx.selfloop_edges(GRAPH)))
    return GRAPH, node_map


In [4]:
def molloy_reed(g):
    """Return <k^2>/<k> over the degrees of ``g``, leaving out the last entry.

    ``g`` only needs a ``degree()`` method yielding ``(node, degree)`` pairs.
    Assumes node labels are numeric so the pairs form a numeric array — TODO confirm.
    """
    degree_pairs = np.array(g.degree())
    # NOTE(review): the final degree is discarded before averaging — presumably
    # a leftover from graphs that carry an appended super node; confirm it is
    # still wanted for graphs without one.
    trimmed = np.delete(degree_pairs[:, 1], -1)
    first_moment = trimmed.mean()
    second_moment = (trimmed ** 2).mean()
    return second_moment / first_moment
def global_feature(g): 
    """Compute a dictionary of global structural and spectral features of ``g``.

    Keys, in insertion order: ``nodes``, ``edges``, ``heterogeneity``,
    ``density``, ``resilience``, ``modularity``, ``eccentricity``, ``diameter``,
    ``radius``, ``spectral_radius``, ``spectral_gap``, ``natural_connectivity``,
    ``global_efficiency``, ``assortativity``.

    Distance-based features (eccentricity, diameter, radius) are computed on
    the whole graph when possible and on its largest connected component when
    networkx reports that the graph is not connected.

    Parameters
    ----------
    g : networkx graph. Assumes numeric node labels (the degree view is
        converted to a numeric array) — TODO confirm for other callers.

    Returns
    -------
    dict mapping feature name to its value.
    """
    feature = {}
    M = len(g.edges())
    N = len(g)
    degs = np.array(g.degree())[:,1]

    # First and second moments of the degree distribution.
    k1 = degs.mean()
    k2 = np.mean(degs** 2)
    div = k2 - k1**2

    # The N-1 algebraically largest adjacency eigenvalues, sorted ascending.
    A = nx.to_scipy_sparse_array(g, weight='weight',dtype=float,format='csr')
    adj_lams = np.sort(eigsh(A, k=N-1, which='LA', return_eigenvectors=False))

    feature["nodes"] = N
    feature["edges"] = M
    feature["heterogeneity"] = div/k1
    feature["density"] = (2*M)/(N*(N-1))
    feature["resilience"] = molloy_reed(g)
    feature["modularity"] = nx_comm.modularity(g,nx_comm.label_propagation_communities(g))

    # Eccentricities are computed once; diameter and radius are their max and
    # min, so there is no need to recompute all shortest paths for each.
    try:
        ecc = nx.algorithms.distance_measures.eccentricity(g)
    except nx.NetworkXError:
        # Raised for a disconnected graph: fall back to the largest component.
        lcc = max(nx.connected_components(g), key=len)
        ecc = nx.algorithms.distance_measures.eccentricity(g.subgraph(lcc))
    ecc_values = list(ecc.values())
    feature["eccentricity"]=  np.mean(ecc_values)
    feature["diameter"] = max(ecc_values)
    feature["radius"] = min(ecc_values)

    feature["spectral_radius"]=adj_lams[-1]
    feature["spectral_gap"]=  adj_lams[-1]-adj_lams[-2]
    # NOTE(review): this averages over the N-1 computed eigenvalues and uses a
    # base-2 logarithm; the usual definition of natural connectivity uses all
    # N eigenvalues and the natural log — confirm this variant is intended.
    feature["natural_connectivity"]= np.log2(sum(np.exp(np.real(adj_lams)))/len(adj_lams))
    feature["global_efficiency"] = nx.algorithms.efficiency_measures.global_efficiency(g)
    feature["assortativity"] = nx.algorithms.assortativity.degree_assortativity_coefficient(g)
    return feature
# Load the previously saved feature table; the cells below add or overwrite
# one entry per graph name in this dict.
feature = get_from_json("../Dataset/features.json")

# Synthetic Graph

In [34]:
# Compute features for every .txt edge list in the homogeneity and synthetic
# graph directories and store them under the graph's file name.
graph_path= ["../Dataset/HomogeneityGraph/","../Dataset/SyntheticGraph/"]
for path in graph_path:
    # Keep only .txt files and strip just that suffix, so names containing
    # extra dots stay intact and stray files (e.g. hidden files) are skipped.
    # Sorting makes the processing order independent of the filesystem.
    file_list = sorted(
        f[:-len(".txt")] for f in listdir(path)
        if f.endswith(".txt") and isfile(join(path, f))
    )
    for graph_name in file_list:
        GRAPH, _ = input_graph(path,graph_name)
        feature[graph_name] =  global_feature(GRAPH)
    

# BA with Motifs Attached

In [5]:
# Compute features for every .txt edge list of the BA graphs with attached motifs.
graph_path = "../Dataset/Validation/Motifs_Attached/BA/"
# Keep only .txt files and strip just that suffix, so names containing extra
# dots stay intact and stray files are skipped; sorted for a stable order.
file_list = sorted(
    f[:-len(".txt")] for f in listdir(graph_path)
    if f.endswith(".txt") and isfile(join(graph_path, f))
)
for graph_name in file_list:
    GRAPH, _ = input_graph(graph_path,graph_name)
    feature[graph_name] =  global_feature(GRAPH)

# Tree with Motifs Attached

In [6]:
# Compute features for every .txt edge list of the tree graphs with attached motifs.
graph_path ="../Dataset/Validation/Motifs_Attached/Tree/" 
# Keep only .txt files and strip just that suffix, so names containing extra
# dots stay intact and stray files are skipped; sorted for a stable order.
file_list = sorted(
    f[:-len(".txt")] for f in listdir(graph_path)
    if f.endswith(".txt") and isfile(join(graph_path, f))
)
for graph_name in file_list:
    GRAPH, _ = input_graph(graph_path,graph_name)
    feature[graph_name] =  global_feature(GRAPH)

# Real World Network

In [9]:
# Compute features for a fixed selection of real-world networks, echoing each
# name as it is processed.
graph_path =  "../Dataset/Real/"
file_list = [
    "corruption",
    "foodweb-baywet",
    "inf-USAir97",
    "moreno_crime_projected",
    'opsahl-openflights',
    'household',
    'faa',
    'facebook',
    'powergrid',
    'netscience',
]
for graph_name in file_list:
    print(graph_name, end=" ,")
    GRAPH, _ = input_graph(graph_path,graph_name)
    feature[graph_name] =  global_feature(GRAPH)

corruption ,foodweb-baywet ,inf-USAir97 ,moreno_crime_projected ,opsahl-openflights ,household ,faa ,facebook ,powergrid ,netscience ,

# Cancer Gene Network

In [None]:
# Compute features for every .txt edge list in the gene-network directory.
graph_path = "../Dataset/GeneNetwork/"
# Keep only .txt files and strip just that suffix, so names containing extra
# dots stay intact and stray files are skipped; sorted for a stable order.
file_list = sorted(
    f[:-len(".txt")] for f in listdir(graph_path)
    if f.endswith(".txt") and isfile(join(graph_path, f))
)
for graph_name in file_list:
    GRAPH, _ = input_graph(graph_path,graph_name)
    feature[graph_name] =  global_feature(GRAPH)

In [36]:
# Print the whole feature table (graph name -> feature dict) for inspection.
print(feature)

{'moreno_crime_projected': {'nodes': 754, 'edges': 2127, 'heterogeneity': 5.480798225168634, 'density': 0.007492576114639585, 'resilience': 11.125088173054314, 'modularity': 0.8006332268597997, 'eccentricity': 12.302387267904509, 'diameter': 16, 'radius': 8, 'spectral_radius': 18.06580694174004, 'spectral_gap': 4.0010810887095705, 'natural_connectivity': 16.558278000253285, 'global_efficiency': 0.17895793744673452, 'assortativity': 0.14183676257165753}, 'corruption': {'nodes': 309, 'edges': 3281, 'heterogeneity': 13.689081689318414, 'density': 0.06894885050224855, 'resilience': 34.9304983996342, 'modularity': 0.6753895810676882, 'eccentricity': 5.281553398058253, 'diameter': 7, 'radius': 4, 'spectral_radius': 46.512642380567456, 'spectral_gap': 9.924532398150227, 'natural_connectivity': 58.83684259131701, 'global_efficiency': 0.39241173374311733, 'assortativity': 0.5324194688774334}, 'netscience': {'nodes': 1461, 'edges': 2742, 'heterogeneity': 3.185137424233574, 'density': 0.002570954

# Dump to JSON

In [None]:
import json
# Encode before opening the file: opening with "w" truncates it immediately,
# so an encoding error raised mid-dump would destroy the existing feature
# table (which is also this notebook's input). NpEncoder converts any NumPy
# scalars or arrays that the plain encoder would reject.
serialized_features = json.dumps(feature, cls=NpEncoder)
with open("../Dataset/features.json", "w") as outfile:
    outfile.write(serialized_features)