In [1]:
import pickle as p
import networkx as nx
import os
import pandas as pd
import numpy as np

In [2]:
def graph_remove_dummy_true(graph):
    """Strip every node flagged as a dummy from ``graph``, in place.

    A node is dropped when its ``'is_dummy'`` attribute compares equal to
    ``True``. The attribute is read with ``[]``, so a node that lacks it
    raises ``KeyError``.

    Parameters
    ----------
    graph : graph object exposing ``nodes(data=True)`` and ``remove_nodes_from``

    Returns
    -------
    The same ``graph`` object that was passed in (mutated, not copied).
    """
    dummy_nodes = []
    for node, attrs in graph.nodes(data=True):
        # Equality (not truthiness) is kept on purpose: only values equal to True match.
        if attrs['is_dummy'] == True:  # noqa: E712
            dummy_nodes.append(node)
    graph.remove_nodes_from(dummy_nodes)
    return graph

def graph_properties(G):
    """Collect a handful of structural statistics for graph ``G``.

    Parameters
    ----------
    G : networkx graph
        Every edge must carry a ``'geodesic_distance'`` attribute; it is read
        with ``[]``, so a missing key raises ``KeyError``.

    Returns
    -------
    dict
        ``'nodes'`` / ``'edges'``  : node and edge counts.
        ``'density'``              : ``nx.density(G)``.
        ``'is_G_connected'``       : ``nx.is_connected(G)``.
        ``'# of components'``      : number of connected components.
        ``'assortativity'``        : degree Pearson correlation coefficient
                                     (lies in [-1, 1]; 0 means non-assortative).
        ``'clustering_coeff'``     : average clustering coefficient.
        ``'mean_geodesic'``        : mean of ``'geodesic_distance'`` over all edges.
    """
    summary = {
        'nodes': len(G.nodes()),
        'edges': len(G.edges()),
        'density': nx.density(G),
        'is_G_connected': nx.is_connected(G),
        # Count the components without materialising them in a list.
        '# of components': sum(1 for _ in nx.connected_components(G)),
        # Degree assortativity of the graph.
        'assortativity': nx.algorithms.assortativity.degree_pearson_correlation_coefficient(G),
        # Average clustering coefficient.
        'clustering_coeff': nx.algorithms.cluster.average_clustering(G),
    }

    # Third element of each edge-data triple is the edge attribute dict.
    edge_geodesics = [attrs['geodesic_distance'] for _, _, attrs in G.edges.data()]
    summary['mean_geodesic'] = np.array(edge_geodesics).mean()

    return summary

In [3]:
# Directory holding the pickled graphs, relative to the notebook's working directory.
path = "./data/OASIS_full_batch/modified_graphs/"
# List the very directory the files are later opened from (path + file), instead of a
# hardcoded absolute home-directory path that only exists on one machine and could
# silently diverge from `path`.
directory = os.listdir(path)

In [4]:
cols = ['graph_id', 'nodes', 'edges', 'density', 'is_G_connected', '# of components',
        'assortativity', 'clustering_coeff', 'mean_geodesic']

# Gather one record per graph and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
records = []
for file in directory:
    # SECURITY: pickle.load can execute arbitrary code; only load trusted graph files.
    # The context manager guarantees the file handle is closed after loading.
    with open(os.path.join(path, file), "rb") as fh:
        Gr = p.load(fh)
    # graph_remove_dummy_true mutates and returns its argument, so new_Gr is Gr.
    new_Gr = graph_remove_dummy_true(Gr)
    properties = graph_properties(new_Gr)
    # Drops the last 8 characters of the file name -- presumably the
    # '.gpickle' extension; TODO confirm against the actual file names.
    properties['graph_id'] = file[:-8]
    records.append(properties)

# `columns=cols` fixes the column order and still yields an empty, correctly
# labelled frame when the directory contains no files.
df = pd.DataFrame(records, columns=cols)

In [5]:
# Display the per-graph summary table (one row per file in `directory`).
df

Unnamed: 0,graph_id,nodes,edges,density,is_G_connected,# of components,assortativity,clustering_coeff,mean_geodesic
0,graph_52,91,351,0.085714,True,1,-0.017686,0.519178,33.084883
1,graph_32,91,350,0.085470,True,1,0.019778,0.497541,33.667072
2,graph_71,83,316,0.092859,True,1,-0.040627,0.527047,35.386811
3,graph_61,85,324,0.090756,True,1,-0.048539,0.535973,34.034994
4,graph_76,91,347,0.084737,True,1,-0.013826,0.500701,32.641216
...,...,...,...,...,...,...,...,...,...
129,graph_27,87,332,0.088746,True,1,-0.013541,0.509934,34.015174
130,graph_86,83,314,0.092272,True,1,-0.023559,0.477078,34.171934
131,graph_96,92,352,0.084090,True,1,-0.054075,0.530559,33.647941
132,graph_109,81,310,0.095679,True,1,-0.041563,0.498208,34.810024


In [None]:
# Mean and standard deviation of the average clustering coefficient across all graphs.
clustering = df['clustering_coeff']
clustering.mean(), clustering.std()

In [None]:
# Spot check on `new_Gr` (the last graph left over from the loading loop):
# mean of the 'geodesic_distance' attribute over its edges.
all_geo = []
for _, _, attrs in new_Gr.edges.data():
    all_geo.append(attrs['geodesic_distance'])
np.array(all_geo).mean()

In [None]:
# Inspect every (u, v, attribute-dict) edge triple of `new_Gr`.
list(new_Gr.edges(data=True))

In [None]:
# Generate the graph
# g, adj, protS, dict_s = getGraph_fairwalk(sizes=sizes, probs=probs, choice=method)
# nx.set_node_attributes(g,dict_s,'sensitive_attr')
# attr_comm = [{i for i in dict_s if dict_s[i]==0},{i for i in dict_s if dict_s[i]==1}] #communities on basis of attribute

# dens.append(nx.density(g))
# assort.append(nx.algorithms.assortativity.degree_pearson_correlation_coefficient(g)) #Compute degree assortativity of graph.
# assort_attributes.append(nx.attribute_assortativity_coefficient(g, 'sensitive_attr')) # assortativity with attributes
# clustering.append(nx.algorithms.cluster.average_clustering(g))
# modularity.append(nx.algorithms.community.modularity(g,attr_comm))