# Network Interface: Make Graph Object

In [1]:
import re, json
import pandas as pd
import numpy as np
import networkx as nx
from networkx.readwrite import json_graph
from networkx.algorithms import community
from operator import itemgetter
from json import JSONEncoder

# Root directory of the project data; every input and output path in this
# notebook is built by appending a relative path to it.
# NOTE(review): this is an absolute path on one user's machine — change it
# before running the notebook anywhere else.
abs_dir = "/Users/quinn.wi/Documents/SemanticData/"

## Create Network Graph

In [2]:
%%time

# Read in the node and edge tables exported for this network.
graph_dir = abs_dir + "Output/Graphs/JQA_Network_mergedEntities-correlation/"
nodes = pd.read_csv(graph_dir + "nodes.csv", sep = ',')
edges = pd.read_csv(graph_dir + "links.csv", sep = ',')

# Map each row index of the node table to that node's label.
nodes_dictionary = nodes['label'].to_dict()

# Swap the codes in `source`/`target` for node labels. Values that are not a
# key of the dictionary are left untouched by `replace`.
# NOTE(review): assumes links.csv stores row positions of nodes.csv in these
# two columns — confirm against the export step.
edges = edges.replace({'source': nodes_dictionary, 'target': nodes_dictionary})

# Convert the edge table to a list of (source, target) tuples, the form
# `add_edges_from` accepts.
edges = [tuple(x) for x in edges[['source', 'target']].to_numpy()]

# Initialize an undirected graph and load nodes and edges into it.
G = nx.Graph()
G.add_nodes_from(nodes['label'])
G.add_edges_from(edges)

# Attach the per-node columns as node attributes, keyed by node label.
# `.tolist()` turns the column into plain Python values.
for attribute in ('ner_label', 'match_quality'):
    nx.set_node_attributes(G,
                           dict(zip(nodes['label'], nodes[attribute].tolist())),
                           attribute)

# Print a short summary of the graph. `nx.info` was removed in NetworkX 3.0,
# so the same figures are computed directly from the graph instead; the layout
# matches what `nx.info` used to print.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
summary_lines = [f"Name: {G.name}",
                 f"Type: {type(G).__name__}",
                 f"Number of nodes: {node_count}",
                 f"Number of edges: {edge_count}"]

# The average degree is undefined for an empty graph, so skip it in that case.
if node_count > 0:
    average_degree = sum(degree for _, degree in G.degree()) / node_count
    summary_lines.append(f"Average degree: {average_degree:8.4f}")

print ("\n".join(summary_lines))

Name: 
Type: Graph
Number of nodes: 4677
Number of edges: 33946
Average degree:  14.5161
CPU times: user 3.5 s, sys: 155 ms, total: 3.65 s
Wall time: 3.69 s


#### Append Network Info & Detect Communities

NetworkX adds node/edge information with ```nx.set_node_attributes```.

In [3]:
%%time

# --- Whole-network statistics -------------------------------------------

# Density of the full graph.
density = nx.density(G)
print(f"Network density: {density:.3f}")

# A diameter only exists for a connected graph, so report connectivity first.
print(f"Is the network connected? {nx.is_connected(G)}")

# Pick the connected component with the most nodes, then measure the
# diameter of the subgraph induced by just those nodes.
largest_component = max(nx.connected_components(G), key=len)
subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print(f"Network diameter of the largest component: {diameter:.3f}")

# Transitivity of the full graph, reported here as triadic closure.
triadic_closure = nx.transitivity(G)
print(f"Triadic closure: {triadic_closure:.3f}\n")

# --- Per-node measures, stored on the graph as node attributes ----------

betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality(G)

for attribute_name, value_by_node in (('betweenness', betweenness_dict),
                                      ('eigenvector', eigenvector_dict),
                                      ('degree', dict(G.degree()))):
    nx.set_node_attributes(G, value_by_node, attribute_name)

# --- Community detection ------------------------------------------------

# Greedy modularity maximisation returns a sequence of node sets.
communities = community.greedy_modularity_communities(G)

# Label every node with the position of the community that contains it.
modularity_dict = {member: community_index
                   for community_index, members in enumerate(communities)
                   for member in members}

nx.set_node_attributes(G, modularity_dict, 'modularity')

Network density: 0.003
Is the network connected? False
Network diameter of the largest component: 18.000
Triadic closure: 0.893

CPU times: user 2min 28s, sys: 718 ms, total: 2min 29s
Wall time: 2min 31s


## Save Graph Object

In [4]:
%%time

# Convert the graph, including the node attributes attached in the cells
# above, into NetworkX's node-link dictionary so it can be written as JSON.
data = json_graph.node_link_data(G)

# Serialize dictionary with json.
class NPEncoder(JSONEncoder):
    """JSON encoder that also accepts NumPy arrays and NumPy scalar values.

    The standard encoder rejects NumPy objects such as ``np.ndarray``,
    ``np.int64`` or ``np.float32``. This subclass converts them to the
    equivalent built-in Python values before they are encoded.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Parameters
        ----------
        obj : object
            A value the standard encoder could not serialize on its own.

        Returns
        -------
        list, int, float or bool
            A (possibly nested) list for arrays, or the matching Python
            scalar for NumPy integer, floating-point and boolean scalars.

        Raises
        ------
        TypeError
            Raised by the base class for any other unsupported type.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # NumPy scalars are not instances of the built-in number types the
        # encoder recognises (np.float64 excepted), so unwrap them explicitly.
        if isinstance(obj, (np.integer, np.floating, np.bool_)):
            return obj.item()
        return super().default(obj)
    
# Encode the node-link dictionary as a single JSON string; NPEncoder takes
# care of any NumPy arrays the standard encoder would reject.
data_json = json.dumps(data, cls=NPEncoder)

# Write the JSON text into the same directory as the node and link tables.
network_path = abs_dir + "Output/Graphs/JQA_Network_mergedEntities-correlation/network.json"
with open(network_path, "w") as out_file:
    out_file.write(data_json)

CPU times: user 92.1 ms, sys: 4.39 ms, total: 96.5 ms
Wall time: 96.5 ms
