# Fun with comics - with NetworkX

An intro to NetworkX functionality.

nodes.csv: Contains two columns (node, type), indicating the name and the type (comic, hero) of the nodes.

edges.csv: Contains two columns (hero, comic), indicating in which comics the heroes appear.

hero-network.csv: Contains the network of heroes which appear together in the comics. This file was originally taken from http://syntagmatic.github.io/exposedata/marvel/

In [None]:
# Nx
import networkx as nx

In [None]:
# Standard Python stuff
import pandas
import time

In [None]:
# Load the three Marvel CSV files through the supplied dataframe module
def read_data(m):
    """Read the node, edge and hero-network CSV files with ``m.read_csv``.

    ``m`` is any module or object exposing ``read_csv`` (for example pandas).
    The files are read in a fixed order and returned as a
    ``(nodes, edges, hero_network)`` tuple.
    """
    csv_paths = (
        './marvel/nodes.csv',
        './marvel/edges.csv',
        './marvel/hero-network.csv',
    )
    return tuple(m.read_csv(path) for path in csv_paths)

In [None]:
%%time
(nodes, edges, heros) = read_data(pandas)
(len(nodes), len(edges), len(heros))

----

In [None]:
# The graph is built from the edge table;
# look at its first row to see the column layout
edges.head(n=1)

In [None]:
# nx.from_pandas_edgelist looks for columns named "source" and "target" by default,
# so rename hero -> source and comic -> target
edges = edges.rename(columns=dict(hero="source", comic="target"))

In [None]:
# Build the graph from the edge table; column names are spelled out and no edge attributes are attached
G = nx.from_pandas_edgelist(edges, source="source", target="target", edge_attr=None)

In [None]:
# Node count and edge count of the graph
G.number_of_nodes(), G.number_of_edges()

-----
__Who is key in the graph__

In [None]:
%%time
# Run BC
bc = nx.betweenness_centrality(G, k=1000)

In [None]:
%%time
bc_sorted = sorted(bc.items(), key=lambda x: x[1], reverse=True)
bc_sorted[0:10]

In [None]:
%%time
# Run PageRank
pr = nx.pagerank(G)

In [None]:
%%time
pr_sorted = sorted(pr.items(), key=lambda x: x[1], reverse=True)
pr_sorted[0:10]

----
some visualization

In [None]:
import graphistry

In [None]:
# Credentials are read from the environment so they are never stored in the notebook.
# Set GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD before running this cell.
# NOTE(review): a password was previously committed in this cell; it should be rotated.
import os

graphistry.register(
    api=3,
    protocol="https",
    server="hub.graphistry.com",
    username=os.environ["GRAPHISTRY_USERNAME"],
    password=os.environ["GRAPHISTRY_PASSWORD"],
)

In [None]:
# The edge table's columns were renamed from hero/comic to source/target earlier
# in the notebook, so bind the plot to the current column names.
graphistry.edges(edges, 'source', 'target').plot()

In [None]:
# TODO: unfinished cell. It held only a bare "graphistry." attribute access, which is a
# SyntaxError when the notebook runs top to bottom. Fill in the intended call or delete the cell.

In [None]:
# Viz libraries

from cuxfilter.charts.datashader.custom_extensions.graph_assets import calc_connected_edges

import holoviews as hv

from colorcet import fire
from datashader.bundling import directly_connect_edges, hammer_bundle

from holoviews.operation.datashader import datashade, dynspread
from holoviews.operation import decimate

from dask.distributed import Client

In [None]:
# Setup Viz
client = Client()
hv.notebook_extension('bokeh','matplotlib')
decimate.max_samples=20000
dynspread.threshold=0.01
datashade.cmap=fire[40:]
sz = dict(width=150,height=150)
%opts RGB [xaxis=None yaxis=None show_grid=False bgcolor="black"]

In [None]:
# Settings for the Force Atlas 2 layout call below
ITERATIONS = 500  # passed as max_iter
THETA = 1.0       # passed as barnes_hut_theta
OPTIMIZE = True   # passed as barnes_hut_optimize

In [None]:
# cugraph is used here but is not imported anywhere else in the notebook,
# so import it in this cell to avoid a NameError.
import cugraph

# Compute 2-D node positions with Force Atlas 2.
# NOTE(review): G is the NetworkX graph built earlier; confirm the installed
# cugraph version accepts a NetworkX graph as input.
pos_gdf = cugraph.layout.force_atlas2(G,
                                  max_iter=ITERATIONS,
                                  pos_list=None,
                                  outbound_attraction_distribution=True,
                                  lin_log_mode=False,
                                  edge_weight_influence=1.0,
                                  jitter_tolerance=1.0,
                                  barnes_hut_optimize=OPTIMIZE,
                                  barnes_hut_theta=THETA,
                                  scaling_ratio=2.0,
                                  strong_gravity_mode=False,
                                  gravity=1.0,
                                  verbose=False,
                                  callback=None)

In [None]:
# Join the layout positions onto the edge table to get drawable line segments.
# The edge columns were renamed from hero/comic to source/target earlier in the
# notebook, so the source/target column names must match the renamed table.
connected = calc_connected_edges(pos_gdf,
                                 edges,
                                 node_x="x",
                                 node_y="y",
                                 node_x_dtype="float32",
                                 node_y_dtype="float32",
                                 node_id="vertex",
                                 edge_source="source",
                                 edge_target="target",
                                 edge_aggregate_col=None,
                                 edge_render_type="direct",
                                )

In [None]:
%%opts RGB [tools=["hover"] width=800 height=800]

r_direct = hv.Curve(connected, label="Direct")
datashade(r_direct)

In [None]:
# Number of rows in the edge table
edges.shape[0]

In [None]:
# The 'comic' column was renamed to 'target' earlier in the notebook;
# sorting by the old name would raise a KeyError.
edges.sort_values(by='target')