In [1]:
# Execute the shared helper script inside this kernel's namespace.
# NOTE(review): `style_notebook`, called in the next cell, is presumably defined there — confirm.
%run ./resources/library.py

In [2]:
# `style_notebook` is not defined in this notebook; presumably it is provided by
# ./resources/library.py, which is executed with %run in the first cell — TODO confirm.
style_notebook()

Digital Case Study: Multidrug-Resistant Tuberculosis (MDR-TB) Outbreak - Revisiting the 2005 Outbreak Investigation in Thailand by John Oeltmann

## Note: Work in progress...

# Notebook 7, Part 3: Social Network Analysis

## Social Network Analysis

In [3]:
import pandas as pd

# Show which pandas release this notebook is running on.
pd.__version__

'0.24.2'

In [4]:
# Display complete frames: no row/column truncation and a wide output area.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# Never truncate the text inside a cell. The sentinel for "unlimited" differs
# between pandas releases: pandas >= 1.0 expects None (and recent releases
# reject negative values), while older releases such as 0.24.x only accept an
# integer and use -1. Try the modern spelling first and fall back to the
# legacy one so this cell runs on both.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

Let's load the two batches of data for nodes and edges.

In [15]:
# Read both pickled frames in one pass:
#   nodes_df5 -> nodes / cases (MDR-TB only)
#   edges_df6 -> edges / links
nodes_df5, edges_df6 = map(
    pd.read_pickle,
    ('outputs/nodes_df5.pickle', 'outputs/edges_df6.pickle'),
)

## Using `bokeh` for Network Visualization

In [21]:
# Print the column names, dtypes and non-null counts of the node table.
nodes_df5.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13 entries, 0 to 12
Data columns (total 9 columns):
CaseNo          13 non-null object
links           13 non-null int64
FAKEMIRUVNTR    13 non-null object
FAKEMIRUID      13 non-null int64
DRTYPE          13 non-null object
LON             13 non-null float64
LAT             13 non-null float64
COORDS          13 non-null object
SYMBOL          13 non-null object
dtypes: float64(2), int64(2), object(5)
memory usage: 1.0+ KB


In [16]:
# The case numbers are used as the node identifiers of the network.
nodes = nodes_df5['CaseNo'].tolist()

nodes

['TH-101579',
 'TH-103347',
 'TH-103927',
 'TH-103009',
 'TH-101783',
 'TH-102909',
 'TH-101823',
 'TH-103773',
 'TH-104039',
 'TH-104090',
 'TH-102637',
 'TH-102445',
 'TH-102460']

In [49]:
# Per-node attribute mapping, keyed by case number:
#   {case number: {'Case No.': ..., 'Drug Res Type': ..., 'Links': ...}}
attributes = {
    row['CaseNo']: {
        'Case No.': row['CaseNo'],
        'Drug Res Type': row['DRTYPE'],
        'Links': row['links'],
    }
    for _, row in nodes_df5.iterrows()
}

attributes

{'TH-101579': {'Case No.': 'TH-101579', 'Drug Res Type': 'MDR-TB', 'Links': 4},
 'TH-101783': {'Case No.': 'TH-101783', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-101823': {'Case No.': 'TH-101823', 'Drug Res Type': 'MDR-TB', 'Links': 4},
 'TH-102445': {'Case No.': 'TH-102445', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-102460': {'Case No.': 'TH-102460', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-102637': {'Case No.': 'TH-102637', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-102909': {'Case No.': 'TH-102909', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-103009': {'Case No.': 'TH-103009', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-103347': {'Case No.': 'TH-103347', 'Drug Res Type': 'MDR-TB', 'Links': 3},
 'TH-103773': {'Case No.': 'TH-103773', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-103927': {'Case No.': 'TH-103927', 'Drug Res Type': 'MDR-TB', 'Links': 2},
 'TH-104039': {'Case No.': 'TH-104039', 'Drug Res Type': 'MDR-TB', 'Links': 1},
 'TH-104090': {'Case No.': 'TH-104090', 

In [28]:
# Hover fields shown for each node, as (label, field) pairs.
# The "@name" fields must match column names on the graph's node data source.
# Those columns come from the node attributes set on the graph, whose keys are
# 'Case No.', 'Drug Res Type' and 'Links' -- not the DataFrame column names
# (CaseNo / DRTYPE / links), which would render as "???" in the hover box.
# Names containing spaces or punctuation need the @{...} form.
# NOTE(review): relies on from_networkx copying node attributes into the node
# data source (bokeh >= 1.1) -- confirm against the installed bokeh version.
tooltips = [
    ("Case No:", "@{Case No.}"),
    ("Drug Res Type:", "@{Drug Res Type}"),
    ("Links:", "@Links"),
]

In [29]:
# Pair the two endpoint columns row by row into (CaseNo1, CaseNo2) tuples.
edges = list(zip(edges_df6['CaseNo1'], edges_df6['CaseNo2']))

edges

[('TH-101579', 'TH-101823'),
 ('TH-101783', 'TH-101823'),
 ('TH-101579', 'TH-104039'),
 ('TH-101579', 'TH-103927'),
 ('TH-101579', 'TH-102637'),
 ('TH-103347', 'TH-103773'),
 ('TH-103927', 'TH-104090'),
 ('TH-103009', 'TH-103347'),
 ('TH-102909', 'TH-103347'),
 ('TH-101823', 'TH-102445'),
 ('TH-101823', 'TH-102460')]

In [50]:
import networkx as nx

from bokeh.io import show, output_notebook
from bokeh.models import Plot, Range1d, MultiLine, \
    Circle, HoverTool, TapTool, BoxSelectTool, \
    ColumnDataSource, LabelSet
from bokeh.models.graphs import from_networkx, \
    NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.palettes import Spectral4

output_notebook()

# Fixed seed for the force-directed layout. spring_layout starts from random
# positions, so without a seed the figure changes on every run.
LAYOUT_SEED = 42

# Undirected graph built from the case list and the case-to-case links.
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

# Attach the per-case display attributes to the matching nodes.
nx.set_node_attributes(G, attributes)

plot = Plot(plot_width=600, plot_height=600,
            x_range=Range1d(-1.1, 1.1), y_range=Range1d(-1.1, 1.1))

plot.title.text = "MDR-TB Links"

# NOTE(review): `source` and `labels` are created but never attached to `plot`
# in this cell, so no node labels are drawn -- confirm whether that is intended.
source = ColumnDataSource(nodes_df5)
labels = LabelSet(text_font_size="10pt", text_color="black",
                  source=source, text_align='center')
plot.add_tools(HoverTool(tooltips=tooltips), TapTool(), BoxSelectTool())

# Extra keyword arguments are forwarded to the layout function.
# You can swap spring_layout for circular_layout; drop the `seed` argument if
# you do, because circular_layout does not accept one.
graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0),
                               seed=LAYOUT_SEED)

# Node appearance: default, selected (tap / box select) and hovered.
graph_renderer.node_renderer.glyph = \
    Circle(size=15, fill_color=Spectral4[0])

graph_renderer.node_renderer.selection_glyph = \
    Circle(size=20, fill_color=Spectral4[2])

graph_renderer.node_renderer.hover_glyph = \
    Circle(size=15, fill_color=Spectral4[1])

# Edge appearance: default, selected and hovered.
graph_renderer.edge_renderer.glyph = \
    MultiLine(line_color="#CCCCCC", line_alpha=0.8, line_width=2)

graph_renderer.edge_renderer.selection_glyph = \
    MultiLine(line_color=Spectral4[2], line_width=5)

graph_renderer.edge_renderer.hover_glyph = \
    MultiLine(line_color=Spectral4[1], line_width=5)

# Selecting or hovering a node also highlights the edges attached to it.
graph_renderer.selection_policy = NodesAndLinkedEdges()
graph_renderer.inspection_policy = NodesAndLinkedEdges()#EdgesAndLinkedNodes()

plot.renderers.append(graph_renderer)

show(plot)

Reference for adding node labels and hover tips to a `bokeh` network plot:

https://stackoverflow.com/questions/47210530/adding-node-labels-to-bokeh-network-plots

In [14]:
# Number of nodes in the graph G.
G.number_of_nodes()

13

In [124]:
# Number of edges in the graph G.
G.number_of_edges()

11