# Create an interactive Bokeh website map

This notebook parses the graph SVG exported from Gephi and renders it as an interactive Bokeh plot.

In [1]:
# Site identifier; the input and output files under build/ are named after it.
domain = "via-ev"

In [2]:
import re

import pandas as pd
from bokeh.models import Bezier, Circle, ColumnDataSource, HoverTool, Legend, OpenURL, TapTool
from bokeh.plotting import figure, output_file, show
from bs4 import BeautifulSoup

In [3]:
# Edge list of the crawl; the 'Source' and 'Target' columns are read below.
df = pd.read_csv('build/' + domain + '_Website_Map_Edges.csv')

# Per-URL occurrence counts, shaped as {'Source': {url: n}} and {'Target': {url: n}}.
# The outer key is written out explicitly: on pandas >= 2.0,
# value_counts().to_frame() names its column 'count', so the previous
# .to_frame().to_dict() chain would no longer be keyed by 'Source' / 'Target'.
source_links = {'Source': df['Source'].value_counts().to_dict()}
target_links = {'Target': df['Target'].value_counts().to_dict()}

# Input (Gephi SVG export) and output (standalone Bokeh HTML) locations.
gephi_svg_path = 'build/' + domain + '.svg'
bokeh_html_path = 'build/' + domain + '.html'

# Plot title (German, user-facing).
title = 'Verlinkungen auf der Webseite von {}'.format(domain)

In [4]:
# Read the SVG with an explicit encoding so decoding does not depend on the
# platform's default locale (assumes the Gephi export is UTF-8 -- TODO confirm).
# The 'lxml' parser is kept as-is: later cells index element['class'] as a list.
with open(gephi_svg_path, encoding='utf-8') as svg_file:
    bs = BeautifulSoup(svg_file.read(), 'lxml')

In [5]:
def lighten_hex(color, factor=0.9):
    """Return `color` blended towards white, formatted as '#rrggbb'.

    Parameters
    ----------
    color : str
        Hex colour, either '#rrggbb' or the 3-digit shorthand '#rgb'.
        The leading '#' is optional.
    factor : float, default 0.9
        Blend amount. 0 leaves the colour unchanged, 1 yields white.

    Returns
    -------
    str
        The lightened colour as a lower-case '#rrggbb' string.
    """
    def hex_to_rgb(value):
        """Return (red, green, blue) for the color given as #rrggbb or #rgb."""
        value = value.lstrip('#')
        if len(value) == 3:
            # Shorthand '#abc' stands for '#aabbcc'. Without this expansion each
            # digit would be read as a value in 0..15 instead of 0..255.
            value = ''.join(digit * 2 for digit in value)
        lv = len(value)
        return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))

    def rgb_to_hex(red, green, blue):
        """Return color as #rrggbb for the given color values."""
        return '#%02x%02x%02x' % (red, green, blue)

    rgb = hex_to_rgb(color)
    # Move each channel `factor` of the way from its value to 255 (white).
    rgb = [int((255 - x) * factor + x) for x in rgb]
    return rgb_to_hex(*rgb)

In [6]:
# Canvas size taken from the <svg> root element (parsed as float, then truncated to int).
svg_root = bs.find('svg')
height = int(float(svg_root['height']))
width = int(float(svg_root['width']))

# Edges: every <path> is expected to carry exactly eight decimal numbers in its
# 'd' attribute -- start point, two control points, end point. The first and
# last entries of its class list are used as source and target node names.
edges = []
for path in bs.find_all('path'):
    # Raw string: '\d' in a plain string literal is an invalid escape sequence.
    x0, y0, cx0, cy0, cx1, cy1, x1, y1 = re.findall(r'-?\d+\.\d+', path['d'])
    edges.append({
        'source': path['class'][0],
        'target': path['class'][-1],
        'x0': float(x0),
        'y0': float(y0),
        'x1': float(x1),
        'y1': float(y1),
        'cx0': float(cx0),
        'cy0': float(cy0),
        'cx1': float(cx1),
        'cy1': float(cy1),
        'fill': path['fill'],
        'stroke': path['stroke'],
        'stroke_light': lighten_hex(path['stroke']),
        'stroke-opacity': float(path['stroke-opacity']),
        'stroke-width': float(path['stroke-width']),
    })
df_edges = pd.DataFrame(edges)

# Nodes: one record per <circle>. The stroke opacity/width are kept as the raw
# attribute strings, unlike the edge records above.
nodes = []
for circle in bs.find_all('circle'):
    nodes.append({
        'class': circle['class'][0],
        'cx': float(circle['cx']),
        'cy': float(circle['cy']),
        'fill': circle['fill'],
        'fill_light': lighten_hex(circle['fill']),
        'fill-opacity': float(circle['fill-opacity']),
        'r': float(circle['r']),
        'stroke': circle['stroke'],
        'stroke-opacity': circle['stroke-opacity'],
        'stroke-width': circle['stroke-width'],
    })
df_nodes = pd.DataFrame(nodes)

# Labels: one record per <text>; its stripped text content is stored as 'url'.
node_labels = []
for text in bs.find_all('text'):
    node_labels.append({
        'class': text['class'][0],
        'fill': text['fill'],
        'font-family': text['font-family'],
        'font-size': text['font-size'],
        'style': text['style'],
        'x': float(text['x']),
        'y': float(text['y']),
        'url': text.get_text().strip(),
    })
df_node_labels = pd.DataFrame(node_labels)

In [7]:
# Lookup table: node class name -> row position in df_nodes.
node_dict = {name: pos for pos, name in enumerate(df_nodes['class'])}

# For every node position: the positions of all nodes that are neither the node
# itself nor the target of one of its outgoing edges.
linked_nodes = {}
for pos, name in enumerate(df_nodes['class']):
    excluded = set(df_edges.loc[df_edges['source'] == name, 'target'])
    excluded.add(name)
    linked_nodes[pos] = [idx for other, idx in node_dict.items() if other not in excluded]

# For every node that is the source of at least one edge: the index labels of
# all edges that do not start at that node.
linked_edges = {
    node_dict[name]: df_edges.loc[df_edges['source'] != name].index.tolist()
    for name in df_edges['source'].unique()
}

In [8]:
def getLinkCount(url, key):
    """Return how often `url` occurs in one column of the edge list.

    key == 'Source' looks the URL up in source_links['Source'],
    key == 'Target' looks it up in target_links['Target'].
    A URL that is not present counts as 0. Any other key returns None.
    """
    if key == 'Source':
        counts = source_links['Source']
    elif key == 'Target':
        counts = target_links['Target']
    else:
        return None
    return counts.get(url, 0)

def clean_name(x):
    """Return a short display name for URL `x`: its part after the host, with a leading '/'.

    Every '//' is removed first, then everything up to and including the first
    remaining '/' is dropped, e.g. 'http://host/a/b' -> '/a/b'.
    A URL that has a '//' but nothing after the host ('http://host') maps to
    '/', the site root. A string with neither '//' nor '/' is returned with a
    '/' prefixed.
    """
    stripped = x.replace('//', '')
    _, slash, path = stripped.partition('/')
    if slash:
        return '/' + path
    if '//' in x:
        # Scheme and host only: there is no path, so this is the site root.
        return '/'
    return '/' + stripped
                                                 
# Short display name for every label, derived from its URL.
df_node_labels['text'] = df_node_labels['url'].map(clean_name)

In [9]:
# Node columns. All y values in this cell are negated: SVG's y axis points
# down, the plot's y axis points up.
n_x = df_nodes['cx']
n_y = -df_nodes['cy']
n_fill = df_nodes['fill']
n_fill_light = df_nodes['fill_light']
n_r = df_nodes['r']

# Edge columns: end points (x0, y0) / (x1, y1) and the two Bezier control points.
e_x0 = df_edges['x0']
e_y0 = -df_edges['y0']
e_x1 = df_edges['x1']
e_y1 = -df_edges['y1']
e_cx0 = df_edges['cx0']
e_cy0 = -df_edges['cy0']
e_cx1 = df_edges['cx1']
e_cy1 = -df_edges['cy1']
e_line_color = df_edges['stroke']
e_line_color_light = df_edges['stroke_light']
e_line_width = 0.5 + df_edges['stroke-width']  # drawn 0.5 wider than in the SVG
e_source = df_edges['source']
e_target = df_edges['target']

# Label columns.
# NOTE(review): these are paired with the node columns by row position, which
# assumes df_node_labels lists the nodes in the same order as df_nodes -- confirm.
t_text = df_node_labels['text']
t_url = df_node_labels['url']

hover_alpha = 1.0
no_hover_alpha = 1.0

output_file(bokeh_html_path)

p = figure(
    width=width,
    height=height,
    tools="pan,wheel_zoom,save,reset,tap",
    toolbar_location='above',
    title=title,
    active_scroll='wheel_zoom',
    active_drag='pan',
)
p.axis.visible = False

# Edges are added first and nodes afterwards.
source_edges = ColumnDataSource({
    'x0': e_x0,
    'y0': e_y0,
    'x1': e_x1,
    'y1': e_y1,
    'cx0': e_cx0,
    'cy0': e_cy0,
    'cx1': e_cx1,
    'cy1': e_cy1,
    'line_color': e_line_color,
    'line_color_light': e_line_color_light,
    'line_width': e_line_width,
    'source': e_source,
    'target': e_target,
})
er = p.bezier(
    source=source_edges,
    x0='x0', y0='y0', x1='x1', y1='y1',
    cx0='cx0', cy0='cy0', cx1='cx1', cy1='cy1',
    line_color='line_color',
    line_width='line_width',
    line_alpha=no_hover_alpha,
)

source_nodes = ColumnDataSource({
    'x': n_x,
    'y': n_y,
    'fill': n_fill,
    'fill_light': n_fill_light,
    'r': n_r,
    'url': t_url,
    'text': t_text,
    'links': [getLinkCount(url, 'Source') for url in t_url],
    'backlinks': [getLinkCount(url, 'Target') for url in t_url],
})
cr = p.circle(
    source=source_nodes,
    x='x', y='y',
    fill_color='fill',
    fill_alpha=no_hover_alpha,
    line_color='black',
    radius='r',
)

# Tooltips reference node columns only, so the hover tool is attached to the
# node renderer alone.
tooltips = [("Website", "@text"), ("Links", "@links"), ("Back-Links", "@backlinks")]
p.add_tools(HoverTool(tooltips=tooltips, renderers=[cr]))

# The tap tool was created through tools="...tap" above; give it a callback that
# opens the tapped node's URL.
taptool = p.select(type=TapTool)
taptool.callback = OpenURL(url='@url')

show(p)

You can access Timestamp as pandas.Timestamp
  if pd and isinstance(obj, pd.tslib.Timestamp):
