# Bokeh graph data preparation

In [1]:
import networkx as nx
import json
from math import log

import numpy as np

## Code to use in order to retrieve data from a particular debugging run
 
```{python}
nx.write_gml('subgraph.gml')

import json

with open('subgraph.json', 'w') as fp:
    data = json.dump(readSequenceDb, fp)
```

Loading data from files generated during the debugging process. 

In [2]:
# Overlap graph saved as GML during the debugging run.
network = nx.read_gml('subgraph.gml')

# JSON dump saved alongside the graph; later cells index it by node name and
# take len() of the value (presumably the read sequence -- verify).
with open('subgraph.json', 'r') as jsonFile:
    readSequenceDb = json.load(jsonFile)

Check whether the network nodes use the long, verbose naming format or the short one. In the former case, every node is renamed to a shorter name that contains only the name of the sequence it represents.

In [3]:
# A label is treated as "verbose" when it contains the text 'read_position';
# one such label is enough to trigger relabelling of the whole graph.
flag = any('read_position' in label for label in network.nodes())

if flag:
    # Keep only the part of each label that precedes the first ';'.
    mapping = {label: label.split(';')[0] for label in network.nodes()}
    network = nx.relabel_nodes(network, mapping)

## Retrieving the longest 'shortest path' through the graph

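The following cell computes the shortest paths between all pairs of nodes and keeps, for the source node(s) from which the largest number of nodes can be reached, the list of those reachable nodes.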

In [4]:
# sp[source] is a dict keyed by every node reachable from `source`.
sp = dict(nx.all_pairs_shortest_path(network))

maxLength = 0
maxLengthNodes = []

# NOTE(review): len(sp[source]) counts the nodes reachable from `source`; it
# is not the length of any single path -- confirm this is the intended measure.
for origin, reached in sp.items():
    reachCount = len(reached)

    if reachCount < maxLength:
        continue

    if reachCount > maxLength:
        # New maximum: forget the candidates collected so far.
        maxLength = reachCount
        maxLengthNodes = []

    maxLengthNodes.append(list(reached))

In [5]:
# Take the first of the equally ranked node lists as the spine.
# Raises IndexError when maxLengthNodes is empty.
spine = maxLengthNodes[0]

In [6]:
# tipNode1: first spine node without any incoming edge.
# The name stays unassigned when every spine node has a predecessor.
for candidate in spine:
    if not list(network.predecessors(candidate)):
        tipNode1 = candidate
        break

# tipNode2: first spine node without any outgoing edge.
# The name stays unassigned when every spine node has a successor.
for candidate in spine:
    if not list(network.successors(candidate)):
        tipNode2 = candidate
        break

In [7]:
# NOTE(review): per the networkx documentation, shortest_simple_paths yields
# *all* simple paths between the two nodes, ordered from shortest to longest,
# so this list is not limited to the shortest one -- confirm that is intended.
# `shortest` is not read by any later cell in this notebook.
shortest = list(nx.shortest_simple_paths(network,tipNode1, tipNode2))

In [8]:
# Classify the nodes that sit at the border of the graph.
beginningTips = []
endTips = []

for candidate in network.nodes():
    if not list(network.successors(candidate)):
        # No outgoing edge. This test wins, so a node with neither
        # successors nor predecessors is recorded as an end tip only.
        endTips.append(candidate)
        continue

    if not list(network.predecessors(candidate)):
        # No incoming edge (but at least one outgoing edge).
        beginningTips.append(candidate)

In [9]:
# Search every (beginning tip, end tip) pair for the longest simple path.
# Afterwards `maxPathLen` is the largest node count seen and `longestPath`
# holds every path that has exactly that many nodes.
maxPathLen = 0
longestPath = []

for start in beginningTips:
    for end in endTips:
        try:
            # The generator is consumed here, so a missing connection between
            # the two tips surfaces inside this try block.
            paths = list(nx.shortest_simple_paths(network, start, end))
        except nx.NetworkXNoPath:
            # No path between the two nodes has been found.
            # Only this error is skipped; any other failure is allowed to
            # propagate instead of being silently swallowed.
            continue

        if not paths:
            continue

        pathLength = max(map(len, paths))

        if pathLength < maxPathLen:
            continue

        if pathLength > maxPathLen:
            # New maximum: discard the shorter candidates collected so far.
            maxPathLen = pathLength
            longestPath = []

        # `paths` also contains the shorter routes between the two tips; keep
        # only those that actually reach the maximum so that every entry of
        # longestPath (in particular longestPath[0]) is a longest path.
        longestPath.extend(path for path in paths if len(path) == pathLength)

In [28]:
# Lay out the spine, i.e. the first path stored in longestPath.
spinePath = longestPath[0]

positionX = 0

# Pool of y coordinates, 10 units apart, handed out one per placed node (here
# and by the later placement loop). It is sized from the graph so that it
# cannot run dry on graphs with more than 100 nodes; the previous fixed pool
# of 100 slots is kept as the lower bound.
ypositions = list(range(0, 10 * max(network.number_of_nodes(), 100), 10))

nodePosition = {}

# Walk the spine pairwise. Each node starts where its predecessor starts,
# shifted right by the part of the predecessor that is not covered by the
# overlap between the two. The first spine node gets no entry here; the later
# placement loop positions it as a predecessor of the second node.
for previousNode, node in zip(spinePath, spinePath[1:]):
    overhang = len(readSequenceDb[previousNode]) - network[previousNode][node]['overlap']
    positionX = positionX + overhang

    nodePosition[node] = (positionX, ypositions.pop(0))

In [29]:
# Independent shallow copy of the spine positions; nodeDict is extended later
# while nodePosition keeps the spine-only view.
nodeDict = dict(nodePosition)

In [30]:
# Nodes that already have a position, in nodeDict order.
incorporatedNodes = list(nodeDict)

# Every other node of the graph, in graph order, still waiting for a position.
nodesToIncorporate = [name for name in network.nodes() if name not in nodeDict]

In [31]:
# Scratch check of range() with a step argument; the result is not assigned
# to anything, so no other cell depends on it.
list(range(1,10,2))

[1, 3, 5, 7, 9]

In [32]:
# Give a position to every node that is not yet in nodeDict. Each pass of the
# while loop looks at the neighbours of all positioned nodes: x is derived
# from the positioned neighbour, y is the next free slot of ypositions.

# Set copy of incorporatedNodes for constant-time membership tests; the list
# itself is still kept up to date.
placedNodes = set(incorporatedNodes)

while len(nodesToIncorporate) > 0:

    # Positions found during this pass. They are merged into nodeDict only
    # after the pass, because nodeDict is being iterated.
    tempDict = {}

    for node in nodeDict:
        x, y = nodeDict[node]

        for successor in network.successors(node):
            if successor in placedNodes:
                continue

            # Same rule as along the spine: the successor starts where `node`
            # starts, shifted right by the part of `node` that is not covered
            # by the overlap (length of `node`, not of the successor).
            xNew = x + len(readSequenceDb[node]) - network[node][successor]['overlap']
            yNew = ypositions.pop(0)

            tempDict[successor] = (xNew, yNew)
            nodesToIncorporate.remove(successor)
            incorporatedNodes.append(successor)
            placedNodes.add(successor)

        for predecessor in network.predecessors(node):
            if predecessor in placedNodes:
                continue

            # Mirror image: the predecessor starts left of `node` by the part
            # of the predecessor that is not covered by the overlap.
            xNew = x - len(readSequenceDb[predecessor]) + network[predecessor][node]['overlap']
            yNew = ypositions.pop(0)

            tempDict[predecessor] = (xNew, yNew)
            nodesToIncorporate.remove(predecessor)
            incorporatedNodes.append(predecessor)
            placedNodes.add(predecessor)

    if not tempDict:
        # A complete pass that places nothing means none of the remaining
        # nodes shares an edge with a positioned node, so further passes could
        # never place them. Stop instead of looping forever.
        print("{} node(s) are not connected to the placed nodes and were left without a position".format(len(nodesToIncorporate)))
        break

    nodeDict.update(tempDict)

In [33]:
# Same positions as nodeDict, each stored as a two-element integer array.
layout = {
    name: np.array([position[0], position[1]], dtype='int')
    for name, position in nodeDict.items()
}

In [34]:
from bokeh.models import HoverTool,ColumnDataSource, WheelZoomTool, LabelSet
from bokeh.plotting import show, figure

# Order the layout by node name, then split it into parallel tuples:
# names, coordinate arrays, and finally the separate x and y values.
layoutByName = sorted(layout.items())
nodes, nodes_coordinates = zip(*layoutByName)
nodes_xs, nodes_ys = zip(*nodes_coordinates)

In [35]:
# Rectangle columns built directly at notebook level; 'widths' and 'name'
# are created but stay empty.
d = dict(xleft=[], xright=[], ybottom=[], ytop=[], widths=[], name=[])

nodes, nodes_coordinates = zip(*sorted(layout.items()))
d['xleft'], d['ybottom'] = zip(*nodes_coordinates)

for left, bottom, node in zip(d['xleft'], d['ybottom'], nodes):
    seqLength = len(readSequenceDb[node])
    # Both the right edge and the top edge are offset by the sequence length.
    d['xright'].append(left + seqLength)
    d['ytop'].append(bottom + seqLength)

In [36]:
def get_nodes_specs(_network, _layout, _seqDict, _height=5):
    """Build the column data used to draw every node as a rectangle.

    Parameters
    ----------
    _network
        Not used; kept so that existing calls keep working.
    _layout : dict
        Maps a node name to its (x, y) position, which becomes the
        bottom-left corner of the rectangle.
    _seqDict : dict
        Maps a node name to its sequence; ``len()`` of the value is the
        rectangle width and the value itself is copied to the 'seq' column.
    _height : number, optional
        Rectangle height (default 5).

    Returns
    -------
    dict
        Equally long lists under the keys 'xleft', 'xright', 'ybottom',
        'ytop', 'widths', 'onhover' (node name) and 'seq', with one entry per
        node, ordered by node name. All lists are empty for an empty layout.

    Raises
    ------
    KeyError
        If a node of ``_layout`` is missing from ``_seqDict``.
    """
    d = dict(xleft=[], xright=[], ybottom=[], ytop=[], widths=[], onhover=[], seq=[])

    # Iterating the sorted names (rather than unpacking zip(*items)) also
    # works when the layout is empty.
    for node in sorted(_layout):
        left, bottom = _layout[node][0], _layout[node][1]
        sequence = _seqDict[node]
        width = len(sequence)

        d['xleft'].append(left)
        d['xright'].append(left + width)
        d['ybottom'].append(bottom)
        d['ytop'].append(bottom + _height)
        d['widths'].append(width)
        d['onhover'].append(node)
        d['seq'].append(sequence)

    return d

def get_edges_specs(_network, _layout, _min_alpha=0.1, _min_width=0.5):
    """Build the column data used to draw every edge as a line.

    Parameters
    ----------
    _network
        Graph whose ``edges(data=True)`` yields ``(u, v, data)`` triples with
        an 'overlap' entry in ``data``.
    _layout : dict
        Maps a node name to its (x, y) position.
    _min_alpha : float, optional
        Lower bound for the line alpha (default 0.1).
    _min_width : float, optional
        Lower bound for the line width (default 0.5).

    Returns
    -------
    dict
        Equally long lists under the keys 'xs' and 'ys' (start/end
        coordinates), 'alphas', 'widths' and 'onhover' (tooltip text), one
        entry per edge. All lists are empty when the graph has no edges.

    Notes
    -----
    Alpha is ``1 + log2(overlap / max_overlap)`` and width is
    ``1 + log2(10 * overlap / max_overlap)``. Both formulas drop to zero or
    below for small overlaps and are undefined for overlaps <= 0, so the
    results are clamped to the given minimums (alpha additionally to at
    most 1).
    """
    d = dict(xs=[], ys=[], alphas=[], widths=[], onhover=[])

    edges = list(_network.edges(data=True))
    if not edges:
        # max() below would fail on an empty sequence.
        return d

    max_weight = max(data['overlap'] for _, _, data in edges)

    def _log2_scale(overlap, factor):
        # 1 + log2(factor * overlap / max_weight), or None where log is undefined.
        # overlap > 0 implies max_weight > 0, so the division is safe.
        if overlap <= 0:
            return None
        return 1 + log(factor * overlap / max_weight) / log(2)

    # example: { ..., ('user47', 'da_bjoerni', {'overlap': 3}), ... }
    for u, v, data in edges:
        overlap = data['overlap']

        alpha = _log2_scale(overlap, 1)
        alpha = _min_alpha if alpha is None else min(1.0, max(_min_alpha, alpha))

        width = _log2_scale(overlap, 10)
        width = _min_width if width is None else max(_min_width, width)

        d['xs'].append([_layout[u][0], _layout[v][0]])
        d['ys'].append([_layout[u][1], _layout[v][1]])
        d['alphas'].append(alpha)
        d['widths'].append(width)
        d['onhover'].append("Overlap: " + str(overlap))
    return d

In [37]:
# Column data for the node rectangles.
nodeSpecs = get_nodes_specs(network, layout, readSequenceDb)
nodes_source = ColumnDataSource(nodeSpecs)

# Column data for the edge lines.
edgeSpecs = get_edges_specs(_network=network, _layout=layout)
lines_source = ColumnDataSource(edgeSpecs)


In [38]:
# The tooltip shows the 'onhover' column of the glyph under the cursor.
hover = HoverTool(tooltips=[('Info', '@onhover')])

plot = figure(
    plot_width=1500,
    plot_height=500,
    tools=['tap', 'box_zoom', hover, 'reset'],
)

# One semi-transparent grey rectangle per node.
r_quad = plot.quad(
    left='xleft',
    right='xright',
    top='ytop',
    bottom='ybottom',
    color='grey',
    alpha=0.5,
    source=nodes_source,
)

# One line per edge; thickness and opacity come from the data source.
r_lines = plot.multi_line(
    'xs',
    'ys',
    line_width='widths',
    alpha='alphas',
    color='gray',
    source=lines_source,
)

plot.add_tools(WheelZoomTool())

# Optional sequence labels, currently disabled:
#labels = LabelSet(x='xleft', y='ybottom', text='seq', level='glyph', x_offset=0, y_offset=3, source=nodes_source, render_mode='canvas',text_font_size='10pt')
#plot.add_layout(labels)
#plot.add_layout(labels)

In [39]:
# Render the assembled figure.
show(plot)