In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the aggregated tool table and relabel its columns in one chain:
#   indexing -> source, seed_type -> type,
#   pairwise_alignment -> target, count -> value.
# Rows containing any missing value are discarded. `agg` is the flow table
# handed to `weave` in the cells below.
agg = (
    pd.read_csv('../raw_data/aggregate_tool_info.csv')
    .rename(
        columns={
            "indexing": "source",
            "seed_type": "type",
            "pairwise_alignment": "target",
            "count": "value",
        }
    )
    .dropna()
)
agg

Unnamed: 0,source,type,target,value
0,BWT,YNN,SW,1
1,BWT-FM,NNN,HD,1
2,BWT-FM,NNN,SW,1
3,BWT-FM,NNN,Semi-Global,1
4,BWT-FM,NNY,NW,2
5,BWT-FM,NNY,SW,2
6,BWT-FM,NNY,SW & NW,2
7,BWT-FM,YNN,Banded Myers Bit Vector,1
8,BWT-FM,YNN,HD,4
9,BWT-FM,YNN,Non-DP Heuristic,4


In [2]:
from ipysankeywidget import SankeyWidget
from floweaver import *

# Pixel dimensions reused by every Sankey widget rendered in this notebook.
size = {'width': 1200, 'height': 600}

# Two process groups: index structures ('index') and pairwise-alignment
# methods ('pairwise'). Each list enumerates the raw labels selected into the
# group; spelling variants (e.g. 'Semi-Global' / 'Semi-global',
# 'SW & NW' / 'SW and NW') are listed separately.
nodes = dict(
    index=ProcessGroup([
        'hashing',
        'BWT-FM',
        'BWT-FM + hashing',
        'BWT-ST',
        'suffix array',
        'suffix tree',
        'BWT',
    ]),
    pairwise=ProcessGroup([
        'Banded Myers Bit Vector',
        'HD',
        'NW',
        'SW',
        'SW & NW',
        'SW & Semi-Global',
        'SW and NW',
        'SW and Semi-Global',
        'Non-DP Heuristic',
        'Non-DP heuristic',
        'Landau-Vishkin',
        'Rabin-Karp Algorithm',
        'Semi-global',
        'Sparse DP',
        'Semi-Global',
        'Myers Bit-Vector',
    ]),
)

# Alternative node definition using descriptive labels (currently disabled):
#nodes = {
#    'index': ProcessGroup(['Hashing', 'BWT-FM', 'BWT-ST', 'Suffix Array', 'Suffix Tree']),
#    'pairwise': ProcessGroup(['Banded Myers Bit Vector', 'Based on the Wrapper, Local and Global', 'Global', 'Local',
#                             'Hamming Distance', 'Local & Global', 'Local & Semi-Global', 'Non-DP Heuristic',
#                             'Hamming, Local, and Global Alignment', 'Landau-Vishkin', 'Rabin-Karp Algorithm',
#                             'Sparse DP', 'Semi-Global']),
#}

# Layers in order: the index group first, the pairwise group second.
ordering = [['index'], ['pairwise']]

# A single bundle connecting the index group to the pairwise group.
bundles = [Bundle('index', 'pairwise')]

# Build the diagram definition and render it; the widget is the cell output.
sdd = SankeyDefinition(nodes, bundles, ordering)
weave(sdd, agg).to_widget(
    **size,
    margins={'top': 0, 'bottom': 0, 'left': 50, 'right': 100},
)

SankeyWidget(layout=Layout(height='600', width='1200'), links=[{'source': 'index^*', 'target': 'pairwise^*', '…

In [3]:
# Partition of the 'index' group on the 'process' dimension: 'BWT-FM' stays
# as-is, while the remaining raw labels are folded into named display groups.
index_part = Partition.Simple(
    'process',
    [
        'BWT-FM',
        ('BWT-FM & Hashing', ['BWT-FM + hashing']),
        ('Hashing', ['hashing']),
        ('Other Suffix', ['BWT-ST', 'suffix array', 'suffix tree', 'BWT']),
    ],
)

# Partition of the 'pairwise' group: abbreviations get readable names, and
# spelling variants of the same label end up in the same display group.
pairwise_part = Partition.Simple(
    'process',
    [
        ('Smith-Waterman', ['SW']),
        ('Hamming Distance', ['HD']),
        ('Needleman-Wunsch', ['NW']),
        (
            'Other DP',
            [
                'Rabin-Karp Algorithm',
                'Landau-Vishkin',
                'Sparse DP',
                'Banded Myers Bit Vector',
                'Semi-Global',
                'Semi-global',
                'Myers Bit-Vector',
            ],
        ),
        ('Non-DP Heuristic', ['Non-DP Heuristic', 'Non-DP heuristic']),
        (
            'Multiple Methods',
            ['SW & NW', 'SW & Semi-Global', 'SW and NW', 'SW and Semi-Global'],
        ),
    ],
)

# Alternative pairwise partition using descriptive labels (currently disabled):
#pairwise_part = Partition.Simple('process', [
#    'Local',
#    'Hamming Distance',
#    'Global',
#    'Non-DP Heuristic',
#    ('Multiple Methods', ['Based on the Wrapper, Local and Global', 'Local & Global', 'Local & Semi-Global', 
#                          'Hamming, Local, and Global Alignment']),
#    ('Other DP', ['Rabin-Karp Algorithm', 'Landau-Vishkin', 'Sparse DP', 
#                          'Banded Myers Bit Vector', 'Semi-Global'])
#])

# Attach the partitions to the existing process groups. `sdd` was built from
# this same `nodes` dict, so it is reused unchanged for the re-render below.
nodes['index'].partition, nodes['pairwise'].partition = index_part, pairwise_part

weave(sdd, agg).to_widget(
    **size,
    margins={'top': 0, 'bottom': 0, 'left': 140, 'right': 160},
)

SankeyWidget(groups=[{'id': 'index', 'type': 'process', 'title': '', 'nodes': ['index^BWT-FM', 'index^BWT-FM &…

In [4]:
# Flow partition keyed on the 'source' dimension of the flows, using the same
# four groups as the index partition, so flows are split by originating index
# group.
part_by_index = Partition.Simple(
    'source',
    [
        'BWT-FM',
        ('BWT-FM & Hashing', ['BWT-FM + hashing']),
        ('Hashing', ['hashing']),
        ('Other Suffix', ['BWT-ST', 'suffix array', 'suffix tree', 'BWT']),
    ],
)

# Four colours for the four flow groups.
# NOTE(review): which colour lands on which group is decided by floweaver;
# confirm against the rendered figure.
pal = ['mediumorchid', 'navy', 'darkorange', 'cornflowerblue']

# Rebuild the definition with the flow partition, render it with the palette,
# and save the widget as the Figure 2 PNG.
sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=part_by_index)
(
    weave(sdd, agg, palette=pal)
    .to_widget(**size, margins={'top': 0, 'bottom': 0, 'left': 140, 'right': 160})
    .auto_save_png('../figures/Figure2.png')
)

SankeyWidget(groups=[{'id': 'index', 'type': 'process', 'title': '', 'nodes': ['index^BWT-FM', 'index^BWT-FM &…