In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the aggregated table that feeds the Sankey diagrams.
agg = pd.read_csv('../data/aggregated_table1_new.csv')

# Drop the leading column, rename the remaining columns to the
# source / type / target / value names used by the cells below, and discard
# incomplete rows.
agg = (
    agg
    .drop(columns=agg.columns[:1])
    .rename(columns={
        "Index": "source",
        "Global": "type",
        "Pairwise": "target",
        "n": "value",
    })
    .dropna()
)
agg

Unnamed: 0,source,type,target,value
0,BWT-FM,Backward Search,Banded Myers Bit Vector,1
1,BWT-FM,Backward Search,"Based on the Wrapper, Local and Global",1
2,BWT-FM,Backward Search,Global,1
3,BWT-FM,Backward Search,Hamming Distance,8
4,BWT-FM,Backward Search,Local,5
5,BWT-FM,Backward Search,Local & Global,2
6,BWT-FM,Chain of Seeds,Local & Global,1
7,BWT-FM,Seed,Global,1
8,BWT-FM,Seed,Local,1
9,BWT-FM,Seed,Local & Global,1


In [2]:
from ipysankeywidget import SankeyWidget
from floweaver import *

# Widget dimensions reused by every diagram rendered in this notebook.
size = {'width': 570, 'height': 300}

# Two process groups: indexing techniques on one side, pairwise-alignment
# techniques on the other. Labels use the Local/Global naming (an earlier
# draft of this cell used SW/NW-style labels instead).
nodes = dict(
    index=ProcessGroup([
        'Hashing',
        'BWT-FM',
        'BWT-ST',
        'Suffix Array',
        'Suffix Tree',
    ]),
    pairwise=ProcessGroup([
        'Banded Myers Bit Vector',
        'Based on the Wrapper, Local and Global',
        'Global',
        'Local',
        'Hamming Distance',
        'Local & Global',
        'Local & Semi-Global',
        'Non-DP Heuristic',
        'Hamming, Local, and Global Alignment',
        'Landau-Vishkin',
        'Rabin-Karp Algorithm',
        'Sparse DP',
        'Semi-Global',
    ]),
)

# One layer per group, in left-to-right order.
ordering = [['index'], ['pairwise']]

# A single bundle carrying every flow from the index group to the pairwise group.
bundles = [Bundle('index', 'pairwise')]

sdd = SankeyDefinition(nodes, bundles, ordering)
weave(sdd, agg).to_widget(**size)

SankeyWidget(layout=Layout(height='300', width='570'), links=[{'source': 'index^*', 'target': 'pairwise^*', 't…

In [3]:
# Split the index group into Hashing, BWT-FM and a combined "Other" bucket.
index_part = Partition.Simple(
    'process',
    [
        'Hashing',
        'BWT-FM',
        ('Other', ['BWT-ST', 'Suffix Array', 'Suffix Tree']),
    ],
)

# Split the pairwise group: four stand-alone methods plus two combined buckets.
# (An earlier SW/NW-labelled version of this partition was replaced by this one.)
pairwise_part = Partition.Simple(
    'process',
    [
        'Local',
        'Hamming Distance',
        'Global',
        'Non-DP Heuristic',
        (
            'Multiple Methods',
            [
                'Based on the Wrapper, Local and Global',
                'Local & Global',
                'Local & Semi-Global',
                'Hamming, Local, and Global Alignment',
            ],
        ),
        (
            'Other DP',
            [
                'Rabin-Karp Algorithm',
                'Landau-Vishkin',
                'Sparse DP',
                'Banded Myers Bit Vector',
                'Semi-Global',
            ],
        ),
    ],
)

# Attach the partitions to the existing process groups and redraw with the
# Sankey definition built earlier.
nodes['index'].partition = index_part
nodes['pairwise'].partition = pairwise_part
weave(sdd, agg).to_widget(**size)

SankeyWidget(groups=[{'id': 'index', 'type': 'process', 'title': '', 'nodes': ['index^Hashing', 'index^BWT-FM'…

In [16]:
# Partition over the 'type' column; not used by this cell's diagram, but
# consumed later when the 'global' waypoint is created.
part_by_global = Partition.Simple('type', ['Seed', 'Backward Search', 'Chain of Seeds'])

# Partition used to colour flows by their 'source' value.
# 'Other' must be given as a (label, values) pair: a bare 'Other' string would
# only match rows whose source is literally "Other", so the BWT-ST,
# Suffix Array and Suffix Tree flows would not be assigned to this group.
# The grouping mirrors the one used for index_part.
part_by_index = Partition.Simple('source', [
    'Hashing',
    'BWT-FM',
    ('Other', ['BWT-ST', 'Suffix Array', 'Suffix Tree']),
])

# One colour per flow group.
pal = ['cornflowerblue', 'darkorange', 'limegreen']

sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=part_by_index)
weave(sdd, agg, palette=pal).to_widget(**size)

SankeyWidget(groups=[{'id': 'index', 'type': 'process', 'title': '', 'nodes': ['index^Hashing', 'index^BWT-FM'…

In [17]:
# Add a middle layer: a waypoint partitioned by part_by_global, placed between
# the index and pairwise groups.
nodes['global'] = Waypoint(part_by_global)

ordering = [['index'], ['global'], ['pairwise']]

# Route the index -> pairwise bundle through the new waypoint.
bundles = [Bundle('index', 'pairwise', waypoints=['global'])]

sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=part_by_index)

# Render the three-layer diagram and ask the widget to save itself as a PNG.
(
    weave(sdd, agg, palette=pal)
    .to_widget(**size)
    .auto_save_png('../figures/Sankey_Table1_Tools.png')
)

SankeyWidget(groups=[{'id': 'index', 'type': 'process', 'title': '', 'nodes': ['index^Hashing', 'index^BWT-FM'…

In [39]:
# Load the full Table 1 CSV and preview its first five rows.
data = pd.read_csv('../data/Table1_new.csv')
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,Year_of_publication,Aligner,URL,Citation,read_length,max_read_length_tested,Title,Number_of_Citations,Indexing,...,Pairwise_alignment_2,Pairwise_alignment (DP-based?),fix length seed,Variable length seed (hybrid seeding),spaced seed,chain_of_seeds,Application,email - first author,email - last author,Verified
0,43.0,2011,Bismark,https://www.bioinformatics.babraham.ac.uk/proj...,Krueger et al. ‎2011,Ultra-short,50,Bismark: a flexible aligner and methylation ca...,1550,BWT-FM,...,Hamming Distance,N,Y,N,N,N,BS-Seq,felix.krueger@bbsrc.ac.uk,,
1,16.0,2009,BRAT,http://compbio.cs.ucr.edu/brat/,Harris et al. 2009,Ultra-short,26,BRAT: bisulfite-treated reads analysis tool,64,Hashing,...,Hamming Distance,N,Y,N,N,N,BS-Seq,elenah@cs.ucr.edu,stefano.lonardi@ucr.edu,
2,54.0,2012,BRAT-BW,http://compbio.cs.ucr.edu/brat/,Harris et al. 2012,Ultra-short,62,BRAT-BW: efficient and accurate mapping of bis...,70,BWT-FM,...,Hamming Distance,N,N,Y,N,N,BS-Seq,elenayharris@gmail.com,stefano.lonardi@ucr.edu,
3,32.0,2010,BS-Seeker,https://github.com/BSSeeker/BSseeker2,Chen et al. ‎2010,Ultra-short,36,BS Seeker: precise mapping for bisulfite seque...,215,BWT-FM,...,Hamming Distance,N,Y,N,N,N,BS-Seq,paoyang@gate.sinica.edu.tw,matteop@mcdb.ucla.edu,
4,63.0,2013,BS-Seeker2,https://github.com/BSSeeker/BSseeker2,Guo et al. ‎2013,Short,250,BS-Seeker2: a versatile aligning pipeline for ...,173,BWT-FM,...,Local & Global,Y,Y,N,N,N,BS-Seq,guoweilong@cau.edu.cn,matteop@mcdb.ucla.edu,
