In [None]:
from header import *

In [None]:
# SPEEDUP [Tools, scaling] -- time/alignment
# Keep only the runs that finished with status "ok" and have n >= 3000, then
# draw one log-log plot of seconds-per-pair against n for every error rate e.
df = read_benchmarks('table/tools_N1e7.tsv')
df = df[(df['exit_status'] == 'ok') & (df['n'] >= 3 * 10**3)]
for e in pd.unique(df['e']):
    df_n = df[df['e'] == e]
    plot_scaling(
        df_n,
        x='n',
        y='s_per_pair',
        filename=f'tools_e{e}',
        xlog=True,
        ylog=True,
        trend_line='poly',
        cone='csh',
        cone_x=3 * 10**4,
    )

In [None]:
# SCALING WITH N
# Log-log plot of seconds-per-pair against n, split by (alg, r).
df = read_benchmarks('table/scaling_n_N1e7.tsv')
plot_scaling(
    df,
    x='n',
    y='s_per_pair',
    split=['alg', 'r'],
    filename='scaling_n',
    xlog=True,
    ylog=True,
    trend_line='poly',
    cone='cp-csh',
    cone_x=100,
)

In [None]:
# SCALING WITH E
# Seconds-per-pair against the error percentage, split by (alg, r), with a
# linear y axis and solid lines.
df = read_benchmarks('table/scaling_e_N1e6.tsv')
plot_scaling(
    df,
    x='e_pct',
    y='s_per_pair',
    split=['alg', 'r'],
    filename='scaling_e',
    ylog=False,
    ls='-',
)

In [None]:
# TABLE AT 5%, 10^7
# Pivot of the successful runs at n = 10^7: one row per algorithm, one column
# per (metric, error rate). Note that no filter on e is applied, so every
# error rate present in the file gets a column, not only 5%.
df = read_benchmarks('table/tools_N1e7.tsv')
# The ordered categorical fixes the row order of the pivot table.
df['alg_order'] = pd.Categorical(
    df['alg'],
    categories=['edlib','biwfa','sh','csh'],
    ordered=True
)
df = df[df.exit_status == 'ok']
df = df[df.n == 10**7]
# pivot_table aggregates with its default aggfunc (mean).
pt = df.pivot_table(['s_per_pair','max_uss'],['alg_order'], ['e'])
pt = pt[['s_per_pair', 'max_uss']]
display(pt)

# Speedup
# For each error rate, compare the fastest of (sh, csh) with the fastest of
# (edlib, biwfa). An algorithm without a successful run shows up as NaN or as
# a missing row; the builtin min() silently returns NaN when its *first*
# argument is NaN, so use NaN-aware minima over label lookups that default to
# NaN instead. If every algorithm of a group is missing, the printed speedup
# is nan (numpy also emits an "All-NaN" RuntimeWarning).
times = pt['s_per_pair']
for x in times:
    t = times[x]
    our_best = np.nanmin([t.get(alg, np.nan) for alg in ('sh', 'csh')])
    their_best = np.nanmin([t.get(alg, np.nan) for alg in ('edlib', 'biwfa')])
    print(f"Speedup at {x:0.2f}: {their_best/our_best:.4}")


In [None]:
# MAX MEMORY
# Largest max_uss per (e, alg) for the sh and csh rows. No exit_status filter
# is applied in this cell. After this cell `df` is the grouped Series.
df = read_benchmarks('table/tools_N1e7.tsv')
df = df[df['alg'].isin(['sh', 'csh'])]
df = df.groupby(['e', 'alg'])['max_uss'].max()
display(df)

In [None]:
# SCALING WITH K
# Seconds-per-pair against k for cp-sh and cp-csh at n = 10^3 .. 10^7,
# drawn on a log y axis and saved as scaling_k.pdf.

df = read_benchmarks('table/scaling_k_N1e7.tsv')
algs = ['cp-sh', 'cp-csh']
ns = [10**3, 10**4, 10**5, 10**6, 10**7]
df = df[df['alg'].isin(algs)]
df = df[df['n'].isin(ns)]

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
# Rows are k; columns are a (alg, m, n) MultiIndex, so d[alg] selects all
# curves of one algorithm.
d = df.pivot(index='k', columns=['alg', 'm', 'n'], values='s_per_pair')
for alg in algs:
    d[alg].plot(
        ax=ax,
        alpha=0.6,
        color=algo2color(alg),
        marker=algo2marker(alg),
        ls='-',
        legend=False,
    )
ax.set_xticks(pd.unique(df['k']))
ax.set_yscale('log')
plt.savefig(plots_dir / 'scaling_k.pdf', bbox_inches='tight')
plt.show()

In [None]:
# SCALING WITH K (over e)
# Seconds-per-pair against k for cp-sh and cp-csh at a few error rates.
# The same figure is produced twice: once with a linear y axis
# (scaling_k_e_lin.pdf) and once with a log y axis (scaling_k_e_log.pdf).

df = read_benchmarks('table/scaling_k_n_N1e7.tsv')
algs=['cp-sh','cp-csh']
df = df[df.alg.isin(algs)]
es = [0.02, 0.06, 0.10, 0.14]
df = df[df.e.isin(es)]

# Rows are k; columns are a (alg, m, e) MultiIndex, so d[alg] selects all
# curves of one algorithm. The pivot is identical for both figures, so it is
# computed once.
d = df.pivot(index='k', columns=['alg', 'm', 'e'], values='s_per_pair')

# One pass per output file; only the y scale and the file name differ.
for log_y, pdf_name in [(False, 'scaling_k_e_lin.pdf'), (True, 'scaling_k_e_log.pdf')]:
    fig = plt.figure()
    ax = fig.add_axes([0,0,1,1])
    for alg in algs:
        d[alg].plot(ax=ax, alpha=0.6, color=algo2color(alg), marker=algo2marker(alg), ls='-', legend=False)
    ax.set_xticks(pd.unique(df.k))
    if log_y:
        ax.set_yscale('log')
    plt.savefig(plots_dir/pdf_name, bbox_inches='tight')
    plt.show()

In [None]:
# MEMORY
# One log-log plot of max_uss against n per error rate e.
# NOTE(review): `tools` is not defined in the cells visible here; presumably
# it is provided by `header` or an earlier cell -- confirm.
name = "memory"
df = tools
# The exit_status == "ok" filter is not applied in this cell (the original
# kept it commented out), so runs that did not finish are included.
df = df[~df['alg'].isin(['cp-csh+gap', 'csh+gap'])]
# The constant memory of 30MB (snakemake error?) is too large in small tests
df = df[df['n'] >= 10**4]


def fit_from(algo):
    """Return the value handed to plot_scaling as fit_min for `algo`."""
    small_start = ('dijkstra', 'dijkstra_nogreedy', 'pa_noprune', 'pa_noprune_nogreedy')
    return 10**4 if algo in small_start else 10**5


for e in pd.unique(df['e']):
    df_n = df[df['e'] == e]
    # Drop '-' placeholders first, then the rows at or below the 0.025
    # threshold; the order matters if the column still contains strings.
    df_n = df_n[df_n['max_uss'] != '-']
    df_n = df_n[df_n['max_uss'] > 0.025]
    if not df_n.empty:
        plot_scaling(
            df_n,
            x='n',
            y='max_uss',
            filename=f'{name}_e{e}',
            xlog=True,
            ylog=True,
            trend_line='poly',
            fit_min=fit_from,
        )

In [None]:
# Expanded states
# Shows a table of `band` per (e, alg) and n, then one log-log plot of
# `expanded` against n per error rate e.
# NOTE(review): `tools` is not defined in the cells visible here; presumably
# it is provided by `header` or an earlier cell -- confirm.
name = "expanded"
df = tools
df = df[df.alg != 'cp-csh+gap']
df = df[df.alg != 'csh+gap']

# Only print things that didn't time out.
df = df[df.exit_status == "ok"]
def fit_from(algo):
    """Return the value handed to plot_scaling as fit_min; 10000 for every algorithm."""
    return 10000
# Not used in this cell; kept because other cells may rely on it.
import math

# Table of band
b = df[['alg', 'n', 'e', 'band']].dropna()
alg_order = ['sh', 'csh', 'csh-noprune',  'dijkstra']
# Rank lookup for sorting. Algorithms that are not listed in alg_order sort
# after the listed ones instead of raising ValueError from list.index().
alg_rank = {alg: idx for idx, alg in enumerate(alg_order)}
b['alg_idx'] = b['alg'].map(lambda a: alg_rank.get(a, len(alg_order)))
b = b.sort_values(by=['e', 'n', 'alg_idx'])
# sort=False keeps the row order established by sort_values above.
display(pd.pivot_table(b, values='band', columns=['n'], index=['e', 'alg'], sort=False))

# Plots of expanded states
for e in pd.unique(df.e):
    df_n = df[df.e == e]
    df_n = df_n.dropna(subset=['expanded'])
    plot_scaling(df_n, y='expanded', x='n', filename=f'{name}_e{e}', xlog=True, ylog=True, trend_line='poly', fit_min=fit_from, cone='csh')

In [None]:
# MAX MEMORY USAGE
# Rows with n >= 10^6 and e >= 0.1, sorted by algorithm, showing the
# alg / n / e / s / max_uss columns.
# NOTE(review): `tools` is not defined in the cells visible here -- confirm
# where it comes from.

df = tools
df = df[(df['n'] >= 1000000) & (df['e'] >= 0.1)]
df = df.sort_values(by='alg')
display(df[['alg', 'n', 'e', 's', 'max_uss']])

In [None]:
# BEST PARAMS
# All rows of `params` for cp-sh at e = 0.1 and n = 10^6.
# NOTE(review): `params` is not defined in the cells visible here -- confirm
# where it comes from.
p = params
p = p[(p['e'] == 0.1) & (p['alg'] == 'cp-sh') & (p['n'] == 1000000)]
display(p)  # all
