In [None]:
from header import *
from scipy import optimize
# Notebook-wide settings: base font size for every figure, the directory that
# saved plots are written to, and a time limit value.
plt.rcParams['font.size'] = 16
plots_dir = Path('imgs/')
timelimit = 600  # NOTE(review): presumably seconds; not referenced in the visible cells -- confirm

In [None]:
# For every (alg, cnt, n, e) input, keep only the parameter run with the smallest `s`.
params = read_benchmarks('table/params_N1e7.tsv')
best_params = params.loc[params.groupby(['alg', 'cnt', 'n', 'e'])['s'].idxmin()]

# Combine the fixed-k tool runs with those best runs; join='inner' keeps only
# the columns both tables share.
tools = (
    pd.concat([read_benchmarks('table/tools_N1e7.tsv'), best_params], join='inner')
    .sort_values(by=['e', 'n', 'alg'])
)

In [None]:
def plot_scaling(df, x, y, prefix, hline=False, algo1=None, algo2=None, show_mean=False, trend_line='', xlog=False, ylog=False, title=None, split='alg', fit_min=None):
    """Plot the mean of `y` against `x`, one marker series per `split` value, and save it as a PDF.

    Parameters
    ----------
    df : DataFrame holding at least the columns `x`, `y` and `split`. With
        `hline=True` the columns `len`, `refsize` and `ref` are read as well.
    x, y : column names for the horizontal and vertical axes.
    prefix : first component of the output file name.
    hline : draw dashed 'min' / 'max' reference lines; only supported for
        y in {'explored_states', 'crumbs', 'explored_per_bp'}.
    algo1, algo2 : accepted for backward compatibility; currently unused.
    show_mean : with trend_line='poly', add an 'all' row holding the overall
        mean of `y` per `split` value.
    trend_line : '' / None / False for no trend line, or 'poly' to fit and draw
        a power law y = e^b * x^a per series (a straight line in log-log space).
    xlog, ylog : use logarithmic axes.
    title : optional figure title; it is also appended to the file name.
    split : column whose distinct values become the separate series.
    fit_min : optional callable mapping a series name to the smallest `x`
        value that takes part in the fit.

    Raises
    ------
    ValueError : for an unknown `trend_line`, or `hline=True` with an
        unsupported `y`.

    The figure is written to `plots_dir / '<prefix>_<x>_<y>[_<title>].pdf'`.
    Colours, display names and axis labels come from `algo2color`,
    `algo2beautiful` and `col2name`, which are not defined in this cell.
    """
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(6, 4, forward=True)

    # Light grey plotting area; the white grid lines are enabled further down.
    ax.set_facecolor('#F3F3F3')

    ax.tick_params(axis='both', which='both', bottom=True, top=False, labelsize=16)

    for spine in ["top", "left", "right"]:
        ax.spines[spine].set_visible(False)

    # Mean of y per (series, x), reshaped to one column per series.
    # observed=True drops categorical series values that do not appear in df.
    mean_col = 'mean_{}'.format(y)
    d = df.groupby([split, x], group_keys=False, observed=True)[y].mean()
    d = d.reset_index(name=mean_col)
    d = d.pivot(index=x, columns=split, values=mean_col)

    # Non-positive means are masked out (they cannot be log-scaled or log-fitted).
    d = d.where(d > 0)

    if trend_line == "poly":
        # Linear fit in log-log space:
        #   ln(y) = a*ln(x) + b   <=>   y = e^b * x^a
        fits = {}
        last_point = {}
        for algo in d.columns:
            s = d[algo].dropna()
            if not s.empty:
                last_point[algo] = (s.index[-1], s.iloc[-1])
            if fit_min:
                s = s[s.index >= fit_min(algo)]
            s = s[s.index > 0]  # ln(x) is undefined for x <= 0
            # A line needs at least two points; otherwise the series gets no trend line.
            if len(s) >= 2:
                fits[algo] = np.polyfit(np.log(s.index.to_numpy(dtype=float)),
                                        np.log(s.to_numpy(dtype=float)), 1)
        xs = list(d.index)
        if show_mean:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
            overall = df.groupby(split, observed=True)[y].mean().rename('all')
            d = pd.concat([d, overall.to_frame().T])

        # Best fit lines and per-series labels
        for algo in d.columns:
            weight = 'bold' if 'seeds' in algo else 'normal'
            label = algo2beautiful(algo)
            if algo in fits:
                a, b = fits[algo]
                regression_line = np.asarray(xs, dtype=float) ** a * np.exp(b)
                ax.plot(xs, regression_line, linestyle='-', color=algo2color(algo), alpha=0.8)
                label += r'$\sim x^{{{:0.2f}}}$'.format(a)
                label_x, label_y = xs[-1], regression_line[-1]
            elif algo in last_point:
                # No fit available: anchor the label at the last data point.
                label_x, label_y = last_point[algo]
            else:
                continue  # series has no plottable data at all
            ax.text(label_x, label_y, label,
                    color=algo2color(algo), ha='center', va='bottom', size=15, alpha=1, weight=weight)
    elif trend_line:
        raise ValueError('unsupported trend_line: {!r}'.format(trend_line))

    # ticks
    if x == 'error_rate':
        ax.xaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0, decimals=0))

    if x == 'cost':
        ax.set_xticks(np.arange(0, max(df[x])+1, 1))

    # Resolve colours before the columns are renamed to their display names.
    colors = [ algo2color(algo) for algo in d.columns ]
    d.columns = [ algo2beautiful(col) for col in d.columns ]

    # Markers only, no connecting lines.
    d.plot(ax=ax, alpha=0.6, zorder=3, rot=0, color=colors, marker='o', ls='', legend=False)

    if hline:
        if y == 'explored_states' or y == 'crumbs':
            miny = df['len'].median()
            maxy = df['refsize'].median()*df['len'].median()
        elif y == 'explored_per_bp':
            miny = 1
            maxy = df['refsize'].median()
        else:
            raise ValueError('hline is not supported for y={!r}'.format(y))

        # Positional lookup: a filtered frame need not contain index label 0.
        ref_label = df['ref'].iloc[0]

        ax.axhline(maxy, color='k', linestyle='--', label=ref_label)
        ax.text(0.0, maxy, 'max    ', ha='right', va='center', size=16)

        ax.axhline(y=miny, color='k', linestyle='--', label=ref_label)
        ax.text(0.0, miny, 'min    ', ha='right', va='center', size=16)

    if ylog:
        ax.set_yscale('log')
    else:
        ax.set_ylim(0)

    if xlog:
        ax.set_xscale('log')

    ax.grid(True, axis='y', which='major', color='w')
    ax.grid(False, axis='y', which='minor')

    # axis labels; the y label is placed horizontally above the axis
    ax.set_xlabel(col2name(x), size=18)
    ax.set_ylabel(col2name(y), rotation=0, ha='left', size=18)
    ax.yaxis.set_label_coords(-0.10,1.00)

    if y == 't(map)':
        ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda num, pos: myticks(num, pos)+'s'))

    filename = '{}_{}_{}'.format(prefix, x, y)

    if trend_line:
        # Prefix the trend type; without a title use the trend type alone
        # instead of producing a literal 'None' in the title and file name.
        title = '{}_{}'.format(trend_line, title) if title else trend_line

    if title:
        ax.set_title(title)
        filename += '_{}'.format(title)

    # Make sure the output directory exists before writing the figure.
    plots_dir.mkdir(parents=True, exist_ok=True)
    plt.savefig(plots_dir/(filename+'.pdf'), bbox_inches='tight')

In [None]:
# TOOL TIME COMPARISON
experiment_name = "time"
# Keep only runs whose exit status is "ok" (drops runs that did not finish, e.g. timeouts).
df = tools.loc[lambda runs: runs.exit_status == "ok"]
def fit_from(algo):
    """Return the smallest x value that takes part in the trend fit for `algo`.

    'dijkstra' and 'pa_noprune' are fitted from 0 (i.e. over all points);
    every other algorithm is fitted from 10**4 upwards.
    """
    return 0 if algo in ('dijkstra', 'pa_noprune') else 10**4

# One log-log runtime figure per value of e, each with a power-law trend line per algorithm.
for e in pd.unique(df['e']):
    df_n = df.loc[df['e'] == e]
    plot_scaling(
        df_n,
        x='n',
        y='s_per_pair',
        prefix=experiment_name+'e={}'.format(e),
        title='e={}'.format(e),
        xlog=True,
        ylog=True,
        trend_line='poly',
        fit_min=fit_from,
    )

In [None]:
# TOOL MEMORY COMPARISON
experiment_name = "memory"
# Keep only runs whose exit status is "ok", then restrict to n >= 10**4.
df = (
    tools
    .loc[lambda runs: runs.exit_status == "ok"]
    .loc[lambda runs: runs.n >= 10**4]
)
def fit_from(algo):
    """Return the smallest x value used for a trend fit: 10**4 for every algorithm.

    NOTE(review): this replaces the `fit_from` defined in the time-comparison
    cell once this cell runs -- confirm the shadowing is intended.
    """
    lower_bound = 10**4
    return lower_bound

# One log-log memory figure per value of e (no trend line); the "pa" rows are
# shown as a table before each figure.
for e in pd.unique(df['e']):
    df_n = df.loc[df['e'] == e]
    display(df_n.loc[df_n['alg'] == "pa"])
    plot_scaling(
        df_n,
        x='n',
        y='max_uss',
        prefix=experiment_name+'e={}'.format(e),
        title='e={}'.format(e),
        xlog=True,
        ylog=True,
        trend_line=None,
        fit_min=None,
    )

In [None]:
# SCALING WITH e
# TODO: plot labels
experiment_name = "e_scaling"
# Only the n == 10000 runs of the N=1e6 benchmark table are plotted.
df = read_benchmarks('table/tools_N1e6.tsv').loc[lambda runs: runs.n == 10000]
plot_scaling(
    df,
    x='e',
    y='s_per_pair',
    prefix=experiment_name,
    xlog=False,
    ylog=True,
    trend_line=False,
)

In [None]:
# BEST PARAMS
e = 0.1
display(best_params.loc[best_params.e == e])  # best run per input at this e
display(params.loc[params.e == e])  # all parameter runs at this e


In [None]:
# SCALING WITH K
# TODO: Merge these lines into a single figure per e
# Parameter notes per e:
#   e=0.01: k=31, m=0
#   e=0.05: k=12, m=0
#   e=0.10: ???
#   e=0.20: k=8..10, m=1, for large n, k>=10
e = 0.1

# One linear-scale figure per (n, m) pair; runs with s_per_pair >= 1000 are left out.
for n in pd.unique(params['n']):
    for m in (0, 1):
        df2 = params[
            (params['e'] == e)
            & (params['n'] == n)
            & (params['m'] == m)
            & (params['s_per_pair'] < 1000)
        ]
        plot_scaling(
            df2,
            x='k',
            y='s_per_pair',
            prefix='k',
            xlog=False,
            ylog=False,
            title=f'n = {n}, m = {m}',
        )

In [None]:
# Show the merged benchmark table (tool runs plus the best-parameter runs).
display(tools)