In [1]:
import common
import importlib
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import socket

# Disable pandas' display truncation so that every column and every row of a dataframe is rendered
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, None)

In [18]:
# Filter shared by both views: keep only the runs without reader threads that were
# executed on the hosts scyper21 / scyper22
pers_experiments = "num_threads_read == 0 and (hostname == 'scyper21' or hostname == 'scyper22')"

# graphalytics inserts
data_ins = common.import_gfe("view_graphalytics_inserts").query(pers_experiments).copy() # data from the experiments
# Replace NaT with 0, otherwise the records are ignored in the group by.
# The result is assigned back instead of calling fillna(inplace=True) on the selected
# column: the chained in-place form does not reliably write through to `data_ins`.
data_ins["build_frequency"] = data_ins["build_frequency"].fillna(pd.Timedelta(0))

# For sortledton keep only the runs with block_size == 512; other libraries are kept as they are
data_ins = data_ins.query("((block_size == 512) and library == 'sortledton') or library != 'sortledton'")

# graphalytics load
data_load = common.import_gfe("view_graphalytics_load").query(pers_experiments).copy()
# shape the dataframe as data_ins
data_load["build_frequency"] = pd.Timedelta(0)
data_load["build_frequency_secs"] = np.nan  # np.NaN no longer exists in NumPy >= 2.0
data_load["num_snapshots_created"] = 0
data_load["num_threads_write"] = 0
data_load["block_size"] = -1

# One row per configuration and one column per algorithm, holding the median of `median_secs`
data = pd.concat([data_ins, data_load])
data = data.groupby(["library", "compiler_family", "graph", "build_frequency","num_threads_read", "num_threads_write", "algorithm"]) \
    .agg(completion_time=("median_secs", "median"), count=("median_secs", "count"))

data = data.unstack("algorithm")["completion_time"]
# Index level 1 is `compiler_family`; expose it under the shorter name `compiler`
data.index = data.index.set_names("compiler", level=1)

List of available executions:

In [19]:
def fn_join_threads(grp):
    """Return the distinct values of `grp` joined into one comma-separated string."""
    return ", ".join(str(value) for value in grp.unique())

# For every (library, compiler) pair, list the writer and reader thread counts present in `data`
(
    data.reset_index()
    .groupby(["library", "compiler"])
    .agg(
        num_threads_w=("num_threads_write", fn_join_threads),
        num_threads_r=("num_threads_read", fn_join_threads),
    )
)

Unnamed: 0_level_0,Unnamed: 1_level_0,num_threads_w,num_threads_r
library,compiler,Unnamed: 2_level_1,Unnamed: 3_level_1
csr3,gcc,0,0
csr3-lcc,gcc,0,0
g1_v6-ref-ignore-build,gcc,20,0
livegraph3_ro,gcc,20,0
llama8-ref,gcc,16,0
sortledton,gcc,56,0
stinger7-ref,gcc,56,0
teseo-lcc-dv.12b,gcc,56,0
teseo-lcc.12,gcc,56,0


### CSR

In [20]:
# Column order of the CSR table
csr_columns = ["bfs", "cdlp", "lcc_std", "lcc_sorted", "pagerank", "sssp", "wcc"]

# `csr3-lcc` provides every algorithm; its `lcc` measurement is reported as `lcc_sorted`
csr = (
    data.loc[("csr3-lcc", "gcc")]
    .droplevel([1, 2, 3])  # keep only the `graph` level of the index
    .rename(columns={"lcc": "lcc_sorted"})
)
# `csr3` contributes only its `lcc` measurement, reported as `lcc_std`
csr_plain = data.loc[("csr3", "gcc")].droplevel([1, 2, 3])
csr["lcc_std"] = csr_plain["lcc"]

csr = csr[csr_columns]  # reorder the columns
csr = pd.concat({"csr": csr}, names=["library"])  # prepend the key `csr`

In [5]:
csr

Unnamed: 0_level_0,algorithm,bfs,cdlp,lcc_std,lcc_sorted,pagerank,sssp,wcc
library,graph,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
csr,com-friendster,1.007024,179.372491,835.65181,67.085626,31.575244,53.162582,10.20829
csr,dota-league,0.002061,1.116481,363.35308,53.038842,0.157165,0.434347,0.032011
csr,graph500-22,0.034984,7.841334,306.267269,12.374704,0.409857,0.915066,0.158914
csr,graph500-24,0.133839,24.633421,2236.280409,76.673949,2.718796,5.512071,0.925525
csr,graph500-26,0.49042,142.82706,,587.403737,15.290341,36.776917,4.950484
csr,uniform-24,0.147175,31.71586,21.949081,2.9333,3.445015,7.026035,1.174713
csr,uniform-26,0.581879,176.101286,92.364151,13.929533,18.702736,36.400172,6.212963


### Other systems

In [21]:
# Each competitor is reduced to a frame indexed by `graph` and then prefixed with a
# `library` index level holding its short name.
stinger = pd.concat( [
    data.loc[("stinger7-ref", "gcc")].droplevel([1, 2, 3])
], names=["library"], keys=["stinger"])
llama = pd.concat( [
    data.loc[("llama8-ref", "gcc", slice(None), "00:00:10", 0, 16)].droplevel([0, 1, 3, 4, 5]) # dynamic scheduling in OpenMP
], names=["library"], keys=["llama"] )
graphone = pd.concat( [
   data.loc[("g1_v6-ref-ignore-build", "gcc", slice(None), slice(None), 0, 20)].droplevel([0, 1, 3, 4, 5])
], names=["library"], keys=["graphone"] )
livegraph = pd.concat( [
    data.loc[("livegraph3_ro", "gcc", slice(None), slice(None), slice(None), 20)].droplevel([0, 1, 3, 4, 5])
], names=["library"], keys=["livegraph"] )

# The `lcc` measurement of these systems is reported as `lcc_std`; none of them has an
# `lcc_sorted` measurement, so that column is filled with NaN.
competitors = pd.concat([stinger,llama,graphone, livegraph]).rename(columns = {"lcc": "lcc_std"})
competitors["lcc_sorted"] = np.nan  # np.NaN no longer exists in NumPy >= 2.0

### Teseo

In [7]:
# Both Teseo builds report their `lcc` measurement as `lcc_sorted`
teseo_lcc = data.loc[("teseo-lcc.12", "gcc")].droplevel([1,2,3]).rename(columns={"lcc": "lcc_sorted"})
teseo_dv_lcc = data.loc[("teseo-lcc-dv.12b", "gcc")].droplevel([1,2,3]).rename(columns={"lcc": "lcc_sorted"})

# The libraries `teseo.12` / `teseo-dv.12b`, whose `lcc` would be reported as `lcc_std`,
# are not loaded here, so `lcc_std` is left empty for both Teseo variants.
teseo_lcc["lcc_std"] = np.nan  # np.NaN no longer exists in NumPy >= 2.0
teseo_dv_lcc["lcc_std"] = np.nan

# Prefix each frame with a `library` index level holding its short name
teseo_logical = pd.concat([teseo_lcc], names=["library"], keys=["teseo_logical"])
teseo_real = pd.concat([teseo_dv_lcc], names=["library"], keys=["teseo_real"])

### Sortledton

In [22]:
# Sortledton reports its `lcc` measurement as `lcc_sorted`
sortledton1 = (
    data.loc[("sortledton", "gcc")]
    .droplevel([1, 2, 3])  # keep only the `graph` level of the index
    .rename(columns={"lcc": "lcc_sorted"})
)
# Prefix the frame with a `library` index level holding the key `sortledton`
sortledton = pd.concat({"sortledton": sortledton1}, names=["library"])
sortledton

Unnamed: 0_level_0,algorithm,bfs,cdlp,lcc_sorted,pagerank,sssp,wcc
library,graph,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
sortledton,com-friendster,3.200434,250.072451,137.252343,34.671289,65.692705,16.556693
sortledton,dota-league,0.003543,1.204768,429.506597,0.125936,0.409657,0.027747
sortledton,graph500-22,0.106116,6.123702,45.544339,0.449037,1.165284,0.132754
sortledton,graph500-24,0.418326,24.161871,229.016381,2.299,5.537105,0.64346
sortledton,graph500-26,1.527922,148.985451,1258.009455,11.994295,33.926792,3.10547
sortledton,uniform-24,0.409897,35.095181,5.073461,4.033271,11.302015,1.061742
sortledton,uniform-26,1.517634,157.081932,22.515912,20.74474,56.170403,7.690647


### Dataset

In [23]:
# Stack every system into a single frame indexed by (library, graph)
ds = pd.concat([csr, competitors, teseo_logical, teseo_real, sortledton])

# Per graph and per algorithm, the smallest completion time among all libraries except the
# CSR baseline (NaN entries are skipped). Grouping on the index level replaces the
# deprecated `groupby(..., axis=0)` keyword and the hand-rolled `apply(lambda t: t.min())`.
best_excl_csr = ds.query("library != 'csr'").groupby(level="graph").min()

# Append those minima as the pseudo-library `best_excl_csr`
ds = pd.concat([ds, pd.concat([best_excl_csr], names=["library"], keys=["best_excl_csr"])])
ds

Unnamed: 0_level_0,algorithm,bfs,cdlp,lcc_std,lcc_sorted,pagerank,sssp,wcc
library,graph,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
csr,com-friendster,1.007024,179.372491,835.65181,67.085626,31.575244,53.162582,10.20829
csr,dota-league,0.002061,1.116481,363.35308,53.038842,0.157165,0.434347,0.032011
csr,graph500-22,0.034984,7.841334,306.267269,12.374704,0.409857,0.915066,0.158914
csr,graph500-24,0.133839,24.633421,2236.280409,76.673949,2.718796,5.512071,0.925525
csr,graph500-26,0.49042,142.82706,,587.403737,15.290341,36.776917,4.950484
csr,uniform-24,0.147175,31.71586,21.949081,2.9333,3.445015,7.026035,1.174713
csr,uniform-26,0.581879,176.101286,92.364151,13.929533,18.702736,36.400172,6.212963
stinger,dota-league,0.123221,1.557826,1031.744225,,0.848719,1.066939,0.182674
stinger,graph500-22,0.141997,4.144559,898.389922,,1.412399,2.023461,0.656219
stinger,graph500-24,0.510672,18.533248,,,6.683881,9.431317,2.773185


### Speedup

### Create the output file

In [10]:
# Destination of the generated LaTeX table, located under the directory named by $HOME
home_dir = os.environ["HOME"]
path = home_dir + "/workspace/sortledton/src/tables/analytics_speedup.tex"

In [11]:
def fmtsecs(value):
    """Format a duration in seconds for the table.

    Values of at least 20 are truncated to an integer ("835 s"); smaller values
    are printed with two decimals ("0.49 s").
    """
    template, shown = ("{} s", int(value)) if value >= 20 else ("{:.2f} s", value)
    return template.format(shown)

In [24]:
# Preamble of the generated LaTeX table: provenance comment, \resizebox wrapper,
# column specification and the header row (same column titles for the left and right half).
table_header = r"""
% This file was autogenerated by the script graphalytics_gen_table.ipynb

\resizebox{\textwidth}{!}{ % the table should use 100% of the available width
    \begin{tabular}[t]{clccccccc|clcccccccc}
    \toprule
        Graph  & System            & BFS       & CDLP       & LCC         & LCC (opt)   & PageRank     & SSSP        & WCC             
      & Graph  & System            & BFS       & CDLP       & LCC         & LCC (opt)   & PageRank     & SSSP        & WCC  \\   
    \midrule
"""

# The handle stays open on purpose: the functions defined below write through the global `f`,
# and the end of this cell closes it.
f = open(path, "w")
f.write(table_header)

def write_csr(graph0, graph1, name0, name1, num_libraries):
    """Write the CSR baseline row for a pair of graphs shown side by side.

    Each half of the row consists of a rotated multirow label, the text
    "CSR (baseline)" and one cell per column of `ds` for the entry ("csr", graph):
    the completion time formatted by `fmtsecs`, or "DNF" when the value is NaN.
    The output goes to the global file handle `f`.

    graph0, graph1 -- keys of the two graphs in the `graph` level of `ds`
    name0, name1   -- labels printed in the rotated multirow cells
    num_libraries  -- number of rows passed to the multirow macro
    """
    def write_half(graph, name, terminator):
        # Rotated label; the graph key is appended as a LaTeX comment
        f.write("\\parbox[t]{2mm}{\\multirow{%s}{*}{\\rotatebox[origin=c]{90}{%s}}} & %% %s \n" % (num_libraries, name, graph))
        f.write("CSR (baseline) ")
        for secs in ds.loc[("csr", graph)]:
            if np.isnan(secs):
                f.write("& DNF")
            else:
                f.write("& $%s$ " % (fmtsecs(secs)))
        f.write(terminator)

    write_half(graph0, name0, " &\n")     # left half, followed by the column separator
    write_half(graph1, name1, "\\\\\n")   # right half, followed by the end of the row
    
def write_speedup_single0(library, graph):
    """Write the cells of one library for one graph, relative to the CSR baseline.

    For every column of `ds`, the completion time of (library, graph) is divided by
    the completion time of ("csr", graph) and printed as "<ratio>x" with two
    decimals. The ratio is wrapped in the LaTeX macro `winner` when the library's
    completion time is not larger than the ("best_excl_csr", graph) entry.

    A NaN ratio is printed as "N/A" for `lcc_std` when the library is sortledton or
    a teseo variant, and for `lcc_sorted` when it is any other library; every other
    NaN is printed as "DNF". The output goes to the global file handle `f`.
    """
    completionTime = ds.loc[(library, graph)]
    speedup = completionTime / ds.loc[("csr", graph)]
    # Libraries for which a missing `lcc_std` (rather than `lcc_sorted`) means "not applicable"
    sorted_lcc_only = library == "sortledton" or library.startswith("teseo")

    for column in speedup.index:
        f.write(" & ")
        value = speedup[column]
        best = ds.loc[("best_excl_csr", graph), column]
        if np.isnan(value):
            not_applicable = (column == "lcc_std" and sorted_lcc_only) \
                or (column == "lcc_sorted" and not sorted_lcc_only)
            f.write("N/A" if not_applicable else "DNF")
        elif completionTime[column] <= best:
            # The backslash is escaped explicitly: "\w" is an invalid escape sequence
            f.write("$\\winner{" + "{:.2f}x".format(value) + "}$")
        else:
            f.write("${:.2f}x$".format(value))

def write_speedup_single(label, library, graph0, graph1):
    """Write one complete table row for `library`: its cells for `graph0`, then for `graph1`.

    Each half starts with an empty cell (the graph column) followed by `label`; the
    cells themselves are produced by `write_speedup_single0`. The row is terminated
    with a LaTeX line break. The output goes to the global file handle `f`.
    """
    halves = (
        (" & ", graph0),    # left half: skip the graph column
        (" & & ", graph1),  # right half: close the previous cell, then skip the graph column
    )
    for separator, graph in halves:
        f.write(separator + label)
        write_speedup_single0(library, graph)
    f.write(" \\\\ \n")
    
def write_speedup(graph0, graph1, include_llama = True, include_stinger = True, include_graphone=True):
    """Write the rows of all the systems compared against CSR for a pair of graphs.

    Rows are emitted in a fixed order. Stinger, LLAMA and GraphOne can be switched
    off through the `include_*` flags; the "Teseo, real vtx" row is skipped when
    `graph0` is "com-friendster"; the remaining rows are always written.
    """
    rows = [
        # (emit?, label in the table, key in the `library` level of `ds`)
        (include_stinger, "Stinger", "stinger"),
        (include_llama, "LLAMA", "llama"),
        (include_graphone, "GraphOne", "graphone"),
        (True, "LiveGraph", "livegraph"),
        (True, "Teseo, log. vtx", "teseo_logical"),
        (graph0 != "com-friendster", "Teseo, real vtx", "teseo_real"),
        (True, "Sortledton", "sortledton"),
    ]
    for enabled, label, library in rows:
        if enabled:
            write_speedup_single(label, library, graph0, graph1)
    
# Table body: four groups of rows, each showing two graphs side by side, followed by the
# table footer. Everything runs inside try/finally so that the file handle opened at the
# top of this cell is closed even when a row cannot be written (for example when a
# (library, graph) entry is missing from `ds`).
#
# NOTE(review): the row counts passed to write_csr (7, 7, 6, 6) do not obviously match the
# number of rows emitted per group (the CSR row plus the rows enabled in write_speedup);
# confirm the intended multirow spans.
# NOTE(review): the last group shows graph500-26 a second time in its right half,
# presumably because there is no other graph to pair with com-friendster; confirm.
try:
    write_csr("dota-league", "graph500-22", "DOTA League", "Graph500 SF 22", 7)
    write_speedup("dota-league", "graph500-22")
    f.write("\\midrule \n")
    write_csr("uniform-24", "graph500-24", "Uniform SF 24", "Graph500 SF 24", 7)
    write_speedup("uniform-24", "graph500-24")
    f.write("\\midrule \n")
    write_csr("uniform-26", "graph500-26", "Uniform SF 26", "Graph500 SF 26", 6)
    write_speedup("uniform-26", "graph500-26", include_llama = False, include_stinger = False)
    f.write("\\midrule \n")
    write_csr("com-friendster", "graph500-26", "Friendster", "Graph500 SF 26", 6)
    write_speedup("com-friendster", "graph500-26", include_llama = False, include_stinger = False, include_graphone=False)

    f.write(r"""
        \bottomrule
    \end{tabular}
} % end fo resizebox
""")
finally:
    f.close()