# Imports

In [None]:
# Python version: 3.11.0
# matplotlib==3.10.0
# pandas==2.2.3
# numpy==2.2.1
# scipy==1.15.0
# scikit-learn==1.6.1
# seaborn==0.13.2

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Graph Stats

In [None]:
# Graph statistics files, one per data set
graph_stat_files = {
    "./graph_hdagg_metis.txt",
    "./graph_florida_stats.txt",
    "./graph_erdos_renyi_stats.txt",
    "./graph_band_stats.txt",
    "./graph_iChol_stats.txt",
}

In [None]:
# Read every graph statistics file and stack the tables into one pandas dataframe
graph_df = pd.concat(map(pd.read_csv, graph_stat_files), ignore_index=True)

In [None]:
# Keep only the first occurrence of rows that appear more than once
graph_df = graph_df.drop_duplicates()

In [None]:
# Use the "Graph" column as row index (the column itself is dropped)
graph_df = graph_df.set_index("Graph")

In [None]:
# Double precision floating point operations per graph:
# two per edge plus one per vertex
graph_df["FLOP_double_precision"] = graph_df["Edges"] * 2 + graph_df["Vertices"]

In [None]:
# Number of non-zeroes per graph: one per edge plus one per vertex
graph_df["Number_of_non-zeroes"] = graph_df["Edges"].add(graph_df["Vertices"])

## Graph Filter

In [None]:
# Graph filter templates: subsets of the graph names in graph_df, selected by name pattern

# All graphs
all_graphs = set(graph_df.index)

# Erdos-Renyi graphs (name starts with "Erdos")
erdos_renyi_graphs = {g for g in all_graphs if g.startswith("Erdos")}

# Random Bandwidth graphs (name starts with "RandomBand")
random_band_graphs = {g for g in all_graphs if g.startswith("RandomBand")}

# Florida METIS and post Eigen Cholesky graphs (selected by name suffix)
cholesky_graphs = {g for g in all_graphs if g.endswith("_postChol")}
florida_metis_graphs = {g for g in all_graphs if g.endswith("_metis")}

# Florida (SuiteSparse) graphs: everything that is in none of the sets above
# and whose name does not start with "inst"
_non_florida_graphs = random_band_graphs | erdos_renyi_graphs | florida_metis_graphs | cholesky_graphs
florida_graphs = {g for g in all_graphs if g not in _non_florida_graphs and not g.startswith("inst")}


In [None]:
# Graph filter used by the subsequent SpTrSV data analysis.
# Exactly one of the assignments below should be active.
graph_subset = all_graphs
# graph_subset = erdos_renyi_graphs
# graph_subset = random_band_graphs
# graph_subset = cholesky_graphs
# graph_subset = florida_graphs
# graph_subset = florida_metis_graphs


# SpTrSV Data

In [None]:
# Folder holding the SpTrSV measurement files (the trailing slash is required,
# file names are appended to this string directly)
folder_location = "./SpTrSV_Data/SC_paper/"

In [None]:
# All files in folder_location except those whose name starts with "log".
# Sorted so that the files are always read (and concatenated) in the same order;
# os.listdir itself returns the entries in arbitrary order.
data_files = sorted(file for file in os.listdir(folder_location) if not file.startswith("log"))

In [None]:
# Specifying Datatypes
# One row per column of the SpTrSV data: (column name, dtype, replacement for missing values)
_column_spec = [
    ("Graph",                        "object",  ""),
    ("Machine",                      "object",  ""),
    ("Algorithm",                    "object",  ""),
    ("Permutation",                  "object",  ""),
    ("SpTrSV_Runtime",               "float64", "0.0"),
    ("Work_Cost",                    "int64",   "0"),
    ("Base_Comm_Cost",               "int64",   "0"),
    ("Supersteps",                   "int64",   "1"),
    ("_Base_Buffered_Sending",       "int64",   "0"),
    ("Base_CostsTotalCommunication", "float64", "0.0"),
    ("Schedule_Compute_time",        "int64",   "1"),
    ("Processors",                   "int64",   "0"),
    ("BSP_g",                        "int64",   "0"),
    ("BSP_l",                        "int64",   "0"),
    ("Scheduling_Threads",           "int64",   "1"),
]

# Column name -> dtype the column is cast to
data_type_dic = {column: dtype for column, dtype, _ in _column_spec}

# Column name -> value (as string) that replaces missing entries before the cast
data_default_na_val_dic = {column: na_value for column, _, na_value in _column_spec}

In [None]:
# Read every data file in folder_location and stack them into one pandas dataframe
SpTrSV_df = pd.concat((pd.read_csv(folder_location + file) for file in data_files), ignore_index=True)

In [None]:
# Remove the folder structure from the 'Graph' column: keep only the part after the last "/"
SpTrSV_df["Graph"] = SpTrSV_df["Graph"].str.rsplit("/", n=1).str[-1]

In [None]:
# Adding BSP parameters to the dataframe.
# "Machine" is split at "_" into (up to) three parts; reindex guarantees exactly three columns
_machine_parts = SpTrSV_df["Machine"].str.split("_", n=2, expand=True).reindex(range(3), axis=1)
SpTrSV_df[["Processors", "BSP_g", "BSP_l"]] = _machine_parts

# Drop the first character of each part (presumably a one-letter tag - verify against
# the data files) and convert to integer where possible
for _bsp_column in ("Processors", "BSP_g", "BSP_l"):
    SpTrSV_df[_bsp_column] = SpTrSV_df[_bsp_column].astype("object").str.slice(start=1).astype("int64", errors="ignore")

In [None]:
# Casting to correct Datatypes: missing entries are first replaced by the
# column's default value, then the column is converted to its target dtype
for column, dtype in data_type_dic.items():
    na_default = data_default_na_val_dic[column]
    SpTrSV_df[column] = SpTrSV_df[column].fillna(na_default).astype(dtype)

In [None]:
# Function to compute giga double precision floating point operations per second, denoted by GFP64
def compute_GFP64(time, graph, df=None):
    """Return the GFP64/s rate of one run.

    Parameters
    ----------
    time : runtime of the run; the "FLOP_double_precision" count is divided by it.
    graph : row label of the graph in ``df``.
    df : dataframe indexed by graph name with a "FLOP_double_precision" column.
        Defaults to the notebook-level ``graph_df``, looked up when the function
        is called (not when it is defined), so that re-creating ``graph_df``
        does not leave this function working on a stale dataframe.

    Returns
    -------
    (FLOP count of ``graph`` / ``time``) / 10^9

    Raises
    ------
    KeyError if ``graph`` is not a row label of ``df``.
    """
    if df is None:
        df = graph_df
    flop = df.at[graph, "FLOP_double_precision"]
    return (flop / time) / 1000000000

In [None]:
# Adding the GFP64/s rate (giga double precision floating point operations per second) of every run
_runs = SpTrSV_df[["Graph", "SpTrSV_Runtime"]]
SpTrSV_df["GFP64/s"] = _runs.apply(lambda run: compute_GFP64(run["SpTrSV_Runtime"], run["Graph"]), axis=1)

In [None]:
# Set schedule compute time to at least 1ms: entries equal to 0 become 1
SpTrSV_df["Schedule_Compute_time"] = SpTrSV_df["Schedule_Compute_time"].replace(to_replace=0, value=1)

In [None]:
# Data sorting: by graph first, then by algorithm
SpTrSV_df = SpTrSV_df.sort_values(["Graph", "Algorithm"], axis=0)

## Filters (incl. Algorithm, Processor)

In [None]:
# List of all algorithms that occur in the data
pd.unique(SpTrSV_df["Algorithm"])

In [None]:
# Setting algorithm filter.
# Start from every algorithm in the data ...
alg_filter_set = set(SpTrSV_df["Algorithm"].unique())
# ... and override with the selection analysed below (remove this line to keep all algorithms)
alg_filter_set = {"HDAGG_BIN", "SpMP", "SMGreedyBspGrowLocalAutoCoresParallel", "SMFunOriGrowlv2"}

# Must always contain Serial and HDAGG_BIN
alg_filter_set.update(["Serial", "HDAGG_BIN"])

In [None]:
# Setting processor filter: every processor count that occurs in the data
proc_filter = pd.unique(SpTrSV_df["Processors"])

In [None]:
# Scheduling Threads filter: keep only runs whose "Scheduling_Threads" value is 1
sched_thread_filter = {1}

In [None]:
# Applying algorithm filter
_alg_mask = SpTrSV_df["Algorithm"].isin(alg_filter_set)
SpTrSV_df_filtered = SpTrSV_df.loc[_alg_mask]

In [None]:
# Applying graph filter
_graph_mask = SpTrSV_df_filtered["Graph"].isin(graph_subset)
SpTrSV_df_filtered = SpTrSV_df_filtered.loc[_graph_mask]

In [None]:
# Applying processor filter
_proc_mask = SpTrSV_df_filtered["Processors"].isin(proc_filter)
SpTrSV_df_filtered = SpTrSV_df_filtered.loc[_proc_mask]

In [None]:
# Applying scheduling filter
_sched_thread_mask = SpTrSV_df_filtered["Scheduling_Threads"].isin(sched_thread_filter)
SpTrSV_df_filtered = SpTrSV_df_filtered.loc[_sched_thread_mask]

## Evaluation

In [None]:
# Setting up new pandas dataframe with, per (Processors, Graph, Algorithm):
# geometric mean of GFP64/s, speed-ups over the Serial / HDagg / SpMP baselines,
# amortization threshold ("Profitability"), schedule compute time and superstep ratios.
# The rows are collected in a list and the dataframe is built once at the end
# (growing a dataframe with pd.concat inside the loop copies it on every iteration).
_geom_mean_columns = ["Processors", "Graphs", "Algorithm", "GFP64/s", "Speedup_over_Serial", "Speedup_over_HDagg", "Speedup_over_SpMP", "Profitability", "Schedule_Compute_time", "Schedule_Compute_time_speedup_over_HDagg", "Supersteps_relative_to_Wavefront", "Supersteps_relative_to_HDagg", "super_step_reduction"]


def _geometric_mean(values):
    """Geometric mean of a Series; NaN if the Series is empty or holds only NaN."""
    return np.exp(np.log(values).mean())


_geom_mean_rows = []
for name, group in SpTrSV_df_filtered.groupby(["Processors", "Graph"]):
    # Baseline runs on this (Processors, Graph) pair; each may be empty,
    # in which case the derived baseline values are NaN
    serial_runs = group[group["Algorithm"] == "Serial"]
    SpMP_runs = group[group["Algorithm"] == "SpMP"]
    HDagg_runs = group[group["Algorithm"] == "HDAGG_BIN"]

    serial_flops = _geometric_mean(serial_runs["GFP64/s"])
    serial_run_time_am = serial_runs["SpTrSV_Runtime"].mean()
    SpMP_flops = _geometric_mean(SpMP_runs["GFP64/s"])
    HDagg_flops = _geometric_mean(HDagg_runs["GFP64/s"])
    HDagg_schedule_compute_time = HDagg_runs["Schedule_Compute_time"].mean()
    HDagg_supersteps = HDagg_runs["Supersteps"].mean()

    # Same for every algorithm on this graph
    wavefront_supersteps = graph_df.at[name[1], "Longest_Path"]

    for alg, alg_group in group.groupby("Algorithm"):
        flops = _geometric_mean(alg_group["GFP64/s"])
        run_time_am = alg_group["SpTrSV_Runtime"].mean()
        alg_schedule_compute_time = alg_group["Schedule_Compute_time"].mean()
        alg_supersteps = alg_group["Supersteps"].mean()

        # Profitability: schedule compute time divided by the runtime saved per
        # execution compared to Serial
        profitable = 0
        if alg != "Serial":
            # conversion of alg_schedule_compute_time to s from ms
            profitable = (alg_schedule_compute_time / 1000.0) / (serial_run_time_am - run_time_am)
            # negative: the algorithm is slower than Serial, so scheduling never pays off
            if profitable < 0.0:
                profitable = np.inf

        _geom_mean_rows.append([
            name[0],
            name[1],
            alg,
            flops,
            flops / serial_flops,
            flops / HDagg_flops,
            flops / SpMP_flops,
            profitable,
            alg_schedule_compute_time,
            HDagg_schedule_compute_time / alg_schedule_compute_time,
            alg_supersteps / wavefront_supersteps,
            alg_supersteps / HDagg_supersteps,
            (HDagg_supersteps - alg_supersteps) / HDagg_supersteps,
        ])

geom_mean_FLOPS_df = pd.DataFrame(_geom_mean_rows, columns=_geom_mean_columns)

In [None]:
# Adding base-2 logarithms of the speed-ups over Serial, HDagg and SpMP
# and of the superstep ratios (new column -> column it is computed from)
_log2_columns = {
    "Log2_speedup_over_Serial": "Speedup_over_Serial",
    "Log2_speedup_over_HDagg": "Speedup_over_HDagg",
    "Log2_speedup_over_SpMP": "Speedup_over_SpMP",
    "Log2_supersteps_relative_to_Wavefront": "Supersteps_relative_to_Wavefront",
    "Log2_supersteps_relative_to_HDagg": "Supersteps_relative_to_HDagg",
}
for _log2_name, _ratio_name in _log2_columns.items():
    geom_mean_FLOPS_df[_log2_name] = np.log2(geom_mean_FLOPS_df[_ratio_name])


In [None]:
# Adding base-2 logarithm of the schedule compute time speed-up over HDagg
_sched_time_speedup = geom_mean_FLOPS_df["Schedule_Compute_time_speedup_over_HDagg"]
geom_mean_FLOPS_df["Log2_Schedule_Compute_time_speedup_over_HDagg"] = np.log2(_sched_time_speedup)

In [None]:
# Adding number of non-zeros of each row's graph, looked up in graph_df
def _nnz_of(graph):
    return graph_df.at[graph, "Number_of_non-zeroes"]


geom_mean_FLOPS_df["NNZ"] = geom_mean_FLOPS_df["Graphs"].apply(_nnz_of)

### GFP64/s

In [None]:
# Average Log speed-ups over Serial, aggregated per (Processors, Algorithm) for each data set
_AGG_KEYS = ["Processors", "Algorithm"]
_AGG_MEAN_COLUMNS = ["Log2_speedup_over_Serial", "Log2_speedup_over_HDagg", "Log2_speedup_over_SpMP", "Profitability", "Schedule_Compute_time", "Schedule_Compute_time_speedup_over_HDagg", "Supersteps_relative_to_HDagg", "Log2_supersteps_relative_to_Wavefront", "Log2_supersteps_relative_to_HDagg"]


def _aggregate_speedups(flops_df):
    """Aggregate a subset of geom_mean_FLOPS_df per (Processors, Algorithm).

    Returns a dataframe indexed by (Processors, Algorithm) with
    * the arithmetic mean of every column in _AGG_MEAN_COLUMNS,
    * "Geommean_*": geometric means over the graphs, obtained as 2 ** (mean of the
      log2 values); the two superstep columns hold the reciprocal, i.e. the
      geometric mean of (reference supersteps / algorithm supersteps),
    * "median_profitability", "q25", "q75": median and quartiles of "Profitability".
    """
    grouped = flops_df[_AGG_KEYS + _AGG_MEAN_COLUMNS].groupby(_AGG_KEYS)
    agg = grouped.mean()

    agg["Geommean_serial"] = np.exp2(agg["Log2_speedup_over_Serial"])
    agg["Geommean_hdagg"] = np.exp2(agg["Log2_speedup_over_HDagg"])
    agg["Geommean_spmp"] = np.exp2(agg["Log2_speedup_over_SpMP"])
    agg["Geommean_supersteps_relative_to_Wavefront"] = 1 / np.exp2(agg["Log2_supersteps_relative_to_Wavefront"])
    agg["Geommean_supersteps_relative_to_HDagg"] = 1 / np.exp2(agg["Log2_supersteps_relative_to_HDagg"])

    # amortization threshold
    profitability = grouped["Profitability"]
    agg["median_profitability"] = profitability.median()
    agg["q25"] = profitability.quantile(0.25)
    agg["q75"] = profitability.quantile(0.75)
    return agg


# SuiteSparse data set - main SpTRSV experiments and amortization threshold
florida_FLOPS = geom_mean_FLOPS_df[geom_mean_FLOPS_df["Graphs"].isin(florida_graphs)]
florida_agg = _aggregate_speedups(florida_FLOPS)

# Erdos-Renyi data set - main SpTRSV experiments
er_FLOPS = geom_mean_FLOPS_df[geom_mean_FLOPS_df["Graphs"].isin(erdos_renyi_graphs)]
er_agg = _aggregate_speedups(er_FLOPS)

# Narrow Bandwidth data set - main SpTRSV experiments
rb_FLOPS = geom_mean_FLOPS_df[geom_mean_FLOPS_df["Graphs"].isin(random_band_graphs)]
rb_agg = _aggregate_speedups(rb_FLOPS)


In [None]:
# METIS data set - main SpTRSV experiments
_metis_mask = geom_mean_FLOPS_df["Graphs"].isin(florida_metis_graphs)
florida_metis_FLOPS = geom_mean_FLOPS_df.loc[_metis_mask]

# Mean per (Processors, Algorithm) of the columns below
_metis_columns = ["Processors", "Algorithm", "Log2_speedup_over_Serial", "Log2_speedup_over_HDagg", "Log2_speedup_over_SpMP", "Profitability", "Schedule_Compute_time", "Schedule_Compute_time_speedup_over_HDagg", "Supersteps_relative_to_HDagg", "Log2_supersteps_relative_to_Wavefront", "Log2_supersteps_relative_to_HDagg"]
florida_metis_agg = florida_metis_FLOPS[_metis_columns].groupby(["Processors", "Algorithm"]).mean()

# Geometric means: 2 ** (mean of the log2 values)
for _geommean_name, _log2_name in [("Geommean_serial", "Log2_speedup_over_Serial"), ("Geommean_hdagg", "Log2_speedup_over_HDagg"), ("Geommean_spmp", "Log2_speedup_over_SpMP")]:
    florida_metis_agg[_geommean_name] = np.exp2(florida_metis_agg[_log2_name])

# Reciprocal of the geometric mean of the superstep ratio
_metis_wavefront_ratio = np.exp2(florida_metis_agg["Log2_supersteps_relative_to_Wavefront"])
florida_metis_agg["Geommean_supersteps_relative_to_Wavefront"] = 1 / _metis_wavefront_ratio

In [None]:
# iChol data set - main SpTRSV experiments
_pchol_mask = geom_mean_FLOPS_df["Graphs"].isin(cholesky_graphs)
florida_pchol_FLOPS = geom_mean_FLOPS_df.loc[_pchol_mask]

# Mean per (Processors, Algorithm) of the columns below
_pchol_columns = ["Processors", "Algorithm", "Log2_speedup_over_Serial", "Log2_speedup_over_HDagg", "Log2_speedup_over_SpMP", "Profitability", "Schedule_Compute_time", "Schedule_Compute_time_speedup_over_HDagg", "Supersteps_relative_to_HDagg", "Log2_supersteps_relative_to_Wavefront", "Log2_supersteps_relative_to_HDagg"]
florida_pchol_agg = florida_pchol_FLOPS[_pchol_columns].groupby(["Processors", "Algorithm"]).mean()

# Geometric means: 2 ** (mean of the log2 values)
for _geommean_name, _log2_name in [("Geommean_serial", "Log2_speedup_over_Serial"), ("Geommean_hdagg", "Log2_speedup_over_HDagg"), ("Geommean_spmp", "Log2_speedup_over_SpMP")]:
    florida_pchol_agg[_geommean_name] = np.exp2(florida_pchol_agg[_log2_name])

# Reciprocal of the geometric mean of the superstep ratio
_pchol_wavefront_ratio = np.exp2(florida_pchol_agg["Log2_supersteps_relative_to_Wavefront"])
florida_pchol_agg["Geommean_supersteps_relative_to_Wavefront"] = 1 / _pchol_wavefront_ratio

In [None]:
###########################################################
#####  The output of the following cells corresponds to Tables 7.1 & 7.2
###########################################################

# SuiteSparse data set: geometric means per (Processors, Algorithm)
florida_agg.loc[:, ["Geommean_serial", "Geommean_hdagg", "Geommean_spmp", "Geommean_supersteps_relative_to_Wavefront"]]

In [None]:
# METIS data set: geometric means per (Processors, Algorithm)
florida_metis_agg.loc[:, ["Geommean_serial", "Geommean_hdagg", "Geommean_spmp", "Geommean_supersteps_relative_to_Wavefront"]]

In [None]:
# iChol data set: geometric means per (Processors, Algorithm)
florida_pchol_agg.loc[:, ["Geommean_serial", "Geommean_hdagg", "Geommean_spmp", "Geommean_supersteps_relative_to_Wavefront"]]

In [None]:
# Erdos-Renyi data set: geometric means per (Processors, Algorithm)
er_agg.loc[:, ["Geommean_serial", "Geommean_hdagg", "Geommean_spmp", "Geommean_supersteps_relative_to_Wavefront"]]

In [None]:
# Narrow Bandwidth data set: geometric means per (Processors, Algorithm)
rb_agg.loc[:, ["Geommean_serial", "Geommean_hdagg", "Geommean_spmp", "Geommean_supersteps_relative_to_Wavefront"]]

In [None]:
###########################################################
#####  The output of this cell corresponds to Table 7.6
###########################################################

# SuiteSparse data set: mean, quartiles and median of "Profitability" per (Processors, Algorithm)
florida_agg.loc[:, ["Profitability", "q25", "median_profitability", "q75"]]

In [None]:
###########################################################
#####  The output of this cell corresponds to Figure 1.2
###########################################################

# Algorithms shown from left to right, each paired with its tick label
violin_algorithms = ["SMGreedyBspGrowLocalAutoCoresParallel", "SpMP", "HDAGG_BIN"]
violin_labels = ["GrowLocal", "SpMP", "HDagg"]

# One colour for all violins. Passed as `color`: `palette` without `hue` is
# deprecated in seaborn 0.13.
violin_color = sns.color_palette("husl", 6)[4]

# With a single processor count the file name is "intro_plot_violin.eps";
# with several, each figure gets its own file instead of overwriting the previous one
multiple_processor_counts = florida_FLOPS["Processors"].nunique() > 1

sns.set_style("whitegrid")
for name, group in florida_FLOPS.groupby("Processors"):
    fig, ax = plt.subplots(figsize=(6, 4))

    sns.violinplot(color=violin_color, x="Algorithm", y="Log2_speedup_over_Serial", order=violin_algorithms, inner="quart", data=group[group["Algorithm"].isin(violin_algorithms)], ax=ax)

    plt.xlabel("", fontsize=0, labelpad=10)

    # The violins are drawn in the order of violin_algorithms, so position i gets violin_labels[i]
    plt.xticks(ticks=list(range(len(violin_labels))), labels=violin_labels, fontsize=14)
    # The y axis holds log2 values; the ticks are labelled with the corresponding speed-up
    plt.yticks(fontsize=14, ticks=[-2, -1, 0, 1, 2, 3, 4, 5], labels=["0.25", "0.5", "1", "2", "4", "8", "16", "32"])

    plt.ylabel("Speed-up over Serial", fontsize=14)
    if multiple_processor_counts:
        violin_file = f"intro_plot_violin_{name}_procs.eps"
    else:
        violin_file = "intro_plot_violin.eps"
    plt.savefig(violin_file, bbox_inches="tight", transparent=True)
    plt.show()

### Schedule compute time

In [None]:
# Average Log speed-ups of schedule computation time over HDagg, per (Processors, Algorithm)
geom_mean_FLOPS_df.groupby(["Processors", "Algorithm"])[["Log2_Schedule_Compute_time_speedup_over_HDagg"]].mean()

In [None]:
# Scheduling Scatterplot: schedule compute time against number of non-zeros,
# with one fitted line per algorithm

# True:  fit y = c * x with log2(c) = mean(log2(y) - log2(x)); plotted on log-log axes
# False: least-squares fit of y = c * x (no intercept); plotted on linear axes
best_linear_fit_log_normalised = True

alg_filter_sched_time_scatter = ["SMFunOriGrowlv2", "SMGreedyBspGrowLocalAutoCoresParallel"]

# Legend label of each algorithm
sched_time_legend_labels = {"SMFunOriGrowlv2": "Funnel+GL", "SMGreedyBspGrowLocalAutoCoresParallel": "GrowLocal"}

# Algorithms of the filter that occur in the data, in filter order. This one order is
# used for the fits, the scatter plot (hue/style) and the fitted lines, so that the
# i-th line belongs to the i-th point group.
_available_algs = set(geom_mean_FLOPS_df["Algorithm"].unique())
sched_time_algs = [alg for alg in alg_filter_sched_time_scatter if alg in _available_algs]

reg = dict()
for alg in sched_time_algs:
    tmp_df = geom_mean_FLOPS_df[geom_mean_FLOPS_df["Algorithm"] == alg]
    y = tmp_df["Schedule_Compute_time"].to_numpy()
    if best_linear_fit_log_normalised:
        X = tmp_df["NNZ"].to_numpy()
        err = np.log2(y) - np.log2(X)
        reg[alg] = np.mean(err)
    else:
        X = tmp_df[["NNZ"]].to_numpy().reshape(-1, 1)
        reg[alg] = LinearRegression(fit_intercept=False)
        reg[alg].fit(X, y.reshape(-1, 1))

g = sns.scatterplot(data=geom_mean_FLOPS_df[geom_mean_FLOPS_df["Algorithm"].isin(sched_time_algs)],
            x="NNZ", y="Schedule_Compute_time", hue="Algorithm", style="Algorithm",
            hue_order=sched_time_algs or None, style_order=sched_time_algs or None)
if best_linear_fit_log_normalised:
    g.set(xscale="log", yscale="log")
plt.xlabel("Number of non-zeros")
plt.ylabel("Schedule compute time [ms]")
plt.legend(title="Algorithm")

# Relabel each legend entry by its own text rather than by position, so that an
# algorithm missing from the data cannot shift the labels
leg = g.get_legend()
if leg is not None:
    for legend_text in leg.texts:
        legend_text.set_text(sched_time_legend_labels.get(legend_text.get_text(), legend_text.get_text()))
sns.move_legend(g, "lower right")

# Fitted lines, evaluated over the full NNZ range of the data
arr = np.linspace(geom_mean_FLOPS_df["NNZ"].min(), geom_mean_FLOPS_df["NNZ"].max(), 10001)
arr = arr.reshape(-1, 1)

for alg in sched_time_algs:
    if best_linear_fit_log_normalised:
        plt.plot(arr, arr * np.exp2(reg[alg]))
    else:
        plt.plot(arr, reg[alg].predict(arr))

plt.savefig("schedule_compute_time_1_thread.eps", bbox_inches="tight", transparent=True)

### Performance graphs

In [None]:
# Algorithm name as it appears in the data -> name shown in the plots
alg_dict = {
    "HDAGG_BIN": "HDagg",
    "Serial": "Serial",
    "SpMP": "SpMP",
    "SMFunOriGrowlv2": "Funnel+GL",
    "SMGreedyBspGrowLocalAutoCoresParallel": "GrowLocal",
}

In [None]:
# Filtered runs on the Florida (SuiteSparse) graphs only
_florida_run_mask = SpTrSV_df_filtered["Graph"].isin(florida_graphs)
SpTrSV_df_florida = SpTrSV_df_filtered.loc[_florida_run_mask]

# Column-wise minimum per graph; its "SpTrSV_Runtime" column is the smallest
# runtime of any of these runs on that graph
smallest_runtime_df = SpTrSV_df_florida.groupby(by="Graph").min()

# Number of non-missing entries per column for each algorithm
total_runs_df = SpTrSV_df_florida.groupby(by="Algorithm").count()

In [None]:
# Performance profile: for every algorithm and every threshold in [1, max_threshold],
# the fraction of the algorithm's runs whose runtime is at most
# threshold * (smallest runtime recorded for the same graph)
max_threshold = 5.0
granularity = 513

thresholds = np.linspace(1.0, max_threshold, num=granularity)

# Column label -> list with one fraction per threshold
_performance_columns = {}

for alg, df in SpTrSV_df_florida[["Graph", "Algorithm", "SpTrSV_Runtime"]].groupby("Algorithm"):
    #skipping algorithms
    if alg in ["Serial", "Wavefront"]:
        continue
    runtimes = df["SpTrSV_Runtime"].to_numpy()
    # smallest runtime of each run's graph, aligned with `runtimes`
    best_runtimes = df["Graph"].map(smallest_runtime_df["SpTrSV_Runtime"]).to_numpy()
    total_runs = total_runs_df.at[alg, "Graph"]
    # algorithms without an entry in alg_dict keep their own name as column label
    _performance_columns[alg_dict.get(alg, alg)] = [
        np.count_nonzero(runtimes <= threshold * best_runtimes) / total_runs
        for threshold in thresholds
    ]

# Built in one go (rows: thresholds, columns: algorithms) instead of cell by cell
if _performance_columns:
    performance_df = pd.DataFrame(_performance_columns, index=thresholds)
else:
    performance_df = pd.DataFrame()
        

In [None]:
###########################################################
#####  The output of this cell corresponds to Figure 7.1
###########################################################

# One line per column of performance_df, plotted against its index (the thresholds)
plt.figure(figsize=(8, 6))
_husl_palette = sns.color_palette("husl", 6)
perf_plot = sns.lineplot(data=performance_df, palette=_husl_palette)
perf_plot.set(ylim=(0.0, 1.0), xlim=(1.0, max_threshold))
sns.move_legend(perf_plot, "lower right")
plt.savefig("performance_plot_florida.eps", bbox_inches="tight", transparent=True)