# Plots

## Imports

In [51]:
import os
import re

In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.offsetbox import AnchoredText
from matplotlib.ticker import FormatStrFormatter, ScalarFormatter, MultipleLocator, NullLocator

IPython magic:

In [102]:
# Render figures inline in the notebook output.
%matplotlib inline
# Dots per inch of the figures (an 8x6 inch figure is reported as 4800x3600).
plt.rcParams["figure.dpi"] = 600
#plt.style.use('seaborn-whitegrid')
# Typeset all figure text with LaTeX in a 12pt serif font. With "text.usetex"
# enabled every string handed to matplotlib must be valid LaTeX, which is why
# plain-text labels are passed through tex_escape before plotting.
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.size": 12,
#    "ytick.minor.size":plt.rcParams["ytick.major.size"],
#    "ytick.minor.width":plt.rcParams["ytick.major.width"]
})
# Emit the inline figures as PDF.
%config InlineBackend.figure_formats = ['pdf']

## Constants

In [54]:
# Directory that holds the measurement files.
DATA_PATH = "data/"
# Only CSV files are data sets. Filtering keeps stray directory entries (hidden
# files, checkpoint directories, ...) out of the lookup table, and sorting makes
# the listing independent of the order in which the file system enumerates it.
FILES = sorted(file for file in os.listdir(DATA_PATH) if file.endswith(".csv"))
# Maps a data set name (the file name without its extension) to the path of its
# file. os.path.splitext is used instead of slicing off a fixed number of
# characters, so the name is right regardless of the extension's length.
FILES_PATH = {os.path.splitext(file)[0]: os.path.join(DATA_PATH, file) for file in FILES}

In [55]:
# Show the discovered data sets (name -> path) as a quick sanity check.
FILES_PATH

{'comparison-segments-s19': 'data/comparison-segments-s19.csv',
 'weakscaling-full-s18-22': 'data/weakscaling-full-s18-22.csv',
 'strongscaling-full-s20': 'data/strongscaling-full-s20.csv',
 'strongscaling-full-s19': 'data/strongscaling-full-s19.csv'}

In [56]:
# (width, height) handed as ``figsize`` to every ``plt.subplots`` call in this notebook.
FIGSIZE = (8,6)

In [111]:
# Legend names of the benchmarked algorithms, keyed by the numeric id found in
# the ``algorithm`` column of the data files (there is no entry for id 1).
ALGORITHMS = {
    0: "Edge distributed union find",
    2: "Edge distributed union find w. reduced edges",
    3: "Vertex distributed w. simple pointer jumping",
    4: "Vertex distributed w. pointer jumping",
    5: "Vertex distributed w. supervertices ptr. jump.",
    6: "Vertex distributed union find",
    7: "Parallel Boost Graph Library 1.78.0",
}

In [58]:
# Keyword arguments for the error bar lines, keyed by algorithm id. Both
# algorithms drawn in blue are the "Edge distributed" ones, the black ones are
# the "Vertex distributed" ones and red marks the Boost library (see ALGORITHMS).
LINE_STYLES = {
    0: {"linestyle": "-", "color": "blue", "linewidth": 1},
    2: {"linestyle": "-.", "color": "blue", "linewidth": 1},
    3: {"linestyle": "-", "color": "black", "linewidth": 1},
    4: {"linestyle": "-.", "color": "black", "linewidth": 1},
    5: {"linestyle": ":", "color": "black", "linewidth": 1},
    6: {"linestyle": "--", "color": "black", "linewidth": 1},
    7: {"linestyle": "-", "color": "red", "linewidth": 1.5},
}

In [59]:
# Keyword arguments for the bars and legend patches, keyed by algorithm id.
# Every bar is white inside; the outline repeats the line style, colour and
# width that LINE_STYLES assigns to the same algorithm.
BAR_STYLES = {
    0: {"linestyle": "-", "edgecolor": "blue", "facecolor": "white", "linewidth": 1},
    2: {"linestyle": "-.", "edgecolor": "blue", "facecolor": "white", "linewidth": 1},
    3: {"linestyle": "-", "edgecolor": "black", "facecolor": "white", "linewidth": 1},
    4: {"linestyle": "-.", "edgecolor": "black", "facecolor": "white", "linewidth": 1},
    5: {"linestyle": ":", "edgecolor": "black", "facecolor": "white", "linewidth": 1},
    6: {"linestyle": "--", "edgecolor": "black", "facecolor": "white", "linewidth": 1},
    7: {"linestyle": "-", "edgecolor": "red", "facecolor": "white", "linewidth": 1.5},
}

## Functions

In [60]:
def double_std(array):
    """
        :param array: the values to summarise
        :return: twice the standard deviation of the values, as computed by ``np.std``
    """
    spread = np.std(array)
    return 2 * spread

In [61]:
def tex_escape(text):
    """
        :param text: a plain text message
        :return: the message with every LaTeX special character replaced by its escape sequence
    """
    replacements = {
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\^{}',
        '\\': r'\textbackslash{}',
        '<': r'\textless{}',
        '>': r'\textgreater{}',
    }
    # Longest keys first, so that a longer key would take precedence over a
    # shorter one it starts with; keys of equal length keep their order.
    ordered_keys = sorted(replacements, key=len, reverse=True)
    alternatives = [re.escape(str(symbol)) for symbol in ordered_keys]
    pattern = re.compile('|'.join(alternatives))

    def substitute(match):
        # Look up the escape sequence for the character that was matched.
        return replacements[match.group()]

    return pattern.sub(substitute, text)

## 1. Strong Scaling Size 19 with Boost

In [62]:
# Strong scaling measurements for 2^19 vertices; the first measurement column
# (m1) is dropped and not used anywhere below.
df_strongscaling_s19 = (
    pd.read_csv(FILES_PATH['strongscaling-full-s19'])
    .drop(columns='m1')
)
df_strongscaling_s19.head()

Unnamed: 0,algorithm,cores,m2,m3,m4,m5,m6,m7,m8,m9,m10
0,0,1,0.583107,0.561699,0.578705,0.578035,0.563962,0.576622,0.565263,0.575251,0.561199
1,0,2,0.2888,0.3045,0.2856,0.2867,0.2853,0.2851,0.3035,0.2853,0.3023
2,0,4,0.1989,0.2016,0.1991,0.2004,0.2173,0.1999,0.2021,0.2012,0.2183
3,0,8,0.1466,0.1302,0.1323,0.1317,0.1303,0.1301,0.1296,0.1305,0.1304
4,0,16,0.108,0.128,0.1128,0.1115,0.1081,0.1117,0.1111,0.1076,0.1262


In [63]:
# Long format: one measurement per row, indexed by (algorithm, cores, measurement name).
grouped_strongscaling_s19 = df_strongscaling_s19.set_index(['algorithm', 'cores']).stack()
# Per algorithm: a frame indexed by core count with the columns 'mean' and
# 'double_std' (named after the aggregation functions). The mean is requested
# by name because handing the np.mean callable to agg is deprecated in recent
# pandas releases and raises a FutureWarning.
res_strongscaling_s19 = {
    alg: grouped_strongscaling_s19[alg].groupby('cores').agg(['mean', double_std])
    for alg in sorted(set(df_strongscaling_s19.algorithm))
}

In [131]:
# Figure with a broken y axis: ax1 (top, short) shows the range above sep_upper,
# ax2 (bottom, tall) the range below sep_lower. Both panels share the x axis.
fig, (ax1,ax2) = plt.subplots(2,1,sharex=True,gridspec_kw={'height_ratios':[1,4]},figsize=FIGSIZE)
fig.subplots_adjust(hspace=0.05)

# Select the scale first: set_xscale installs the default locators of the new
# scale, so tick locators configured before it would be thrown away again.
ax1.set_xscale('log',base=2)
ax2.set_xscale('log',base=2)

ax2.grid(True, which='major')
ax1.grid(True, which='major')

ax2.xaxis.set_minor_locator(NullLocator())
ax2.yaxis.set_minor_locator(MultipleLocator(1))
ax2.yaxis.set_major_locator(MultipleLocator(1))
ax1.yaxis.set_minor_locator(MultipleLocator(1))
ax1.yaxis.set_major_locator(MultipleLocator(2))

# One tick per measured process count; the largest count is left out of the
# ticks and of the plotted range.
xticks = sorted(set(df_strongscaling_s19.cores))[:-1]
ax2.set_xticks(xticks)
ax2.set_xlim(left=1, right=max(xticks)+0.4)

max_time = df_strongscaling_s19.loc[:, 'm2':].to_numpy().max()

# The y range between these two values is cut out of the figure.
sep_lower = 5.55
sep_upper = 9.05

ax2.set_ylim([0,sep_lower])
ax1.set_ylim([sep_upper,max_time+0.3])
# Dashed lines mark the two edges of the cut.
ax2.axhline(y=sep_lower, color='black',linestyle='-',dashes=(5,1),linewidth=1)
ax1.axhline(y=sep_upper, color='black',linestyle='-',dashes=(5,1),linewidth=1)

ax2.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax1.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# Hide the spines facing the cut so the two panels read as one axis.
ax2.spines['top'].set_visible(False)
ax1.spines['bottom'].set_visible(False)
ax1.xaxis.set_ticks_position('none')

# Short diagonal strokes at the cut, given in axes coordinates. The vertical
# extents differ per panel (2*d on top, 0.5*d below), matching the 1:4 height ratio.
d = .015
diagonal_settings = dict(transform=ax1.transAxes, color='k', clip_on=False,linewidth=0.8)
ax1.plot((-d, +d), (-2*d, +2*d), **diagonal_settings)        # top-left diagonal
ax1.plot((1 - d, 1 + d), (-2*d, +2*d), **diagonal_settings)  # top-right diagonal
diagonal_settings.update(transform=ax2.transAxes)
ax2.plot((-d, +d), (1 - 0.5*d, 1 + 0.5*d), **diagonal_settings)  # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - 0.5*d, 1 + 0.5*d), **diagonal_settings)  # bottom-right diagonal

# Sorted iteration keeps the order of the legend entries stable between runs;
# the iteration order of a set is unspecified.
for alg in sorted(set(df_strongscaling_s19.algorithm)):
    x = res_strongscaling_s19[alg].index
    y = res_strongscaling_s19[alg]['mean']
    # The same curve goes on both panels; each panel shows the part inside its y range.
    ax2.errorbar(x, y, yerr=res_strongscaling_s19[alg]['double_std'],label=tex_escape(ALGORITHMS[alg]),**LINE_STYLES[alg],capsize=2)
    ax1.errorbar(x, y, yerr=res_strongscaling_s19[alg]['double_std'],label=tex_escape(ALGORITHMS[alg]),**LINE_STYLES[alg],capsize=2)

ax2.set_xlabel("Number of MPI processes")
fig.supylabel(r"Average time [$\mathrm{s}$]",x=0.05,fontsize=plt.rcParams['axes.titlesize'])
ax1.set_title(r"\textbf{Strong Scaling Study, $\mathbf{2^{19}}$ Vertices}")
# Attach the legend to the bottom panel explicitly rather than relying on
# pyplot's notion of the current axes.
legend = ax2.legend(loc='upper center',bbox_to_anchor=(0.5,-0.15), fontsize=11, ncol=2,frameon=False)
fig.savefig('strongscaling_s19.pdf',bbox_inches='tight')
del fig, ax1, ax2, xticks, legend, diagonal_settings

<Figure size 4800x3600 with 2 Axes>

## 2. Strong Scaling Size 20 without Boost

In [65]:
# Strong scaling measurements for 2^20 vertices; the first measurement column
# (m1) is dropped and not used anywhere below.
df_strongscaling_s20 = (
    pd.read_csv(FILES_PATH['strongscaling-full-s20'])
    .drop(columns='m1')
)
df_strongscaling_s20.head()

Unnamed: 0,algorithm,cores,m2,m3,m4,m5,m6,m7,m8,m9,m10
0,0,1,1.433306,1.41805,1.430561,1.416092,1.438895,1.416321,1.401156,1.432699,1.408864
1,0,2,0.7902,0.8055,0.802,0.8063,0.8088,0.7846,0.7919,0.7975,0.8079
2,0,4,0.4026,0.4145,0.4011,0.4011,0.4194,0.4174,0.4021,0.4012,0.4026
3,0,8,0.3348,0.3568,0.3391,0.3394,0.3496,0.3388,0.3411,0.356,0.3384
4,0,16,0.2379,0.2299,0.2531,0.2294,0.2321,0.2368,0.2304,0.2475,0.2296


In [66]:
# Long format: one measurement per row, indexed by (algorithm, cores, measurement name).
grouped_strongscaling_s20 = df_strongscaling_s20.set_index(['algorithm', 'cores']).stack()
# Per algorithm: a frame indexed by core count with the columns 'mean' and
# 'double_std' (named after the aggregation functions). The mean is requested
# by name because handing the np.mean callable to agg is deprecated in recent
# pandas releases and raises a FutureWarning.
res_strongscaling_s20 = {
    alg: grouped_strongscaling_s20[alg].groupby('cores').agg(['mean', double_std])
    for alg in sorted(set(df_strongscaling_s20.algorithm))
}

In [130]:
# Figure with a broken y axis: ax1 (top) shows the range above sep_upper,
# ax2 (bottom) the range below sep_lower. Both panels share the x axis.
fig, (ax1,ax2) = plt.subplots(2,1,sharex=True,gridspec_kw={'height_ratios':[1,1]},figsize=FIGSIZE)
fig.subplots_adjust(hspace=0.05)

ax2.set_xscale('log',base=2)

ax2.grid(True, which='major')
ax1.grid(True, which='major')

ax2.xaxis.set_minor_locator(NullLocator())
ax2.yaxis.set_minor_locator(MultipleLocator(0.25))
ax2.yaxis.set_major_locator(MultipleLocator(0.25))
ax1.yaxis.set_minor_locator(MultipleLocator(0.25))
ax1.yaxis.set_major_locator(MultipleLocator(2))

# One tick per measured process count, with a small margin on either side.
xticks = sorted(set(df_strongscaling_s20.cores))
ax2.set_xticks(xticks)
# ax2.xaxis.set_major_formatter(ScalarFormatter())
ax2.set_xlim([min(xticks)-2**-4.7, max(xticks)+2**4.3])

max_time = df_strongscaling_s20.loc[:, 'm2':].to_numpy().max()

# The y range between these two values is cut out of the figure.
sep_lower = 1.55
sep_upper = 5.05
ax2.set_ylim([0,sep_lower])
ax1.set_ylim([sep_upper,np.ceil(max_time)])
# Dashed lines mark the two edges of the cut.
ax2.axhline(y=sep_lower, color='black',linestyle='-',dashes=(5,1),linewidth=1)
ax1.axhline(y=sep_upper, color='black',linestyle='-',dashes=(5,1),linewidth=1)

ax2.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
ax1.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# Hide the spines facing the cut so the two panels read as one axis.
ax2.spines['top'].set_visible(False)
ax1.spines['bottom'].set_visible(False)
ax1.xaxis.set_ticks_position('none')

# Short diagonal strokes at the cut, given in axes coordinates.
d = .015
diagonal_settings = dict(transform=ax1.transAxes, color='k', clip_on=False,linewidth=0.8)
ax1.plot((-d, +d), (-d, +d), **diagonal_settings)        # top-left diagonal
ax1.plot((1 - d, 1 + d), (-d, +d), **diagonal_settings)  # top-right diagonal
diagonal_settings.update(transform=ax2.transAxes)
ax2.plot((-d, +d), (1 - d, 1 + d), **diagonal_settings)  # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **diagonal_settings)  # bottom-right diagonal

# Sorted iteration keeps the order of the legend entries stable between runs;
# the iteration order of a set is unspecified.
for alg in sorted(set(df_strongscaling_s20.algorithm)):
    x = res_strongscaling_s20[alg].index
    y = res_strongscaling_s20[alg]['mean']
    # The same curve goes on both panels; each panel shows the part inside its y range.
    ax2.errorbar(x, y, yerr=res_strongscaling_s20[alg]['double_std'],label=tex_escape(ALGORITHMS[alg]),**LINE_STYLES[alg],capsize=2)
    ax1.errorbar(x, y, yerr=res_strongscaling_s20[alg]['double_std'],label=tex_escape(ALGORITHMS[alg]),**LINE_STYLES[alg],capsize=2)

ax2.set_xlabel("Number of MPI processes")
fig.supylabel(r"Average time [$\mathrm{s}$]",x=0.045,fontsize=plt.rcParams['axes.titlesize'])
# The title is typeset by LaTeX, so the braces of \textbf{...} must balance.
ax1.set_title(r"\textbf{Strong Scaling Study, $\mathbf{2^{20}}$ Vertices}")
# Attach the legend to the bottom panel explicitly rather than relying on
# pyplot's notion of the current axes.
legend = ax2.legend(loc='upper center',bbox_to_anchor=(0.5,-0.25), fontsize=11,ncol=2,frameon=False)

fig.savefig('strongscaling_s20.pdf',bbox_inches='tight')
del fig, ax1, ax2, xticks, legend, diagonal_settings

<Figure size 4800x3600 with 2 Axes>

## 3. Weak Scaling Sizes 18 to 22

In [68]:
# Weak scaling measurements; the first measurement column (m1) is dropped and
# the rows of algorithm 7 (the Boost library, see ALGORITHMS) are left out.
df_weakscaling = (
    pd.read_csv(FILES_PATH['weakscaling-full-s18-22'])
    .drop(columns='m1')
    .loc[lambda frame: frame.algorithm != 7]
)
df_weakscaling.head()

Unnamed: 0,algorithm,graph_size,cores,m2,m3,m4,m5,m6,m7,m8,m9,m10
0,0,18,1,0.299283,0.295788,0.318606,0.296134,0.298951,0.297034,0.319953,0.303603,0.312963
1,0,19,4,0.2021,0.2065,0.2207,0.2034,0.2031,0.204,0.2201,0.2043,0.2048
2,0,20,16,0.2339,0.2346,0.2474,0.2286,0.2376,0.2371,0.2501,0.2291,0.2277
3,0,21,64,0.4495,0.4378,0.4468,0.4301,0.4293,0.4477,0.4478,0.4293,0.4359
4,0,22,256,1.3085,1.3238,1.3172,1.3183,1.3271,1.3093,1.3318,1.309,1.3131


In [69]:
# Long format: one measurement per row, indexed by (algorithm, cores, measurement name).
# graph_size is dropped first so that it is not stacked in with the measurements.
grouped_weakscaling = df_weakscaling.drop('graph_size', axis=1).set_index(['algorithm', 'cores']).stack()
# Per algorithm: a frame indexed by core count with the columns 'mean' and
# 'double_std' (named after the aggregation functions). The mean is requested
# by name because handing the np.mean callable to agg is deprecated in recent
# pandas releases and raises a FutureWarning.
res_weakscaling = {
    alg: grouped_weakscaling[alg].groupby('cores').agg(['mean', double_std])
    for alg in sorted(set(df_weakscaling.algorithm))
}

In [110]:
# Figure with a broken y axis: ax1 (top) shows the range above sep_upper,
# ax2 (bottom) the range below sep_lower. Both panels share the x axis.
fig, (ax1,ax2) = plt.subplots(2,1,sharex=True,gridspec_kw={'height_ratios':[1,1]},figsize=FIGSIZE)
fig.subplots_adjust(hspace=0.05)

ax2.set_xscale('log',base=2)

ax2.grid(True, which='major')
ax1.grid(True, which='major')

ax2.xaxis.set_minor_locator(NullLocator())
ax2.yaxis.set_minor_locator(MultipleLocator(0.5))
ax2.yaxis.set_major_locator(MultipleLocator(0.5))
ax1.yaxis.set_minor_locator(MultipleLocator(0.5))
ax1.yaxis.set_major_locator(MultipleLocator(10))

# One tick per measured process count, with a small margin on either side.
xticks = sorted(set(df_weakscaling.cores))
ax2.set_xticks(xticks)
ax2.set_xlim([min(xticks)-2**-4.7, max(xticks)+2**3.3])

max_time = df_weakscaling.loc[:, 'm2':].to_numpy().max()
# The y range between these two values is cut out of the figure.
sep_lower = 3.05
sep_upper = 4
ax2.set_ylim([0,sep_lower])
ax1.set_ylim([sep_upper,np.ceil(max_time)])
# Dashed lines mark the two edges of the cut.
ax2.axhline(y=sep_lower, color='black',linestyle='-',dashes=(5,1),linewidth=1)
ax1.axhline(y=sep_upper, color='black',linestyle='-',dashes=(5,1),linewidth=1)

ax2.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax1.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# Hide the spines facing the cut so the two panels read as one axis.
ax2.spines['top'].set_visible(False)
ax1.spines['bottom'].set_visible(False)
ax1.xaxis.set_ticks_position('none')

# Short diagonal strokes at the cut, given in axes coordinates.
d = .015
diagonal_settings = dict(transform=ax1.transAxes, color='k', clip_on=False,linewidth=0.8)
ax1.plot((-d, +d), (-d, +d), **diagonal_settings)        # top-left diagonal
ax1.plot((1 - d, 1 + d), (-d, +d), **diagonal_settings)  # top-right diagonal
diagonal_settings.update(transform=ax2.transAxes)
ax2.plot((-d, +d), (1 - d, 1 + d), **diagonal_settings)  # bottom-left diagonal
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **diagonal_settings)  # bottom-right diagonal

# Each tick is labelled "[process count, log2 of the vertex count]"; the graph
# sizes start at 2^18 and grow by one power of two per tick. The labels are set
# exactly once.
xlabels = [f'[{x}, {18 + i}]' for i, x in enumerate(ax2.get_xticks())]
ax2.set_xticklabels(xlabels)

# Sorted iteration keeps the order of the legend entries stable between runs;
# the iteration order of a set is unspecified.
for alg in sorted(set(df_weakscaling.algorithm)):
    x = res_weakscaling[alg].index
    y = res_weakscaling[alg]['mean']
    # The same curve goes on both panels; each panel shows the part inside its y range.
    ax2.errorbar(x, y, yerr=res_weakscaling[alg]['double_std'],label=tex_escape(ALGORITHMS[alg]),**LINE_STYLES[alg],capsize=2)
    ax1.errorbar(x, y, yerr=res_weakscaling[alg]['double_std'],label=tex_escape(ALGORITHMS[alg]),**LINE_STYLES[alg],capsize=2)

ax2.set_xlabel(r"Number of MPI processes, $\log_2$ of number of vertices")
fig.supylabel(r"Average time [$\mathrm{s}$]",x=0.045,fontsize=plt.rcParams['axes.titlesize'])
ax1.set_title(r"\textbf{Weak Scaling Study, $\mathbf{2^{18}}$ to $\mathbf{2^{22}}$ Vertices}")
# Attach the legend to the bottom panel explicitly rather than relying on
# pyplot's notion of the current axes.
legend = ax2.legend(loc='upper center',bbox_to_anchor=(0.5,-0.25), fontsize=11,ncol=2,frameon=False)

fig.savefig('weakscaling.pdf',bbox_inches='tight')
del fig, ax1, ax2, xticks, legend, diagonal_settings, xlabels

<Figure size 4800x3600 with 2 Axes>

## 4. Comparison

Segments:
1. distribution
2. lightest_edge
3. roots

In [71]:
# Per-segment timings of each algorithm: the columns are named s<segment>_m<measurement>
# (segments 1-3, measurements 1-10), next to the algorithm id and the core count.
df_comparison = pd.read_csv(FILES_PATH['comparison-segments-s19'])
df_comparison

Unnamed: 0,algorithm,cores,s1_m1,s1_m2,s1_m3,s1_m4,s1_m5,s1_m6,s1_m7,s1_m8,...,s3_m1,s3_m2,s3_m3,s3_m4,s3_m5,s3_m6,s3_m7,s3_m8,s3_m9,s3_m10
0,0,64,0.0119,0.0066,0.004,0.0071,0.0055,0.0044,0.005,0.0039,...,0.0234,0.0235,0.0237,0.0236,0.0235,0.0236,0.0236,0.0236,0.0236,0.0234
1,2,64,0.0109,0.0063,0.004,0.0033,0.0059,0.0061,0.0044,0.0055,...,0.0217,0.0215,0.0216,0.0218,0.0216,0.0217,0.0218,0.0218,0.0217,0.0219
2,3,64,1.3632,1.3478,1.3399,1.347,1.3514,1.3632,1.3654,1.3782,...,0.0242,0.021,0.024,0.0238,0.0246,0.0242,0.0243,0.0241,0.0238,0.0203
3,4,64,1.3501,1.3505,1.3611,1.3732,1.3562,1.3566,1.3592,1.3583,...,0.1153,0.1119,0.0985,0.1097,0.1041,0.0987,0.0973,0.0997,0.0932,0.094
4,5,64,1.3508,1.3288,1.3194,1.3263,1.3217,1.3371,1.3287,1.3341,...,0.9229,0.929,0.6543,0.5663,0.5085,0.4255,0.3838,0.4603,0.3291,0.38
5,6,64,1.4181,1.3626,1.3873,1.3907,1.3759,1.3741,1.3923,1.3989,...,0.0097,0.0098,0.0098,0.0099,0.0098,0.0098,0.0098,0.0098,0.0098,0.0098


In [72]:
# One frame per segment (1 to 3), each holding the algorithm id together with
# that segment's measurements m2..m10; the m1 columns are not selected.
comparison_segments = [
    df_comparison.loc[:, ['algorithm'] + [f's{segment}_m{run}' for run in range(2, 11)]]
    for segment in range(1, 4)
]
comparison_segments[0]

Unnamed: 0,algorithm,s1_m2,s1_m3,s1_m4,s1_m5,s1_m6,s1_m7,s1_m8,s1_m9,s1_m10
0,0,0.0066,0.004,0.0071,0.0055,0.0044,0.005,0.0039,0.0038,0.006
1,2,0.0063,0.004,0.0033,0.0059,0.0061,0.0044,0.0055,0.0055,0.0045
2,3,1.3478,1.3399,1.347,1.3514,1.3632,1.3654,1.3782,1.3767,1.375
3,4,1.3505,1.3611,1.3732,1.3562,1.3566,1.3592,1.3583,1.3424,1.342
4,5,1.3288,1.3194,1.3263,1.3217,1.3371,1.3287,1.3341,1.3354,1.3395
5,6,1.3626,1.3873,1.3907,1.3759,1.3741,1.3923,1.3989,1.3912,1.3866


In [73]:
# Per segment: the measurements in long format, indexed by (algorithm, measurement name).
grouped_comparison_segments = [
    df_comparison_segment.set_index(['algorithm']).stack()
    for df_comparison_segment in comparison_segments
]
# Per segment: a dict mapping each algorithm id to a Series with the entries
# 'mean' and 'std'. Both statistics are requested by name: pandas resolves the
# np.mean / np.std callables to its own methods (the sample standard deviation
# in the case of std) and has deprecated doing so. Naming them keeps today's
# numbers and silences the FutureWarning.
res_comparison_segments = [
    {
        alg: grouped_comparison_segment[alg].agg(['mean', 'std'])
        for alg in sorted(set(df_comparison.algorithm))
    }
    for grouped_comparison_segment in grouped_comparison_segments
]

In [119]:
# Grouped bar chart: one group of three bars (one per measured part) per algorithm.
fig, ax = plt.subplots(figsize=FIGSIZE)

ax.yaxis.grid(True, which='major',zorder=0)
ax.set_ylim([0,2.5])

ax.yaxis.set_major_locator(MultipleLocator(0.25))

# The algorithms present in the comparison data, each mapped to the x position
# of its bar group. Everything below (bars, tick labels, legend) is driven by
# this one list so that the three always agree.
algorithms = sorted(set(df_comparison.algorithm))
alg_shift = {alg: position for position, alg in enumerate(algorithms)}
# Hatch pattern per part, in the order of the entries of res_comparison_segments.
h = {0:"\\\\\\\\",1:"....",2:"////"}
# Bar width; the three bars of a group sit at x-w, x and x+w.
w = 0.3
for alg in algorithms:
    x = alg_shift[alg]
    for delta,j in zip([-w,0,w],range(0,3)):
        std = res_comparison_segments[j][alg]['std']
        y = res_comparison_segments[j][alg]['mean']
        ax.bar(x+delta, y, width = w, yerr=std,hatch=h[j],**BAR_STYLES[alg],capsize=2,zorder=3)
        # Print the bar's value vertically, just above its error bar.
        ax.annotate(f"${y:.2f}$",(x+delta,y+std),textcoords="offset points",xytext=(0,10),ha='center',fontsize=11,rotation=90)

def get_mean(alg):
    """Mean of the measurements m2..m10 of ``alg`` on 64 processes in the strong scaling data (2^19 vertices)."""
    rows = df_strongscaling_s19[(df_strongscaling_s19.algorithm == alg) & (df_strongscaling_s19.cores == 64)]
    return rows.loc[:,'m2':'m10'].mean(axis=1).values[0]

# One tick label per bar group. Iterating the algorithms of the comparison data
# (not those of the strong scaling data, which also contain the Boost library)
# avoids an extra tick without any bars.
xlabels = [f"{get_mean(alg):.2f}" for alg in algorithms]
ax.set_xticks(np.arange(len(xlabels)),xlabels)
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

# The unit is typeset as in the other figures: only the "s" is in math mode.
ax.set(xlabel=r"Total time [$\mathrm{s}$]",
       ylabel=r"Average time [$\mathrm{s}$]",
       title=r"\textbf{Comparison of time spent on parts, $\mathbf{2^{19}}$ Vertices}")

handles_algs = [mpatches.Patch(**BAR_STYLES[alg],label=tex_escape(ALGORITHMS[alg])) for alg in algorithms]
legend_algs = ax.legend(handles = handles_algs,loc='upper center',bbox_to_anchor=(0.5,-0.12), fontsize=11,ncol=2,frameon=False)

# A second call to ax.legend replaces the first legend, so the algorithm legend
# is re-added as a plain artist to keep both on the figure.
ax.add_artist(legend_algs)

handles_segments = [
    mpatches.Patch(hatch=h[0],fill=False,linewidth=0,label=r"Graph distribution"),
    mpatches.Patch(hatch=h[1],fill=False,linewidth=0,label=r"Finding lightest edge"),
    mpatches.Patch(hatch=h[2],fill=False,linewidth=0,label=r"Finding root")
]
ax.legend(handles = handles_segments,title="Parts",loc=2,fontsize=11,framealpha=1.0)

fig.savefig('comparison.pdf',bbox_inches='tight')
plt.show()
del fig, ax, handles_algs, legend_algs, handles_segments, xlabels, alg_shift, get_mean, algorithms

<Figure size 4800x3600 with 1 Axes>