In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import json
import pandas as pd
import numpy as np
from prettytable import PrettyTable
import re
import os

In [None]:
# Shared plotly template applied to every figure: Helvetica everywhere,
# a larger title font, and the legend positioned at x=0, y=1.
my_template = {
    "layout": go.Layout(
        title_font={"family": "Helvetica", "size": 32},
        font_family="Helvetica",
        font_size=20,
        legend={
            "x": 0,
            "y": 1,
            "traceorder": 'normal',
            "font": {"size": 20},
        },
    ),
}

# Folders that exported figures are written to (one per benchmark suite).
# They are created up front so saving a figure never fails on a missing folder.
OUTPUT_DIRS = ["output/mpibench", "output/osu"]
for output_dir in OUTPUT_DIRS:
    # exist_ok=True makes the cell idempotent on re-run and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_dir, exist_ok=True)

# Line colour used for each container runtime in the plots.
# "singularity" and "singularitysif" intentionally share the same colour.
container_colors = dict(
    nocont="green",
    docker="blue",
    podman="purple",
    singularity="orange",
    singularitysif="orange",
    charliecloud="red",
    sarus="brown",
    balena="olive",
)

In [None]:
def get_valid_filename(s):
    """Turn ``s`` into a string that is safe to use as a file name.

    The value is converted with ``str()``, surrounding whitespace is removed,
    remaining spaces become underscores, and every character that is not a
    word character, a dash or a dot is dropped.
    """
    underscored = str(s).strip().replace(' ', '_')
    unsafe_chars = re.compile(r'(?u)[^-\w.]')
    return unsafe_chars.sub('', underscored)
        
def get_filename(benchmark, title):
    """Return the extension-less file name ``<benchmark>_<sanitized title>``."""
    sanitized_title = get_valid_filename(title)
    return "_".join([benchmark, sanitized_title])

def _result_title(bench, xgroup, name):
    """Build the figure title for one group handled by ``print_result``.

    ``name`` is the group key tuple; which position holds which field depends
    on the columns the caller grouped by, hence one format per
    (bench, xgroup) pair.

    Raises:
        ValueError: if the (bench, xgroup) pair is not one of the supported
            combinations (previously this surfaced as an unbound ``title``).
    """
    if bench == 'mpibench' and xgroup == 'bytes':
        return "{} operation, {} nodes, {} cores".format(name[2], name[0], name[1])
    if bench == 'mpibench' and xgroup == 'nodes':
        return "{} operation, {} cores, {} bytes".format(name[1], name[0], name[2])
    if bench == 'osu' and xgroup == 'size':
        return "{} operation, {} nodes, {} cores".format(name[2], name[0], name[1])
    if bench == 'osu' and xgroup == 'nodes':
        return "{} operation, {} cores, {} size".format(name[2], name[1], name[0])
    raise ValueError(
        "unsupported bench/xgroup combination: {!r}/{!r}".format(bench, xgroup))


def print_result(gb,
                 bench,
                 xgroup,
                 log_x=False, log_y=False,
                 nocont='nocont',
                 output_folder='output/',
                 print_data=False,
                 print_graph=True,
                 save_graph=False):
    """Plot and tabulate benchmark timings, one figure and table per group.

    For every group in ``gb`` a line plot of ``avg`` against ``xgroup`` is
    drawn with one trace per container, and a text table is printed that
    lists each container's ``avg`` next to its slowdown factor relative to
    the baseline container (``container avg / baseline avg``).

    Args:
        gb: pandas GroupBy whose keys are 3-tuples laid out as expected for
            the given ``bench``/``xgroup`` pair (see ``_result_title``). Each
            group needs the columns ``container``, ``avg`` and ``xgroup``;
            ``min``/``max`` are optional and enable error bars.
        bench: benchmark suite, ``'mpibench'`` or ``'osu'``.
        xgroup: column plotted on the x axis.
        log_x, log_y: use a logarithmic scale on the respective axis.
        nocont: value of ``container`` that identifies the baseline run.
        output_folder: root folder for exported figures; the figure goes to
            ``<output_folder>/<bench>/``.
        print_data: also print the raw rows of every group.
        print_graph: show the figure.
        save_graph: export the figure as PDF.

    Raises:
        ValueError: for an unsupported ``bench``/``xgroup`` combination.
        KeyError: if a container has no entry in ``container_colors``.
    """
    if xgroup == 'bytes' or xgroup == 'size':
        xaxis_title = 'msg size [bytes]'
    else:
        xaxis_title = xgroup

    log_tickvals = [0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000]
    log_ticktext = ['0.01', '0.1', '1', '10', '100', '1000', '10K', '100K', '1M']

    for name, group in gb:
        # Resolve the title first so an unsupported combination fails before
        # any figure is built, instead of reusing a stale title.
        title = _result_title(bench, xgroup, name)

        if print_data:
            print(name)
            print(group)
        fig = go.Figure()
        table = PrettyTable()

        # Baseline (bare-metal) rows of this group. Recomputed per group so a
        # baseline from a previous group can never leak into this one.
        baseline = group[group['container'] == nocont]
        baseline_avg = None
        if not baseline.empty:
            baseline_avg = np.array(baseline['avg'])
            table.add_column(xgroup, np.array(baseline[xgroup]))
            table.add_column(nocont, baseline_avg)

        # Group by the plain column name rather than a one-element list:
        # with a list, pandas >= 2.0 yields 1-tuple keys, which would break
        # both the baseline comparison and the colour lookup.
        for container, rows in group.groupby('container'):
            if print_data:
                print(container)
                print(rows)
            if 'min' in rows and 'max' in rows:
                # asymmetric error bars spanning [min, max] around avg
                error_y = dict(
                    type='data',
                    array=rows['max'] - rows['avg'],
                    arrayminus=rows['avg'] - rows['min'],
                    thickness=3, width=6,
                    visible=True)
            else:
                error_y = None
            fig.add_trace(go.Scatter(x=rows[xgroup], y=rows['avg'],
                                     mode='lines+markers', name=container,
                                     line=dict(color=container_colors[container], width=6),
                                     error_y=error_y))

            if container == nocont:
                continue  # baseline columns were added above
            container_avg = np.array(rows['avg'])
            if baseline_avg is None:
                # No baseline in this group: still list the raw timings, but
                # there is nothing to compute a slowdown against.
                if not table.field_names:
                    table.add_column(xgroup, np.array(rows[xgroup]))
                table.add_column(container, container_avg)
            else:
                table.add_column(container, container_avg)
                table.add_column(container + ' x', container_avg / baseline_avg)
        # Kept after the columns are added, as in the original.
        # NOTE(review): PrettyTable appears to apply the format to the fields
        # present at assignment time - confirm before moving this line.
        table.float_format = ".2"

        fig.update_layout(autosize=True,
                          width=1000,
                          height=800,
                          template=my_template,
                          xaxis_title=xaxis_title,
                          yaxis_title='time [us]',
                          title=title)
        axis_style = dict(automargin=True,
                          showgrid=True,
                          mirror=True,
                          ticks='inside',
                          showline=True)
        fig.update_xaxes(**axis_style)
        fig.update_yaxes(**axis_style)
        if log_x:
            # explicit ticks so large sizes read as 10K/100K/1M
            fig.update_layout(
                xaxis=dict(
                    type='log',
                    tickmode='array',
                    tickvals=log_tickvals,
                    ticktext=log_ticktext,
                )
            )
        if log_y:
            fig.update_layout(yaxis_type='log')

        if print_graph:
            fig.show()
        if save_graph:
            target_dir = os.path.join(output_folder, bench)
            os.makedirs(target_dir, exist_ok=True)
            fig.write_image(os.path.join(target_dir, get_filename(bench, title) + ".pdf"))

        print(title)
        print(table)

In [None]:
# Name of the result file inside every run folder, and the container
# runtimes to compare ("nocont" is the bare-metal baseline).
result_file = "res_mpi.txt"
containers = [
    "nocont",
    "singularity",
    "charliecloud",
]

# MPI Bench
[https://github.com/LLNL/mpiBench](https://github.com/LLNL/mpiBench)

In [None]:
# Load every mpiBench result file
# (<result_folder>/<container>/<nodes>/<ntasks>/<size>/<result_file>)
# into one DataFrame per container, then concatenate them into df_mbench.
result_folder = "./mpiBench/"
operations = ["Barrier",
              "Bcast",
              "Alltoall",
              "Alltoallv",
              "Allgather",
              "Allgatherv",
              "Gather",
              "Gatherv",
              "Scatter",
              "Allreduce",
              "Reduce"]
nodes = [2, 4, 5, 6, 7]
ntasks = [10, 30, 60]
size = "64K"

dfs = {}
for container in containers:
    rows = []
    for node in nodes:
        for ntask in ntasks:
            file_path = os.path.join(result_folder, container, str(node),
                                     str(ntask), size, result_file)
            # the context manager closes the file even if parsing fails
            with open(file_path, 'r') as result:
                for line in result:
                    # only lines whose first word is a known operation carry data
                    opd = line.split(' ', 1)[0]
                    if opd not in operations:
                        continue
                    # Values sit at tab-separated positions 2, 4, 6, 8, 10
                    # (presumably interleaved with their labels - confirm
                    # against an mpiBench output file).
                    data = line.split('\t')
                    bytesd = int(data[2].strip())
                    itersd = int(data[4].strip())
                    avgd = float(data[6].strip())
                    mind = float(data[8].strip())
                    maxd = float(data[10].strip())
                    rows.append([container, node, ntask, opd, bytesd, avgd, mind, maxd, itersd])

    # build the frame once per container, after all of its files are parsed
    dfs[container] = pd.DataFrame(rows, columns=['container', 'nodes', 'ntasks', 'operation', 'bytes', 'avg', 'min', 'max', 'itersd'])

# populate main df
df_mbench = pd.concat(list(dfs.values()))

In [None]:
# Show the combined mpiBench results as the cell output.
df_mbench

## Print result

## x=bytes, y=time

In [None]:
# One figure and table per (nodes, ntasks, operation) group,
# with the message size in bytes on the x axis.
gb_mbench = df_mbench.groupby(by=['nodes', 'ntasks', 'operation'])
print_result(
    gb=gb_mbench,
    bench='mpibench',
    xgroup='bytes',
    log_x=True,
    log_y=True,
    print_graph=True,
    save_graph=False,
)

## x=nodes, y=time

In [None]:
# One figure and table per (ntasks, operation, bytes) group,
# with the node count on the x axis.
gb_mbench = df_mbench.groupby(by=['ntasks', 'operation', 'bytes'])
print_result(
    gb=gb_mbench,
    bench='mpibench',
    xgroup='nodes',
    log_x=False,
    log_y=True,
    print_graph=True,
    save_graph=False,
)

# OSU Micro-Benchmarks
[http://mvapich.cse.ohio-state.edu/benchmarks/](http://mvapich.cse.ohio-state.edu/benchmarks/)

In [None]:
def parse_osu(line, test):
    """Parse one line of an OSU micro-benchmark result file.

    Args:
        line: raw text line read from the result file.
        test: ``'latency'`` or ``'alltoall'``; selects how many columns are
            read and how they are converted.

    Returns:
        For a data line, a tuple of the converted leading columns:
        ``(int, float)`` for ``'latency'`` and
        ``(int, float, float, float, int)`` for ``'alltoall'``.
        For blank lines and lines starting with ``#`` a tuple of ``None``
        of the same length, so callers can always unpack the result.

    Raises:
        ValueError: if ``test`` is not supported, or a data field cannot be
            converted to its numeric type.
        IndexError: if a data line has fewer columns than ``test`` requires.
    """
    if test == 'latency':
        converters = (int, float)
    elif test == 'alltoall':
        converters = (int, float, float, float, int)
    else:
        # previously fell through and returned a bare None, which only failed
        # later when the caller tried to unpack it
        raise ValueError("unsupported OSU test: {!r}".format(test))

    stripped = line.strip()
    # Skip blank/whitespace-only lines and comment lines, including '#' lines
    # without a following space and lines with Windows line endings.
    if not stripped or stripped.startswith('#'):
        return (None,) * len(converters)

    # split() handles any run of spaces or tabs between columns
    fields = stripped.split()
    return tuple(convert(fields[i]) for i, convert in enumerate(converters))

# Root folder of the OSU result files; the OSU loading cells build their paths from it.
result_folder = "./osu/"

## Alltoall

In [None]:
# Load the OSU alltoall results
# (<result_folder>/alltoall/<container>/<nodes>/<ntasks>/<result_file>)
# into df_a2a.
nodes = [1, 2, 3, 4, 5, 6, 7]
ntasks = [10, 30]
rows = []

operation = "alltoall"
for container in containers:
    for node in nodes:
        for ntask in ntasks:
            file_path = os.path.join(result_folder, operation, container,
                                     str(node), str(ntask), result_file)
            # the context manager closes the file even if parsing fails
            with open(file_path, 'r') as result:
                for line in result:
                    # msg_size (not `size`) so the mpiBench `size` setting is not clobbered
                    msg_size, avgt, mint, maxt, iterv = parse_osu(line, 'alltoall')
                    if msg_size is not None:  # None marks header/blank lines
                        rows.append([container, node, ntask, operation, msg_size, avgt, mint, maxt, iterv])

# populate df
df_a2a = pd.DataFrame(rows, columns=['container', 'nodes', 'ntasks', 'operation', 'size', 'avg', 'min', 'max', 'iter'])

In [None]:
# Show the parsed OSU alltoall results as the cell output.
df_a2a

## x=size, y=time

In [None]:
# One figure and table per (nodes, ntasks, operation) group,
# with the message size on the x axis; figures are also exported as PDF.
gb_a2a = df_a2a.groupby(by=['nodes', 'ntasks', 'operation'])
print_result(
    gb=gb_a2a,
    bench='osu',
    xgroup='size',
    log_x=True,
    log_y=True,
    print_graph=True,
    save_graph=True,
)

## x=nodes, y=time

In [None]:
# One figure and table per (size, ntasks, operation) group,
# with the node count on the x axis.
gb_a2a = df_a2a.groupby(by=['size', 'ntasks', 'operation'])
print_result(
    gb=gb_a2a,
    bench='osu',
    xgroup='nodes',
    log_x=False,
    log_y=True,
    print_graph=True,
    save_graph=False,
)

## Latency

In [None]:
# Load the OSU latency results
# (<result_folder>/latency/<container>/<nodes>/<ntasks>/<result_file>)
# into df_lat.
operation = "latency"
nodes = [1, 2]
ntasks = [2, 1]
rows = []

for container in containers:
    for node in nodes:
        for ntask in ntasks:
            # only the combinations with two tasks in total are loaded
            if node * ntask != 2:
                continue
            file_path = os.path.join(result_folder, operation, container,
                                     str(node), str(ntask), result_file)
            # the context manager closes the file even if parsing fails
            with open(file_path, 'r') as result:
                for line in result:
                    # msg_size (not `size`) so the mpiBench `size` setting is not clobbered
                    msg_size, avgt = parse_osu(line, 'latency')
                    if msg_size is not None:  # None marks header/blank lines
                        rows.append([container, node, ntask, operation, msg_size, avgt])

df_lat = pd.DataFrame(rows, columns=['container', 'nodes', 'ntasks', 'operation', 'size', 'avg'])

In [None]:
# Show the parsed OSU latency results as the cell output.
df_lat

## x=size, y=time

In [None]:
# One figure and table per (nodes, ntasks, operation) group,
# with the message size on the x axis; figures are also exported as PDF.
gb_lat = df_lat.groupby(by=['nodes', 'ntasks', 'operation'])
print_result(
    gb=gb_lat,
    bench='osu',
    xgroup='size',
    log_x=True,
    log_y=True,
    print_graph=True,
    save_graph=True,
)

## x=nodes, y=time

In [None]:
# One table per (size, ntasks, operation) group with the node count as the
# x column; figures are neither shown nor saved here, only tables are printed.
gb_lat = df_lat.groupby(by=['size', 'ntasks', 'operation'])
print_result(
    gb=gb_lat,
    bench='osu',
    xgroup='nodes',
    log_x=False,
    log_y=True,
    print_graph=False,
    save_graph=False,
)