# This notebook processes the rev1 results

1. Compile rev1 with:
```
make bin/rev1
```
2. Run the benchmarking script:
```
cd scripts
python3 run_par_rev1.py
```
3. The output we will process is `rev1_collapse_<hostname>.txt`, which should already be saved in the `Results` folder.


In [None]:
import os
import shutil

# get the hostname of the server
hostname = os.popen("hostname").read().strip()
# ensure the per-host directory exists
os.makedirs(hostname, exist_ok=True)
# name of the file written by the rev1 benchmark
rev1_file = f'rev1_collapse_{hostname}.txt'
# final location of that file, inside the per-host directory
rev1_dest = os.path.join(hostname, rev1_file)
if os.path.exists(rev1_file):
    # the file is still in the current directory: move it into the per-host directory.
    # shutil.move is used instead of a shell `mv` so the file name is never parsed by a shell;
    # giving the full destination path replaces a previous copy, as `mv` would.
    shutil.move(rev1_file, rev1_dest)
elif not os.path.exists(rev1_dest):
    # neither in the current directory nor already moved: there is nothing to process
    raise FileNotFoundError(f'File {rev1_file} not found: something went wrong with the rev1 benchmark.')
# from here on rev1_file is the path inside the per-host directory
rev1_file = rev1_dest

# Get the results from the `rev1_collapse` file

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import sys
from statistics import mean

# point clouds used in the benchmark
cloud_list = ['AlcoyH', 'ArzuaH', 'BrionFH', 'BrionUH']

# physical cores per socket, as reported by lscpu
cores_per_socket = int(os.popen("lscpu | grep 'Core(s) per socket' | awk '{print $4}'").read().strip())
# number of sockets, as reported by lscpu
nsockets = int(os.popen("lscpu | grep 'Socket(s)' | awk '{print $2}'").read().strip())
# total number of physical cores
nprocs = cores_per_socket * nsockets
# thread counts used in the execution: 1 followed by nprocs//2 evenly spaced values from 2 to nprocs
multi_thread_counts = np.linspace(2, nprocs, nprocs // 2, dtype=int, endpoint=True)
num_threads = np.insert(multi_thread_counts, 0, 1)

def tokenize_rev1(filename, noH=False):
    """
        This function reads the output of the rev1 benchmark and returns a dictionary with the results.

        Each execution is expected to be reported as a "Running:" line followed by one 'Octree' line,
        three "STAGE" lines per repetition and a final 'Average:' line.

        Args:
            filename: the name of the file to read
            noH: if True, remove the last character (the 'H') of the cloud names

        Returns:
            A dictionary with the results of the benchmark:
            dict(cloud_name: dict(chunk_size: dict(num_threads: dict(metric: time))))
            where metric is 'octree', 'stage1', 'stage2', 'stage3', 'owm' or 'total'.

        Raises:
            ValueError: if a timing line appears before any "Running:" line.
    """
    experiment = {}
    # list that receives the timings of the execution announced by the last "Running:" line
    samples = None
    with open(filename) as f:
        for line in f:
            tokens = line.split()
            if "Running:" in tokens:
                # the cloud name is the fourth component of the path given as third token
                name = tokens[2].split("/")[3]
                if noH:
                    name = name[:-1]
                # number of threads used
                nth = int(tokens[6])
                # chunk size used in the dynamic scheduling of the stage 1
                chunk = int(tokens[8])
                # create the nested entries on first use and keep a handle to the list of timings
                samples = experiment.setdefault(name, {}).setdefault(chunk, {}).setdefault(nth, [])
                continue
            if 'Octree' in tokens or "STAGE" in tokens:
                # octree construction time, or the time of one of the stages 1, 2 and 3
                value = tokens[5]
            elif 'Average:' in tokens:
                # average time for OWM
                value = tokens[1]
            else:
                continue
            if samples is None:
                raise ValueError(f'{filename}: timing line found before any "Running:" line')
            samples.append(float(value))

    results = {}
    for cloud, by_chunk in experiment.items():
        results[cloud] = {}
        for chunk, by_nth in by_chunk.items():
            results[cloud][chunk] = {}
            for nth, times in by_nth.items():
                # layout of times: [octree, (stage1, stage2, stage3) per repetition..., average]
                octree = times[0]
                owm = times[-1]
                # drop the octree time and the trailing average: slicing the whole list with
                # [1::3] would also pick the average and distort the stage 1 mean
                stage_times = times[1:-1]
                results[cloud][chunk][nth] = {
                    'octree': octree,
                    'stage1': mean(stage_times[0::3]),
                    'stage2': mean(stage_times[1::3]),
                    'stage3': mean(stage_times[2::3]),
                    'owm': owm,
                    # keep total = octree + owm
                    'total': octree + owm,
                }
    return results

# parse the rev1 benchmark output, keeping the cloud names as they appear in the file
rrev1 = tokenize_rev1(rev1_file, noH=False)


# Get the results from Baseline as a reference

In [None]:
# this code is from process_o1quadtree_results.ipynb
def _summarize_base(times):
    """
        Summarize the timings of one baseline execution.

        Args:
            times: list laid out as [octree, 15 stage times (stage 1, 2, 3 for five repetitions), average]

        Returns:
            dict with the keys 'octree', 'stage1', 'stage2', 'stage3', 'owm' and 'total'
    """
    summary = {
        'octree': times[0],
        # indices 1..15 hold the stage times; the stop index is 16 so that the last
        # stage 3 sample (index 15) is part of its mean
        'stage1': mean(times[1:16:3]),
        'stage2': mean(times[2:16:3]),
        'stage3': mean(times[3:16:3]),
        # index 16 is the average OWM time
        'owm': times[16],
    }
    # total time
    summary['total'] = summary['octree'] + summary['owm']
    return summary


experiment_base = {}
with open(os.path.join(hostname, f'baseline_{hostname}.txt')) as f:
    for line in f:
        tokens = line.split()
        # header of an execution: cloud name and number of threads
        if "Running:" in tokens:
            name = tokens[2].split("/")[3]
            nth = int(tokens[6])
            if name not in experiment_base:
                experiment_base[name] = {'seq': {}, 'par': {}}
        # the following timings belong to the sequential version
        if "SEQUENTIAL" in tokens:
            status = "seq"
        # the following timings belong to the parallel version
        if 'CORES' in tokens:
            status = "par"
        # the octree time starts a fresh list of timings for this execution
        if 'Octree' in tokens:
            experiment_base[name][status][nth] = [float(tokens[5])]
        # time of one of the stages 1, 2 and 3
        if "STAGE" in tokens:
            experiment_base[name][status][nth].append(float(tokens[5]))
        # average time for OWM
        if 'Average:' in tokens:
            experiment_base[name][status][nth].append(float(tokens[1]))

results_base = {}
for cloud, runs in experiment_base.items():
    results_base[cloud] = {
        # the sequential execution is stored under 1 thread
        'seq': _summarize_base(runs['seq'][1]),
        # one summary per number of threads
        'par': {nth: _summarize_base(times) for nth, times in runs['par'].items()},
    }

In [None]:
def _best_chunk(cloud, nth):
    """Return the chunk size of rrev1[cloud] with the lowest total time for nth threads."""
    by_chunk = rrev1[cloud]
    return min(by_chunk, key=lambda chunk: by_chunk[chunk][nth]['total'])


# largest number of threads in num_threads
maxth = max(num_threads)

# baseline octree construction time of each cloud, averaged over all the thread counts
octree_time = []
for cloud, base in results_base.items():
    avg_octree = mean([base['par'][nth]['octree'] for nth in num_threads])
    print("Cloud {} has Octree creation time = {:.2f}".format(cloud, avg_octree))
    octree_time.append(avg_octree)
print()

# baseline, sequential
for cloud, base in results_base.items():
    print("Cloud {} has sequential OWM time = {:.2f}".format(cloud, base['seq']['owm']))
print()

# rev1 with one thread, using the best chunk for one thread
for cloud in rrev1:
    best = _best_chunk(cloud, 1)
    rev1_owm = rrev1[cloud][best][1]['owm']
    print("Cloud {} has sequential OWM-rev1 time @ = {:.2f} ({:.2f}x) ; chunk={}".format(cloud, rev1_owm, results_base[cloud]['seq']['owm'] / rev1_owm, best))
print()

# baseline with maxth threads against the sequential baseline
for cloud in experiment_base:
    base_owm = results_base[cloud]['par'][maxth]['owm']
    print("Cloud {} has parallel OWM-Base with {} threads time (speedup) = {:.2f} ({:.1f}x)".format(cloud, maxth, base_owm, results_base[cloud]['seq']['owm'] / base_owm))
print()

# rev1 with maxth threads against the baseline with maxth threads
for cloud in rrev1:
    best = _best_chunk(cloud, maxth)
    rev1_owm = rrev1[cloud][best][maxth]['owm']
    print("Cloud {} has parallel OWM-rev1 with {} threads time(speedup wrt base {} th) = {:.2f} ({:.2f}x) ; chunk={}".format(cloud, maxth, maxth, rev1_owm, results_base[cloud]['par'][maxth]['owm'] / rev1_owm, best))
print()

# rev1 with maxth threads against the sequential baseline
for cloud in rrev1:
    best = _best_chunk(cloud, maxth)
    rev1_owm = rrev1[cloud][best][maxth]['owm']
    print("Cloud {} has parallel OWM-rev1 with {} threads time(speedup wrt base 1th) = {:.2f} ({:.1f}x) ; chunk={}".format(cloud, maxth, rev1_owm, results_base[cloud]['seq']['owm'] / rev1_owm, best))

In [None]:
def plot_chunk_time(nth, results):
    """
        This function plots, for each cloud in cloud_list, the average time for OWM, the time of building
        the octree and the total time against the chunk size, and saves the figure as a PDF in the
        directory of the host.

        Args:
            nth: the number of threads used in the execution
            results: the dictionary with the results of the benchmark
    """
    # Configuration variables
    ylabelfs = 18
    xlabelfs = 18
    xticksfs = 16
    legendfs = 14
    linew = 2
    markers = 8
    marks = ['o-', 'x-', 's-']

    # (key in the results, legend label) of each curve, in plotting order
    series = [('owm', 'OWM Trav.'), ('octree', 'Tree Const.'), ('total', 'Total')]
    # define grid of plots
    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 5), constrained_layout=True)
    # for each cloud
    for i, cloud in enumerate(cloud_list):
        # get the list of chunk sizes
        blist = list(results[cloud].keys())
        # one curve per metric
        for (metric, _), mark in zip(series, marks):
            axs[i].plot(np.array(blist), np.array([results[cloud][chunk][nth][metric] for chunk in blist]), mark, linewidth=linew, markersize=markers)

        axs[i].set_title(cloud, fontsize=16)
        axs[i].set_xlabel('Chunk Size', fontsize=xlabelfs)
        axs[i].set_xticks(blist)
        axs[i].tick_params(axis='x', labelsize=xticksfs)
        axs[i].grid()

    if nth == 1:
        fig.suptitle('Execution time (sec.)', fontweight='bold', fontsize=18)
    elif nth > 1:
        fig.suptitle(f'{nth}-threads execution time (sec.) REV1 @ {hostname.upper()}', fontweight='bold', fontsize=18)

    axs[0].set_ylabel('Time (sec.)', fontsize=ylabelfs)
    axs[0].legend([label for _, label in series], loc='best', fontsize=legendfs)
    # the context manager closes the PDF file even if saving the figure fails
    with PdfPages(os.path.join(hostname, f'Rev1_{nth}coresExecTime-{hostname}.pdf')) as pp:
        pp.savefig(fig)

# on the 'alder' host (AlderLake) plot 8 and 16 threads; elsewhere only the last entry of num_threads
nths_to_plot = (8, 16) if hostname == 'alder' else (num_threads[-1],)
for nth_to_plot in nths_to_plot:
    plot_chunk_time(nth_to_plot, rrev1)


In [None]:
def plot_times_index(results, rbase, maxth=8):
    """
        Draw one panel per cloud in cloud_list with the execution times against the number of threads.

        For each cloud, the chunk size with the lowest total time with maxth threads is selected and
        four curves are plotted: the average time for OWM, the average time for OWM in the baseline,
        the time of building the octree and the total time.

        Args:
            results: the dictionary with the results of the benchmark
            rbase: the dictionary with the results of the baseline benchmark
            maxth: the number of threads used to select the best chunk size
    """
    # sizes and line styles shared by every panel
    axis_label_fs = 18
    tick_label_fs = 16
    line_width = 2
    marker_size = 8
    line_styles = ['o-', 'x-', 's-']

    legend_entries = ['OWM Trav.', 'OWM Base.', 'Tree Const.', 'Total']
    # grid of plots
    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 5), constrained_layout=True)
    for idx, name in enumerate(cloud_list):
        by_chunk = results[name]
        # best chunk of this cloud
        best = min(by_chunk, key=lambda chunk: by_chunk[chunk][maxth]['total'])
        runs = by_chunk[best]
        # thread counts available for that chunk
        threads = list(runs.keys())
        ax = axs[idx]

        # average time for OWM
        owm_times = np.array([runs[n]['owm'] for n in threads])
        ax.plot(np.array(threads), owm_times, line_styles[0], linewidth=line_width, markersize=marker_size)
        # average time for OWM in the baseline
        base_times = np.array([rbase[name]['par'][n]['owm'] for n in threads])
        ax.plot(np.array(threads), base_times, '--x', color='red', linewidth=line_width, markersize=marker_size)
        # time of building the octree
        octree_times = np.array([runs[n]['octree'] for n in threads])
        ax.plot(np.array(threads), octree_times, line_styles[1], linewidth=line_width, markersize=marker_size)
        # total time
        total_times = np.array([runs[n]['total'] for n in threads])
        ax.plot(np.array(threads), total_times, line_styles[2], linewidth=line_width, markersize=marker_size)

        ax.set_title(name + " (chunk=" + str(best) + ")", fontsize=16)
        ax.set_xlabel('Num threads', fontsize=axis_label_fs)
        ax.set_xticks(threads)
        ax.tick_params(axis='x', labelsize=tick_label_fs)
        ax.grid()

    fig.suptitle(f'Execution time (sec.) REV1 @ {hostname.upper()}', fontweight='bold', fontsize=18)
    first_ax = axs[0]
    first_ax.set_ylabel('Time (sec.)', fontsize=axis_label_fs)
    first_ax.legend(legend_entries, loc='best', fontsize=14)

# select each cloud's best chunk using the last entry of num_threads
plot_times_index(results=rrev1, rbase=results_base, maxth=num_threads[-1])

# Compare OWM traversal improvement

In [None]:
def printowm(nth, res, rbase):
    """
        Print, for each cloud in res, the chunk size with the lowest total time with nth threads,
        the OWM time of the baseline and of rev1 with nth threads, and the speedup of rev1.

        Args:
            nth: the number of threads used in the execution
            res: the dictionary with the results of the benchmark
            rbase: the dictionary with the results of the baseline benchmark
    """
    for cloud, by_chunk in res.items():
        # the best chunk is the one with the lowest total time
        best = min(by_chunk, key=lambda chunk: by_chunk[chunk][nth]['total'])
        print("Cloud {} has best chunk size for Open-MP dynamic scheduler = {}.".format(cloud, best))
        base_owm = rbase[cloud]['par'][nth]['owm']
        print("Cloud {} has {}-cores OWM-Base time = {:.2f}.".format(cloud, nth, base_owm))
        rev1_owm = by_chunk[best][nth]['owm']
        print("Cloud {} has {}-cores OWM-rev1 time = {:.2f}. Speedup (rev1 vs Base) = {:.2f}x.".format(cloud, nth, rev1_owm, base_owm / rev1_owm))

# report the OWM improvement for the last entry of num_threads
printowm(nth=num_threads[-1], res=rrev1, rbase=results_base)

# Compare Total (OWM traversal + Tree construction) improvement

In [None]:
def printtotal(nth, res, rbase):
    """
        Print, for each cloud in res, the chunk size with the lowest total time with nth threads,
        the total time of the baseline and of rev1 with nth threads, and the speedup of rev1.

        Args:
            nth: the number of threads used in the execution
            res: the dictionary with the results of the benchmark
            rbase: the dictionary with the results of the baseline benchmark
    """
    for cloud, by_chunk in res.items():
        # the best chunk is the one with the lowest total time
        best = min(by_chunk, key=lambda chunk: by_chunk[chunk][nth]['total'])
        print("Cloud {} has best chunk size for Open-MP dynamic scheduler = {}.".format(cloud, best))
        base_total = rbase[cloud]['par'][nth]['total']
        print("Cloud {} has {}-cores Base Total time = {:.2f}.".format(cloud, nth, base_total))
        rev1_total = by_chunk[best][nth]['total']
        print("Cloud {} has {}-cores Rev1 Total time = {:.2f}. Speedup (rev1 vs Base) = {:.2f}x.".format(cloud, nth, rev1_total, base_total / rev1_total))

# report the total time improvement for the last entry of num_threads
printtotal(nth=num_threads[-1], res=rrev1, rbase=results_base)

In [None]:
import seaborn as sns

def plot_res(res, rbase, maxth=8):
    """
        This function plots the speedup of rev1 and of the baseline for each cloud in res.

        Two panels are drawn, one for the OWM time ('owm') and one for the total time ('total').
        For rev1 the speedup is the 1-thread time divided by the j-thread time, both taken with the
        best chunk of the cloud (lowest total time with maxth threads). For the baseline it is the
        sequential time divided by the j-thread parallel time. A line with slope 1 is added as the
        ideal speedup.

        Args:
            res: the dictionary with the results of the benchmark
            rbase: the dictionary with the results of the baseline benchmark
            maxth: the number of threads used to select the best chunk size

        Returns:
            The figure with the two panels.
    """
    # Configuration variables
    titlefs = 20
    ylabelfs = 18
    xlabelfs = 18
    xticksfs = 16
    legendfs = 10
    linew = 2
    markers = 8
    marks = ['o-', 'x-', 's-', 'v-', '+-']

    # get the bestchunk for each cloud
    bestchunk = {cloud: min(res[cloud], key=lambda c, cloud=cloud: res[cloud][c][maxth]['total']) for cloud in res}
    # get the list of number of threads from the first cloud of res with its own best chunk
    first_cloud = next(iter(res))
    numthreads = list(res[first_cloud][bestchunk[first_cloud]].keys())
    # title of the panel of each phase, so that a panel cannot get the title of another phase
    titles = {'owm': 'OWM Trav.', 'total': 'Total'}

    # get the color palette
    colors = sns.color_palette("deep")

    # the only figure created here: no empty figure is left behind
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), constrained_layout=True)

    for ax, (phase, title) in zip(axs, titles.items()):
        print("Phase: "+phase+ " speedup for each cloud and best chunk for each cloud: "+str(list(res.keys())))
        for cloud, mark, color in zip(res, marks, colors[:len(res)]):
            chunk = bestchunk[cloud]
            # rev1 speedup with respect to rev1 with one thread
            rev1_speedup = np.array([res[cloud][chunk][1][phase]/res[cloud][chunk][j][phase] for j in numthreads])
            print(rev1_speedup)
            ax.plot(np.array(numthreads), rev1_speedup, mark, linewidth=linew, markersize=markers, color=color, label=f'{cloud} (chunk={chunk})')
            # baseline speedup with respect to the sequential baseline
            base_speedup = np.array([rbase[cloud]['seq'][phase]/rbase[cloud]['par'][j][phase] for j in numthreads])
            ax.plot(np.array(numthreads), base_speedup, '--o', linewidth=linew, markersize=markers, color=color, label=f'{cloud} Base')

        # ideal speedup
        ax.plot(np.array(numthreads), np.array(numthreads), '-', linewidth=linew, markersize=markers, color=colors[6], label='Ideal')
        ax.set_title(title, fontsize=titlefs)
        ax.set_xlabel('Number of cores', fontsize=xlabelfs)
        ax.set_xticks(numthreads)
        ax.tick_params(axis='x', labelsize=xticksfs)
        ax.grid()

    fig.suptitle(f'Speedup REV1 vs Baseline @ {hostname.upper()}', fontweight='bold', fontsize=titlefs)
    axs[0].set_ylabel('Speedup', fontsize=ylabelfs)
    axs[0].legend(loc='best', fontsize=legendfs)

    plt.show()
    return fig

# select each cloud's best chunk using the last entry of num_threads
fig = plot_res(rrev1, results_base, maxth=num_threads[-1])

In [None]:
# save the speedup figure in the directory of the host;
# the context manager closes the PDF file even if saving the figure fails
with PdfPages(os.path.join(hostname, f'Speedup-rev1-collapse-{hostname}.pdf')) as pp:
    pp.savefig(fig)