# This script processes the rev1 results 

1. Compile rev1 with:
```
make bin/baseline-rev1
make bin/baseline-rev1collap
```
2. Run the benchmarking script
```
cd scripts
python run_par_baseline-rev1.py
```
3. These are the outputs of the benchmarking script that will be processed; they should already be in the `Results` directory:
```
baseline-rev1_<hostname>.txt
baseline-rev1_collapse_<hostname>.txt 
```

In [None]:
import os
import shutil

# Get the hostname of the server; the context manager closes the pipe once
# the command output has been read.
with os.popen("hostname") as pipe:
    hostname = pipe.read().strip()
# Ensure the per-host directory exists.
os.makedirs(hostname, exist_ok=True)
# Benchmark outputs expected for this host.
file_list = [
    f'baseline-rev1_{hostname}.txt',           # dynamic
    f'baseline-rev1_collapse_{hostname}.txt',  # dynamic + collapse
]

for rev1_file in file_list:
    destination = os.path.join(hostname, rev1_file)
    if os.path.exists(rev1_file):
        # Fresh output in the current directory: move it into the per-host
        # directory. The move is done outside of any `assert` so it still
        # happens under `python -O`, and without going through a shell.
        # An explicit destination file name keeps the overwrite behaviour of `mv`.
        shutil.move(rev1_file, destination)
    elif not os.path.exists(destination):
        # Neither a fresh output nor a previously staged copy is available.
        raise FileNotFoundError(
            f'File {rev1_file} not found: something went wrong with the baseline benchmark.'
        )

# From here on, refer to the files by their path inside the per-host directory.
file_list = [os.path.join(hostname, name) for name in file_list]

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import sys
from statistics import mean
from common.utils import get_nprocs, get_best_level

# thread counts used in the benchmark executions, as returned by the project
# helper get_nprocs() (assumed to be a collection of ints -- TODO confirm in common.utils)
num_threads = get_nprocs()
# largest thread count, used below as the reference configuration
maxth = max(num_threads) # same as num_threads[-1] only if the collection is sorted ascending

def tokenize_baseline(file):
    """Parse a baseline benchmark log into per-configuration timings.

    The log is read line by line and every line is split on whitespace. A line
    is recognised by the presence of one of these marker tokens:

    * ``Running:`` starts a run. Token 2 is a path whose last component is
      used as the cloud name, token 6 is the number of threads and token 8 is
      the chunk size used in the scheduler.
    * ``Octree`` carries the octree build time in token 5.
    * ``STAGE`` carries one stage time in token 5; stage times are collected
      in order of appearance and are expected to cycle through stages 1, 2, 3.
    * ``Average:`` carries the average time in token 1 (stored as ``owm``).

    Timing lines are attributed to the most recent ``Running:`` line.

    Args:
        file: Path of the log file to parse.

    Returns:
        A nested dict ``results[cloud][chunksize][nth]`` whose leaves hold the
        keys ``octree``, ``stage1``, ``stage2``, ``stage3`` (mean of every
        third collected stage time), ``owm`` and ``total`` (octree + owm).

    Raises:
        ValueError: If a timing line appears before any ``Running:`` line, or
            if a run lacks its octree time, its average time, or at least
            one time for each of the three stages.
    """
    markers = ('Octree', "STAGE", 'Average:')
    experiment = {}
    run = None  # timings of the run whose output is currently being read

    with open(file) as f:
        for lineno, line in enumerate(f, start=1):
            tokens = line.split()
            if "Running:" in tokens:
                name = tokens[2].split("/")[-1]
                nth = int(tokens[6])
                # get the chunk size used in the scheduler
                chunksize = int(tokens[8])
                # create the missing levels on demand; an already known
                # configuration keeps accumulating into its existing entry
                run = (experiment.setdefault(name, {})
                                 .setdefault(chunksize, {})
                                 .setdefault(nth, {'stages': []}))

            # a timing line cannot be attributed to anything without a header
            if run is None and any(marker in tokens for marker in markers):
                raise ValueError(
                    f'{file}:{lineno}: timing line found before any "Running:" line'
                )

            if 'Octree' in tokens:
                run['octree'] = float(tokens[5])
            if "STAGE" in tokens:
                run['stages'].append(float(tokens[5]))
            if 'Average:' in tokens:
                run['owm'] = float(tokens[1])

    results = {}

    # for each cloud / chunk size / number of threads
    for cloud, by_chunk in experiment.items():
        results[cloud] = {}
        for chunk, by_threads in by_chunk.items():
            results[cloud][chunk] = {}
            for nth, timings in by_threads.items():
                stages = timings['stages']
                # report incomplete runs with enough context to find them
                missing = [key for key in ('octree', 'owm') if key not in timings]
                if len(stages) < 3:
                    missing.append('stages')
                if missing:
                    raise ValueError(
                        f'{file}: incomplete run for cloud {cloud!r}, chunk size '
                        f'{chunk}, {nth} threads (missing: {", ".join(missing)})'
                    )
                octree = timings['octree']
                owm = timings['owm']
                results[cloud][chunk][nth] = {
                    # time of building the octree
                    'octree': octree,
                    # stage times are interleaved 1, 2, 3, 1, 2, 3, ...
                    'stage1': mean(stages[0::3]),
                    'stage2': mean(stages[1::3]),
                    'stage3': mean(stages[2::3]),
                    # average time reported by the benchmark
                    'owm': owm,
                    # keep total = octree + owm
                    'total': octree + owm,
                }

    return results

# dictionary with the parsed results of every benchmark file, keyed by the
# file name stripped of its host directory prefix and `_<hostname>.txt` suffix
all_results = {}
for results_path in file_list:
    label = results_path.replace(f'_{hostname}.txt', '').replace(f'{hostname}/','')
    all_results[label] = tokenize_baseline(results_path)
# shortcut to the dynamic results
brev1 = all_results['baseline-rev1']
# shortcut to the dynamic+collapse results
brev1collap = all_results['baseline-rev1_collapse']
print(list(all_results))

# Save results in All_Optimizations

In [None]:
output = os.path.join(hostname, f'All_Optimizations-{hostname}.csv')

# One entry per benchmark variant: its parsed results, the template echoed to
# the notebook output and the template of the row appended to the CSV file.
variants = (
    (brev1,
     "Base-REV1; {}; {:.5f}; {:.5f};{};{}",
     "Base-REV1;{};{:.5f};{:.5f};{};{}\n"),
    (brev1collap,
     "Base-REV1collap; {}; {:.5f}; {:.5f};{};{}",
     "Base-REV1collap;{};{:.5f};{:.5f};{};{}\n"),
)

# rows are appended, so existing content of the file is kept
with open(output, "a") as f:
    for variant_results, echo_template, row_template in variants:
        # mean octree build time of each cloud of this variant
        octree_t = []
        for cloud in variant_results:
            # chunk size selected by the project helper for the reference thread count
            bestchunk = get_best_level(variant_results, cloud, maxth, 'total')
            per_threads = variant_results[cloud][bestchunk]
            # octree time averaged over all the thread counts
            octree_mean = mean([per_threads[j]['octree'] for j in num_threads])
            octree_t.append(octree_mean)
            # owm time measured with the reference (maximum) thread count
            owm_time = per_threads[maxth]['owm']
            row = (cloud, octree_mean, owm_time, bestchunk, 0)
            print(echo_template.format(*row))
            f.write(row_template.format(*row))