# This script processes the rev1 results 

1. Compile rev1 with:
```
make bin/baseline-rev1
make bin/baseline-rev1collap
```
2. Run the benchmarking script:
```
cd scripts
python run_par_baseline-rev1.py
```
3. The benchmarking script produces the following output files, which this notebook processes; they should already be in the `Results` directory:
```
baseline-rev1_<hostname>.txt
baseline-rev1_collapse_<hostname>.txt 
```

In [None]:
import os
import shutil

# get the hostname of the server; the benchmark output files carry it in their names
with os.popen("hostname") as hostname_pipe:
    hostname = hostname_pipe.read().strip()
# ensure the per-host directory exists
os.makedirs(hostname, exist_ok=True)
# benchmark outputs expected for this host (dynamic scheduling, without and with collapse)
file_list = [
    f'baseline-rev1_{hostname}.txt',
    f'baseline-rev1_collapse_{hostname}.txt',
]

for rev1_file in file_list:
    archived_path = os.path.join(hostname, rev1_file)
    if os.path.exists(rev1_file):
        # fresh output in the current directory: move it into the per-host directory.
        # The full destination path is given so an older copy is replaced, as `mv` would do.
        shutil.move(rev1_file, archived_path)
    elif not os.path.exists(archived_path):
        # neither a fresh output nor a previously archived one: nothing to process.
        # Raised explicitly (instead of `assert`) so the check survives `python -O`.
        raise FileNotFoundError(
            f'File {rev1_file} not found: something went wrong with the baseline benchmark.'
        )

# from here on, every entry of file_list is the path inside the per-host directory
file_list = [os.path.join(hostname, rev1_file) for rev1_file in file_list]

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import sys
from statistics import mean
from common.utils import get_nprocs, get_best_level

# names of the clouds used in the benchmark
cloud_list = ["AlcoyH", "ArzuaH", "BrionFH", "BrionUH"]

# thread counts used in the executions, as reported by the project helper get_nprocs()
num_threads = get_nprocs()
# largest thread count among them
maxth = max(num_threads)

def tokenize_rev1(filename, noH=False):
    """
        This function reads the output of the benchmark and returns a dictionary with the results.

        The file is scanned line by line. A line containing the token "Running:" starts a run:
        the cloud name is the 4th '/'-separated component of token 2, the number of threads is
        token 6 and the chunk size is token 8. The following lines are attributed to that run:
        a 'Quadtree' line (time in token 5), 'STAGE' lines (time in token 5, expected in
        repeating stage 1, 2, 3 order) and an 'Average:' line (time in token 1).
        Any other line is ignored.

        Args:
            filename: the name of the file to read
            noH: if True, remove the last character (the 'H') of the cloud names

        Returns:
            A dictionary with the results of the benchmark:
            dict(cloud_name: dict(chunk_size: dict(num_threads: dict(metric: time))))
            where metric is one of 'qtree', 'stage1', 'stage2', 'stage3', 'owm' and
            'total' (= 'qtree' + 'owm'). Each 'stageN' value is the mean over all the
            repetitions of that stage found for the run.

        Raises:
            ValueError: if a 'Quadtree', 'STAGE' or 'Average:' line appears before any
                "Running:" line, so it cannot be attributed to a run.
            KeyError: if a run has no 'Quadtree' or no 'Average:' line.
            statistics.StatisticsError: if a run has no time for one of the three stages.
    """
    experiment = {}
    # record of the run opened by the most recent "Running:" line
    run = None
    with open(filename) as f:
        for lineno, line in enumerate(f, start=1):
            tokens = line.split()
            # a "Running:" line opens (or re-opens) the record of a run
            if "Running:" in tokens:
                # get the name of the cloud
                name = tokens[2].split("/")[3]
                if noH:
                    name = name[:-1]
                # get the number of threads used
                nth = int(tokens[6])
                # get the chunk size used in the dynamic scheduling of the stage 1
                chunk = int(tokens[8])
                # create the missing levels; an existing record is reused, so a repeated
                # header keeps accumulating stage times in the same list
                run = (experiment.setdefault(name, {})
                                 .setdefault(chunk, {})
                                 .setdefault(nth, {'stages': []}))
                continue

            is_qtree = 'Quadtree' in tokens
            is_stage = "STAGE" in tokens
            if not (is_qtree or is_stage or 'Average:' in tokens):
                continue
            if run is None:
                raise ValueError(
                    f'{filename}:{lineno}: timing line found before any "Running:" line'
                )
            if is_qtree:
                # time of building the quadtree
                run['qtree'] = float(tokens[5])
            elif is_stage:
                # time of one stage (1, 2 or 3)
                run['stages'].append(float(tokens[5]))
            else:
                # average time for OWM
                run['owm'] = float(tokens[1])

    results = {}
    for cloud, chunks in experiment.items():
        results[cloud] = {}
        for chunk, runs in chunks.items():
            results[cloud][chunk] = {}
            for nth, raw in runs.items():
                stages = raw['stages']
                summary = {
                    'qtree': raw['qtree'],
                    # stage times are stored interleaved (1, 2, 3, 1, 2, 3, ...):
                    # every third element belongs to the same stage
                    'stage1': mean(stages[0::3]),
                    'stage2': mean(stages[1::3]),
                    'stage3': mean(stages[2::3]),
                    'owm': raw['owm'],
                }
                # keep total = qtree + owm
                summary['total'] = summary['qtree'] + summary['owm']
                results[cloud][chunk][nth] = summary
    return results

# dictionary with the results of the benchmark, keyed by experiment name
all_results = {}
# get the results
for file in file_list:
    # the key is the file name without its directory and without the '_<hostname>.txt'
    # suffix, i.e. 'baseline-rev1' and 'baseline-rev1_collapse'
    experiment_name = os.path.basename(file).replace(f'_{hostname}.txt', '')
    all_results[experiment_name] = tokenize_rev1(file)
# point to the dynamic+collapse experiment as default
# (the key must carry the 'baseline-' prefix of the file name; 'rev1_collapse' is never produced)
rrev1 = all_results['baseline-rev1_collapse']
print(list(all_results.keys()))

# Save results in All_Optimizations

In [None]:
# CSV file, inside the per-host directory, that collects one row per cloud
output = os.path.join(hostname, f'All_Optimizations-{hostname}.csv')

# mean quadtree build time of each cloud, in the iteration order of rrev1
quadtree_t = []
# opened in append mode, so rows already in the file are kept;
# the context manager closes the file even if a lookup below fails
with open(output, "a") as f:
    for cloud in rrev1:
        # project helper; presumably returns the chunk size with the best 'total' time
        # at maxth threads -- confirm against common.utils
        bestchunk = get_best_level(rrev1, cloud, maxth, 'total')
        best_runs = rrev1[cloud][bestchunk]
        # quadtree build time averaged over all the thread counts
        qtree_mean = mean(best_runs[j]['qtree'] for j in num_threads)
        quadtree_t.append(qtree_mean)
        # row: cloud, mean quadtree time, OWM time at maxth threads, best chunk, constant 0
        row = (cloud, qtree_mean, best_runs[maxth]['owm'], bestchunk, 0)
        print("Base-REV1-Quadtree; {}; {:.5f}; {:.5f};{};{}".format(*row))
        f.write("Base-REV1-Quadtree;{};{:.5f};{:.5f};{};{}\n".format(*row))