In [1]:
# Import packages
from __future__ import division, print_function # Imports from __future__ since we're running Python 2
import os
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
import scipy as sp
import scipy.stats

import itertools
from tabulate import tabulate

from tqdm import tqdm_notebook

%run '/home/mark/Desktop/Plastic-Parallel-Programming/data_analysis/year 2/utilities.ipynb'

Program parameters:

In [2]:
machine = "XXXII"

instances = ["cpu_small", "cpu_large", "vm_small", "vm_large"]
configurations = list(itertools.combinations_with_replacement(instances, 3))

In [3]:
number_of_tests = 729
num_workers_max = 12
num_cores_max   = 12
    
root_folder_path = "results/" + machine + "/"

file_names = []

for i in range(1, number_of_tests + 1): 
    file_names.append("test" + str(i) + "/output")

In [4]:
path, dirs, files = os.walk(root_folder_path).next()
num_exps = int(len(dirs) / 3)

In [5]:
def mean_confidence_interval(data, confidence=0.95):
    a = 1.0*np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * sp.stats.t._ppf((1+confidence)/2., n-1)
    return int(round(m-h)), int(round(m)), int(round(m+h))

In [6]:
table_data = []

for exp_number in tqdm_notebook(range(1, num_exps + 1)):
    
    number_of_tests = 729
    num_workers_max = 12
    num_cores_max   = 12

    data1 = []
    data2 = []
    data3 = []

    fetch_data(root_folder_path + "/exp_" + str(exp_number) + "_1/", file_names, data1, ["Runtime"])
    fetch_data(root_folder_path + "/exp_" + str(exp_number) + "_2/", file_names, data2, ["Runtime"])
    fetch_data(root_folder_path + "/exp_" + str(exp_number) + "_3/", file_names, data3, ["Runtime"])

    dataset = []
    raw_dataset = []

    num_workers_min = 4
    num_workers_step = 4
    num_workers_values = range(num_workers_min, num_workers_max + num_workers_step, num_workers_step)
    nwv_len = len(num_workers_values)
    num_workers = num_workers_values[0]

    num_cores_min = 4
    num_cores_step = 4
    num_cores_values = range(num_cores_min, num_cores_max + num_cores_step, num_cores_step)
    ncv_len = len(num_cores_values)
    num_cores = num_cores_values[0]

    for i in range(len(data1)):
        num_workers3 = num_workers_values[i % nwv_len]
        num_cores3   = num_cores_values[(i // nwv_len) % ncv_len]
        num_workers2 = num_workers_values[((i // nwv_len) // ncv_len) % nwv_len]
        num_cores2   = num_cores_values[(((i // nwv_len) // ncv_len) // nwv_len) % ncv_len]
        num_workers1 = num_workers_values[((((i // nwv_len) // ncv_len) // nwv_len) // ncv_len) % nwv_len]
        num_cores1   = num_cores_values[(((((i // nwv_len) // ncv_len) // nwv_len) // ncv_len) // nwv_len) % ncv_len]

        dataset.append([num_cores1, num_workers1, num_cores2, num_workers2, num_cores3, num_workers3, np.mean([x + y + z for x, y, z in zip(data1[i].values, data2[i].values, data3[i].values)])])

        for j in range(1, len(data1[i])):
            raw_dataset.append([num_cores1, num_workers1, num_cores2, num_workers2, num_cores3, num_workers3, data1[i].values[j][0] + data2[i].values[j][0] + data3[i].values[j][0]])

    dataset = pd.DataFrame(dataset)
    dataset.columns = ["Num Cores 1", "Num Workers 1", "Num Cores 2", "Num Workers 2", "Num Cores 3", "Num Workers 3", "Total Runtime"]

    raw_dataset = pd.DataFrame(raw_dataset)
    raw_dataset.columns = ["Num Cores 1", "Num Workers 1", "Num Cores 2", "Num Workers 2", "Num Cores 3", "Num Workers 3", "Total Runtime"]





    min_record = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]

    nc1 = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]["Num Cores 1"].item()
    nc2 = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]["Num Cores 2"].item()
    nc3 = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]["Num Cores 3"].item()

    nw1 = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]["Num Workers 1"].item()
    nw2 = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]["Num Workers 2"].item()
    nw3 = dataset[dataset["Total Runtime"] == min(dataset["Total Runtime"])]["Num Workers 3"].item()

    min_record_value = min_record["Total Runtime"].item()





    if machine == "spa":
        number_of_tests = 144
        num_workers_max = 24
        num_cores_max   = 24

    elif machine == "XXXII":
        number_of_tests = 576
        num_workers_max = 48
        num_cores_max   = 48



    file_names = []

    for i in range(1, number_of_tests + 1): 
        file_names.append("test" + str(i) + "/output")

    data1 = []
    data2 = []
    data3 = []

    fetch_data("../optimal_threads/results/" + machine + "/optimal_threads_" + configurations[exp_number - 1][0] + "/", file_names, data1, ["Runtime"])
    fetch_data("../optimal_threads/results/" + machine + "/optimal_threads_" + configurations[exp_number - 1][1] + "/", file_names, data2, ["Runtime"])
    fetch_data("../optimal_threads/results/" + machine + "/optimal_threads_" + configurations[exp_number - 1][2] + "/", file_names, data3, ["Runtime"])
    
    dataset1 = []
    dataset2 = []
    dataset3 = []

    num_workers_min = 2
    num_workers_step = 2
    num_workers_values = range(num_workers_min, num_workers_max + num_workers_step, num_workers_step)
    nwv_len = len(num_workers_values)
    num_workers = num_workers_values[0]

    num_cores_min = 2
    num_cores_step = 2
    num_cores_values = range(num_cores_min, num_cores_max + num_cores_step, num_cores_step)
    ncv_len = len(num_cores_values)
    num_cores = num_cores_values[0]

    for i in range(len(data1)):
        num_workers = num_workers_values[i % nwv_len]
        num_cores = num_cores_values[(i // nwv_len) % ncv_len]

        if (num_cores % 4 == 0) & (num_workers % 4 == 0) & (num_cores <= 12) & (num_workers <= 12):
            dataset1.append([num_cores, num_workers, data1[i][1:].mean()[0]])

    for i in range(len(data2)):
        num_workers = num_workers_values[i % nwv_len]
        num_cores = num_cores_values[(i // nwv_len) % ncv_len]

        if (num_cores % 4 == 0) & (num_workers % 4 == 0) & (num_cores <= 12) & (num_workers <= 12):
            dataset2.append([num_cores, num_workers, data2[i][1:].mean()[0]])

    for i in range(len(data3)):
        num_workers = num_workers_values[i % nwv_len]
        num_cores = num_cores_values[(i // nwv_len) % ncv_len]

        if (num_cores % 4 == 0) & (num_workers % 4 == 0) & (num_cores <= 12) & (num_workers <= 12):
            dataset3.append([num_cores, num_workers, data3[i][1:].mean()[0]])

    dataset1 = pd.DataFrame(dataset1)
    dataset1.columns = ["Num Cores", "Num Workers", "Time"]

    dataset2 = pd.DataFrame(dataset2)
    dataset2.columns = ["Num Cores", "Num Workers", "Time"]

    dataset3 = pd.DataFrame(dataset2)
    dataset3.columns = ["Num Cores", "Num Workers", "Time"]





    independent_min_runtime1 = min(dataset1["Time"])
    independent_min_runtime2 = min(dataset2["Time"])
    independent_min_runtime3 = min(dataset2["Time"])

    inw1 = dataset1[dataset1["Time"] == independent_min_runtime1]["Num Workers"].item()
    inc1 = dataset1[dataset1["Time"] == independent_min_runtime1]["Num Cores"].item()

    inw2 = dataset2[dataset2["Time"] == independent_min_runtime2]["Num Workers"].item()
    inc2 = dataset2[dataset2["Time"] == independent_min_runtime2]["Num Cores"].item()

    inw3 = dataset3[dataset3["Time"] == independent_min_runtime3]["Num Workers"].item()
    inc3 = dataset3[dataset3["Time"] == independent_min_runtime3]["Num Cores"].item()

    imin_record = dataset.loc[(dataset["Num Cores 1"] == inc1) & (dataset["Num Cores 2"] == inc2) & (dataset["Num Cores 3"] == inc3) & (dataset["Num Workers 1"] == inw1) & (dataset["Num Workers 2"] == inw2) & (dataset["Num Workers 3"] == inw3)]
    
    imin_record_value = imin_record["Total Runtime"].item()





    idata = raw_dataset[(raw_dataset["Num Cores 1"] == inc1) & (raw_dataset["Num Cores 2"] == inc2) & (raw_dataset["Num Cores 3"] == inc3) & (raw_dataset["Num Workers 1"] == inw1) & (raw_dataset["Num Workers 2"] == inw2) & (raw_dataset["Num Workers 3"] == inw3)]
    data  = raw_dataset[(raw_dataset["Num Cores 1"] == nc1)  & (raw_dataset["Num Cores 2"] == nc2)  & (raw_dataset["Num Cores 3"] == nc3)  & (raw_dataset["Num Workers 1"] == nw1)  & (raw_dataset["Num Workers 2"] == nw2)  & (raw_dataset["Num Workers 3"] == nw3)]
    
    table_data.append([configurations[exp_number - 1][0], configurations[exp_number - 1][1], configurations[exp_number - 1][2], str(mean_confidence_interval(idata["Total Runtime"].values)), str(mean_confidence_interval(data["Total Runtime"].values)), str(round(imin_record_value / min_record_value, 2))])
    
    print(' '.join(configurations[exp_number - 1]) + " completed")

cpu_small cpu_small cpu_small completed
cpu_small cpu_small cpu_large completed
cpu_small cpu_small vm_small completed
cpu_small cpu_small vm_large completed
cpu_small cpu_large cpu_large completed



In [7]:
print(tabulate(table_data, headers = ["Program 1", "Program 2", "Program 3", "Independent Runtime", "Contention Aware Runtime", "Speedup"], tablefmt = 'orgtbl'))

| Program 1   | Program 2   | Program 3   | Independent Runtime   | Contention Aware Runtime   |   Speedup |
|-------------+-------------+-------------+-----------------------+----------------------------+-----------|
| cpu_small   | cpu_small   | cpu_small   | (292, 298, 305)       | (239, 251, 263)            |      1.18 |
| cpu_small   | cpu_small   | cpu_large   | (267, 271, 276)       | (223, 236, 249)            |      1.14 |
| cpu_small   | cpu_small   | vm_small    | (1117, 1144, 1172)    | (812, 826, 840)            |      1.39 |
| cpu_small   | cpu_small   | vm_large    | (3333, 3390, 3447)    | (1548, 1602, 1655)         |      2.12 |
| cpu_small   | cpu_large   | cpu_large   | (231, 236, 240)       | (206, 209, 212)            |      1.13 |
