In [104]:
import csv
import pandas as pd
from glob import glob
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [91]:
# Collect every per-task result file in the current working directory.
task_files = glob(pathname="task*.csv")

In [92]:
def format_data(data):
    """Parse raw ``name: value`` result lines into ``[name, value]`` lists.

    Each line has its trailing newline, surrounding spaces and surrounding
    commas removed and is then split on ``:``. Commas around every resulting
    field are stripped and the second field is converted to ``float``.

    Args:
        data: Iterable of raw text lines (e.g. a slice of ``readlines()``).

    Returns:
        A list with one entry per non-blank line. Each entry is the list of
        fields found on that line, with element 1 converted to ``float``.

    Raises:
        IndexError: If a non-blank line contains no ``:`` separator.
        ValueError: If the second field of a line is not a valid float.
    """
    result = []
    for line in data:
        cleaned = line.strip("\n").strip(" ").strip(",")
        if not cleaned.strip():
            # Blank / whitespace-only lines (typically a trailing empty line)
            # carry no measurement; skipping them avoids an IndexError below.
            continue
        fields = [field.strip(",") for field in cleaned.split(":")]
        fields[1] = float(fields[1])
        result.append(fields)
    return result

In [93]:
def _extract_test_name(line):
    """Return the test name carried by a ``Test Name:`` header line.

    The trailing newline and surrounding periods are removed first, then the
    literal ``Test Name:`` prefix (if present) and any surrounding commas or
    spaces. The prefix is removed as a whole string: ``str.strip(chars)``
    treats its argument as a set of characters and would also eat leading or
    trailing letters of the name itself (e.g. a name ending in ``test``).
    """
    prefix = "Test Name:"
    text = line.strip("\n").strip(".").strip(", ")
    if text.startswith(prefix):
        text = text[len(prefix):]
    return text.strip(", ")


def prepare_data(task_files):
    """Read each task file and return its parsed contents, sorted by file name.

    For every file, the second line (index 1) is taken as the test-name header
    and all lines from the third onwards are parsed with ``format_data``.
    The first line is ignored. A summary of the distinct test names and of the
    file names is printed to stdout.

    Args:
        task_files: Iterable of file paths. Duplicate paths are read once.

    Returns:
        A list of dicts, sorted by ``"file_name"``, each with the keys
        ``"file_name"`` (the path as given), ``"test_name"`` (parsed header)
        and ``"data"`` (the list returned by ``format_data``).

    Raises:
        OSError: If a file cannot be opened.
        IndexError: If a file has fewer than two lines.
    """
    data_dict = []
    # dict.fromkeys de-duplicates paths while keeping their order, matching
    # the previous dict-keyed-by-path behaviour.
    for name in dict.fromkeys(task_files):
        # The context manager closes each handle as soon as it has been read.
        with open(name) as handle:
            lines = handle.readlines()
        data_dict.append({
            "file_name": name,
            "test_name": _extract_test_name(lines[1]),
            "data": format_data(lines[2:]),
        })
    data_dict = sorted(data_dict, key=lambda entry: entry["file_name"])
    print("Test types:\n", set([entry['test_name'] for entry in data_dict]))
    print("")
    print("File Names:\n", list([entry['file_name'] for entry in data_dict]))
    return data_dict

In [94]:
# Parse every discovered task file; prepare_data also prints a short summary.
data_dict = prepare_data(task_files=task_files)

Test types:
 {'4.12.0-standard', '4.12.0-patch', '4.12.0-config', '4.15.0-101-generic'}

File Names:
 ['task1-1.csv', 'task1-2.csv', 'task1-3.csv', 'task1-4.csv', 'task2-1.csv', 'task2-2.csv', 'task2-3.csv', 'task2-4.csv', 'task3-1.csv', 'task3-2.csv', 'task3-3.csv', 'task3-4.csv', 'task4-1.csv', 'task4-2.csv', 'task4-3.csv', 'task4-4.csv']


In [95]:
# Column labels are the field names (element 0 of each parsed line) of the
# first file. Assumes every file lists the same fields in the same order --
# TODO confirm.
cols = [entry[0] for entry in data_dict[0]["data"]]

In [96]:
# One row per file (same order as data_dict), holding the numeric value
# (element 1) of each parsed line.
heat_map_data = [
    [measurement[1] for measurement in record["data"]]
    for record in data_dict
]

In [85]:
# Express every row relative to the row at index 1 of heat_map_data, column by
# column. In the file listing printed above, index 1 corresponds to
# 'task1-2.csv'.
# NOTE(review): the baseline index is hard-coded; confirm row 1 is the intended
# reference.
relative_boost = [
    [value / heat_map_data[1][col] for col, value in enumerate(row)]
    for row in heat_map_data
]

In [86]:
# Test name of each file, in the same order as data_dict.
test_names = [record["test_name"] for record in data_dict]

In [87]:
# Source file name of each record, in the same order as data_dict.
file_name = [record["file_name"] for record in data_dict]

In [98]:
# Table of the raw values: one row per file, one column per field, plus the
# two label columns appended at the end.
absolute_df = pd.DataFrame(heat_map_data, columns=cols).assign(
    test_names=test_names,
    file_name=file_name,
)
# `df` is kept bound to the same frame for any cell that still refers to it.
df = absolute_df

In [99]:
# Table of the values relative to the baseline row: same layout as the
# absolute table, with the two label columns appended at the end.
relative_df = pd.DataFrame(relative_boost, columns=cols).assign(
    test_names=test_names,
    file_name=file_name,
)
# `df` is kept bound to the same frame for any cell that still refers to it.
df = relative_df

In [108]:
# Persist the absolute table; the row index is written as the first column.
absolute_df.to_csv(path_or_buf="result-absolute.csv", index=True)

In [109]:
# Persist the relative table; the row index is written as the first column.
relative_df.to_csv(path_or_buf="result-relative.csv", index=True)

In [133]:
# One frame per task number 1..4: the rows whose file name contains
# "task<N>-".
intask_df = [
    absolute_df.loc[absolute_df["file_name"].str.contains(f"task{task_no}-")]
    for task_no in range(1, 5)
]

In [143]:
# One frame per file-name suffix number 1..4: the rows whose file name
# contains the literal text "-<N>.csv".
# regex=False matters here: str.contains interprets its pattern as a regular
# expression by default, in which the "." would match any character.
crosstask_df = [
    absolute_df[absolute_df["file_name"].str.contains(f"-{suffix_no}.csv", regex=False)]
    for suffix_no in range(1, 5)
]

In [187]:
def get_normalized(df):
    """Scale the value columns of the first frame in ``df`` by its first row.

    Args:
        df: Indexable collection whose element 0 is a DataFrame. Only that
            first element is used.

    Returns:
        A DataFrame with every column of the first frame except
        ``test_names`` and ``file_name``, each column divided by the
        column-wise sum of the frame's first row.
    """
    first_frame = df[0]
    label_columns = ("test_names", "file_name")
    value_columns = [name for name in first_frame.columns if name not in label_columns]
    values = first_frame[value_columns]
    # Summing the one-row slice gives a Series indexed by column name, so
    # div() lines the divisor up with each column.
    divisor = values[:1].sum()
    return values.div(divisor)

In [146]:
# Scratch alias for the first frame in crosstask_df (the i=1 group).
a = crosstask_df[0]

In [147]:
# Scratch alias for the remaining frames in crosstask_df (a list, not a frame).
b = crosstask_df[1:]

In [157]:
# Display the frame bound to `a` as the cell output.
a 