In [42]:


import regex as re
import numpy as np
from statistics import mean, stdev
import matplotlib
import random
import matplotlib.pyplot as plt
#from bento_bench import bar_plot, parse_elapsed_secs, get_avg_from_table_str, set_size
import pandas as pd
import json
import os
import seaborn as sns
# Switch matplotlib to the pgf backend, then set each rc option individually:
# pdflatex as the TeX system, serif TeX-rendered text, no rc-font setup by pgf,
# and a 7 pt base font size.
matplotlib.use("pgf")
matplotlib.rcParams["pgf.texsystem"] = "pdflatex"
matplotlib.rcParams['font.family'] = 'serif'
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['pgf.rcfonts'] = False
matplotlib.rcParams["font.size"] = 7

# Patterns that pull numbers out of fio's human-readable output. Each one is a
# variable-width lookbehind anchored at the start of a line, so it must be used
# with re.MULTILINE, and with the `regex` package (imported in this notebook as
# `re`) because the standard-library `re` only accepts fixed-width lookbehinds.
#
# They are written as raw strings so that `\s`, `\d`, `\(`, `\/` and `\.` reach
# the regex engine untouched instead of being parsed as (invalid) Python string
# escapes, which triggers a SyntaxWarning on recent Python versions.
fio_iops_avg_regex = r"(?<=^\s*iops\s*: min=\s*\d*, max=\s*\d*, avg=\s*)\d+\.\d*"
fio_iops_stdev_regex = r"(?<=^\s*iops\s*: min=\s*\d*, max=\s*\d*, avg=\s*\d*\.*\d*, stdev=)\d+\.\d*"

# fio reports bandwidth in KiB/s for some runs and in MiB/s for others, so there
# is one avg/stdev pattern per unit.
fio_bw_kib_avg_regex = r"(?<=^\s*bw \(  KiB\/s\): min=\s*\d*, max=\s*\d*, per=\s*\d*\.*\d*%, avg=\s*)\d+\.\d*"
fio_bw_mib_avg_regex = r"(?<=^\s*bw \(  MiB\/s\): min=\s*\d*, max=\s*\d*, per=\s*\d*\.*\d*%, avg=\s*)\d+\.\d*"
fio_bw_kib_stdev_regex = r"(?<=^\s*bw \(  KiB\/s\): min=\s*\d*, max=\s*\d*, per=\s*\d*\.*\d*%, avg=\s*\d*\.*\d*, stdev=\s*)\d+\.\d*"
fio_bw_mib_stdev_regex = r"(?<=^\s*bw \(  MiB\/s\): min=\s*\d*, max=\s*\d*, per=\s*\d*\.*\d*%, avg=\s*\d*\.*\d*, stdev=\s*)\d+\.\d*"


In [32]:
def get_instructions(df):
    """Return the `val` entry of the first row whose `event` is 'instructions'."""
    instruction_rows = df[df['event'] == 'instructions']
    first_row = instruction_rows[['val']].iloc[0]
    return first_row.item()
def get_cpus(df):
    """Return the `metric_val` entry of the first 'cpu-clock' row.

    Presumably this is perf's "CPUs utilized" figure -- confirm against the perf output.
    """
    is_cpu_clock = df['event'].eq('cpu-clock')
    cpu_clock_metric = df.loc[is_cpu_clock, ['metric_val']]
    return cpu_clock_metric.iloc[0].item()
def get_cycles(df):
    """Return the `val` entry of the first row whose `event` is 'cycles'."""
    cycle_mask = df['event'] == 'cycles'
    cycle_vals = df.loc[cycle_mask, ['val']]
    first_cycles = cycle_vals.iloc[0]
    return first_cycles.item()
# Only the 'cpu-clock' row is consulted, i.e. every event is assumed to have
# been measured for the same amount of time.
def get_time_measured(df):
    """Return the first 'cpu-clock' row's `time_measured` divided by `get_cpus(df)`."""
    cpu_clock_rows = df[df['event'] == 'cpu-clock']
    total_time = cpu_clock_rows[['time_measured']].iloc[0].item()
    return total_time / get_cpus(df)
def get_ipc(df):
    """Return the `metric_val` entry of the first 'instructions' row.

    Presumably perf's instructions-per-cycle figure -- confirm against the perf output.
    """
    on_instructions = df['event'].eq('instructions')
    metric_column = df.loc[on_instructions, ['metric_val']]
    return metric_column.iloc[0].item()
def get_branches(df):
    """Return the `val` entry of the first row whose `event` is 'branches'."""
    branch_rows = df[df['event'] == 'branches']
    branch_count = branch_rows[['val']].iloc[0]
    return branch_count.item()
def get_branch_missrate(df):
    """Return the `metric_val` entry of the first 'branch-misses' row."""
    is_branch_miss = df['event'].eq('branch-misses')
    miss_metric = df.loc[is_branch_miss, ['metric_val']]
    return miss_metric.iloc[0].item()
def get_val(df, metric):
    """Return the `val` entry of the first row whose `event` equals `metric`."""
    wanted = df['event'] == metric
    matching_vals = df.loc[wanted, ['val']]
    first_hit = matching_vals.iloc[0]
    return first_hit.item()
def get_l1_dcache_missrate(df):
    """Return the `metric_val` entry of the first 'L1-dcache-load-misses' row."""
    l1_miss_rows = df[df['event'] == 'L1-dcache-load-misses']
    l1_metric = l1_miss_rows[['metric_val']].iloc[0]
    return l1_metric.item()
def get_dtlb_missrate(df):
    """Return the `metric_val` entry of the first 'dTLB-loads-misses' row."""
    is_dtlb_miss = df['event'].eq('dTLB-loads-misses')
    dtlb_metric = df.loc[is_dtlb_miss, ['metric_val']]
    first_dtlb = dtlb_metric.iloc[0]
    return first_dtlb.item()

In [44]:
# Column names applied to the header-less perf-stat CSV files read below.
perf_stat_colnames = [ "val", "unit", "event", "time_measured", "perctage_measured", "metric_val", "metric_unit"]
# NOTE(review): both configurations point at the same result folder, so the two
# columns of the resulting table are identical -- confirm the intended VNFS path.
targets = {
    "NFS": "./nfs/a906294_zac15_2023-02-26_20:26:58/",
    "VNFS": "./nfs/a906294_zac15_2023-02-26_20:26:58/",
}
metrics = ["Instructions/OP", "IPC", "Branch missrate", "L1DCache missrate", "dTLB missrate"]
# perf time_measured is in nsec
# Length of one measurement run; presumably in seconds so that it pairs with
# fio's per-second IOPS -- TODO confirm.
runtime = 600
# Number of repetitions per configuration; result files are suffixed _1 .. _<perf_nrep>.
perf_nrep = 5


def _read_perf_stat(path):
    """Parse one perf-stat CSV dump into a DataFrame with `perf_stat_colnames` columns."""
    with open(path) as f:
        # We skip the first two lines, those are perf event disabled and enabled
        f.readline()
        f.readline()
        return pd.read_csv(f, names=perf_stat_colnames)


def _total_fio_iops(path):
    """Sum every 'iops ... avg=' value found in one fio output file (0.0 if none)."""
    with open(path) as f:
        matches = re.findall(fio_iops_avg_regex, f.read(), re.MULTILINE)
    return np.array(matches).astype(float).sum()


table = pd.DataFrame(index=metrics)
for conf, folder in targets.items():
    reps = range(1, perf_nrep + 1)

    # Mean over the repetitions of the summed IOPS measured during the T1 load run.
    T1_iops = pd.Series(
        [_total_fio_iops(os.path.join(folder, "cpu_load_T1_fio_" + str(rep) + ".out")) for rep in reps],
        dtype="float",
    ).mean()

    # Instruction count without load, averaged over the repetitions.
    baseline_rows = []
    for rep in reps:
        raw = _read_perf_stat(os.path.join(folder, "cpu_baseline_perf_" + str(rep) + ".out"))
        baseline_rows.append({"instructions": get_instructions(raw)})
    perf_baseline = pd.DataFrame(baseline_rows).mean()

    # Counters under load. Each repetition has three perf files (T1, T2, T3):
    # T1 provides instructions/IPC/branch misses, T2 the L1 dcache miss rate and
    # T3 the dTLB miss rate.
    load_rows = []
    for rep in reps:
        T1_raw = _read_perf_stat(os.path.join(folder, "cpu_load_T1_perf_" + str(rep) + ".out"))
        T2_raw = _read_perf_stat(os.path.join(folder, "cpu_load_T2_perf_" + str(rep) + ".out"))
        T3_raw = _read_perf_stat(os.path.join(folder, "cpu_load_T3_perf_" + str(rep) + ".out"))
        load_rows.append({
            "instructions": get_instructions(T1_raw),
            "ipc": get_ipc(T1_raw),
            "branch_missrate": get_branch_missrate(T1_raw),
            "l1": get_l1_dcache_missrate(T2_raw),
            "dtlb": get_dtlb_missrate(T3_raw),
        })
    perf_load = pd.DataFrame(load_rows).mean()

    # Access the averaged counters by label rather than by position.
    extra_instructions = perf_load["instructions"] - perf_baseline["instructions"]
    table[conf] = pd.Series({
        # Instructions attributable to the load, per second of runtime, per I/O operation.
        metrics[0]: round(extra_instructions / runtime / T1_iops),
        metrics[1]: perf_load["ipc"],
        metrics[2]: perf_load["branch_missrate"],
        metrics[3]: perf_load["l1"],
        metrics[4]: perf_load["dtlb"],
    })

print(table)
table.style.to_latex("cpu_analysis.tex")
#get_branches(perf_load[0]) - get_branches(perf_baseline[0])
#get_branch_misses(perf_load[0]) - get_branch_misses(perf_baseline[0])
#( - get_instructions(perf_baseline[0])) / iops[0]
#
#
#instructions = [
#    get_instructions(perf_baseline[0]) / get_time_measured(perf_baseline[0]) * runtime,
#    get_instructions(perf_load[0]) / get_time_measured(perf_load[0]) * runtime
#]

                         NFS       VNFS
Instructions/OP    88453.000  88453.000
IPC                    0.570      0.570
Branch missrate        2.016      2.016
L1DCache missrate      8.822      8.822
dTLB missrate          0.136      0.136


In [56]:
target = "nulldev"
# TODO
folder = "./nfs/a906294_zac15_2023-02-26_20:26:58/"

# NOTE(review): the "stderr" column is filled from fio's `stdev=` field, i.e. it
# holds a standard deviation -- confirm whether the name or the value should change.
cols = ["RW", "BS", "QD", "P", "avg", "stderr"]

# bw and lat are created with the same layout but are not filled in by this cell.
bw = pd.DataFrame(columns=cols)
lat = pd.DataFrame(columns=cols)

# Every result file of the sweep uses the same P value.
p = "2"

# Collect one record per (RW, BS, QD) combination and build the frame once at
# the end instead of concatenating inside the loop.
iops_rows = []
for rw in ["randread", "randwrite"]:
    for bs in ["1", "4k", "8k", "16k", "32k", "64k", "128k"]:
        for qd in ["1", "2", "4", "8", "16", "32", "64", "128"]:
            path = "fio_" + rw + "_" + bs + "_" + qd + "_" + p + ".out"
            with open(os.path.join(folder, path)) as f:
                f_buf = f.read()
            avg_matches = re.findall(fio_iops_avg_regex, f_buf, re.MULTILINE)
            stdev_matches = re.findall(fio_iops_stdev_regex, f_buf, re.MULTILINE)
            # Only the first iops line of each file is used; a file without one
            # raises IndexError here.
            iops_rows.append({
                "RW": rw,
                "BS": bs,
                "QD": qd,
                "P": p,
                "avg": float(avg_matches[0]),
                "stderr": float(stdev_matches[0]),
            })

iops = pd.DataFrame(iops_rows, columns=cols)

print(iops)

            RW    BS   QD  P       avg   stderr
0     randread     1    1  2  14169.53   237.79
1     randread     1    2  2  34956.51   912.28
2     randread     1    4  2  50059.61  1440.20
3     randread     1    8  2  58901.30  2518.90
4     randread     1   16  2  65303.78  1755.31
..         ...   ...  ... ..       ...      ...
107  randwrite  128k    8  2  14725.54   245.68
108  randwrite  128k   16  2  16428.62  1205.90
109  randwrite  128k   32  2  13862.57   145.99
110  randwrite  128k   64  2  13975.97   129.79
111  randwrite  128k  128  2  15185.42   521.24

[112 rows x 6 columns]
