In [5]:


import regex as re
import numpy as np
from statistics import mean, stdev
import matplotlib
import random
import matplotlib.pyplot as plt
#from bento_bench import bar_plot, parse_elapsed_secs, get_avg_from_table_str, set_size
import pandas as pd
import json
import os
import seaborn as sns
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
    "font.size": 7,
})

fio_iops_avg_regex = "(?<=^\s*iops\s*: min=\d*, max=\d*, avg=)\d+\.\d*"
fio_iops_stdev_regex = "(?<=^\s*iops\s*: min=\d*, max=\d*, avg=\d*\.*\d*, stdev=)\d+\.\d*"

# Some are in KiB, some in MiB....
fio_bw_kib_avg_regex"(?<=^\s*bw \(  KiB\/s\): min=\d*, max=\d*, per=\d*\.*\d*%, avg=)\d+\.\d*"
fio_bw_mib_avg_regex"(?<=^\s*bw \(  MiB\/s\): min=\d*, max=\d*, per=\d*\.*\d*%, avg=)\d+\.\d*"
fio_bw_kib_stdev_regex"(?<=^\s*bw \(  KiB\/s\): min=\d*, max=\d*, per=\d*\.*\d*%, avg=\d*\.*\d*, stdev=)\d+\.\d*"
fio_bw_mib_stdev_regex"(?<=^\s*bw \(  MiB\/s\): min=\d*, max=\d*, per=\d*\.*\d*%, avg=\d*\.*\d*, stdev=)\d+\.\d*"


In [2]:
def get_instructions(df):
    return df.loc[df['event'] == 'instructions', ['val']].iloc[0].item()

def get_cpus(df):
    return df.loc[df['event'] == 'cpu-clock', ['metric_val']].iloc[0].item()

def get_cycles(df):
    return df.loc[df['event'] == 'cycles', ['val']].iloc[0].item()

# We assume here that all the events have been measured for an equal amount of time
def get_time_measured(df):
    return df.loc[df['event'] == 'cpu-clock', ['time_measured']].iloc[0].item() / get_cpus(df)

def get_ipc(df):
    return df.loc[df['event'] == 'instructions', ['metric_val']].iloc[0].item()

def get_branches(df):
    return df.loc[df['event'] == 'branches', ['val']].iloc[0].item()

def get_branch_missrate(df):
    return df.loc[df['event'] == 'branch-misses', ['metric_val']].iloc[0].item()

def get_val(df, metric):
    return df.loc[df['event'] == metric, ['val']].iloc[0].item()

In [43]:
perf_stat_colnames = [ "val", "unit", "event", "time_measured", "perctage_measured", "metric_val", "metric_unit"]
targets = {
    "NFS": "./nfs/73cc3a3_zac15_2023-02-25_00:21:44/",
    "VNFS": "./nfs/73cc3a3_zac15_2023-02-25_00:21:44/",
}
metrics = ["Instructions/OP", "IPC", "Branch missrate"]
# perf time_measured is in nsec
runtime = 600 * 10**9
perf_nrep = 3

perf_load = []
iops = []
table = pd.DataFrame(index=metrics)
for i, (conf, folder) in enumerate(targets.items()):
    iops_l = pd.Series(dtype="float")
    for i in range(1, perf_nrep+1):
        f = open(folder + "cpu_load_fio_" + str(i) + ".out")
        matches = re.findall(fio_iops_avg_regex, f.read(), re.MULTILINE)
        iops_l = pd.concat((iops_l, pd.Series(np.array(matches).astype(float).sum())))
    iops = iops_l.mean()

    perf_baseline = pd.DataFrame(columns=["instructions"])
    for i in range(1, perf_nrep+1):
        f = open(folder + "cpu_baseline_perf_" + str(i) + ".out")
        # We skip the first two lines, those are perf event disabled and enabled
        f.readline()
        f.readline()
        raw = pd.read_csv(f, names=perf_stat_colnames)
        row = {
            "instructions": get_instructions(raw)
        }
        perf_baseline = pd.concat((perf_baseline, pd.DataFrame([row])), ignore_index=True)

    perf_load = pd.DataFrame(columns=["instructions", "ipc", "branch_missrate"])
    for i in range(1, perf_nrep+1):
        f = open(folder + "cpu_load_perf_" + str(i) + ".out")
        # We skip the first two lines, those are perf event disabled and enabled
        f.readline()
        f.readline()
        raw = pd.read_csv(f, names=perf_stat_colnames)
        row = {
            "instructions": get_instructions(raw),
            "ipc": get_ipc(raw),
            "branch_missrate": get_branch_missrate(raw)
        }
        perf_load = pd.concat((perf_load, pd.DataFrame([row])), ignore_index=True)

    perf_baseline = perf_baseline.mean()
    perf_load = perf_load.mean()
    
    row = {
        metrics[0]: (perf_load[0] - perf_baseline[0]) / runtime / iops,
        metrics[1]: perf_load[1],
        metrics[2]: perf_load[2]
    }
    table = table.join(pd.DataFrame([row]).transpose())
    table = table.rename(columns={ 0: conf })

print(table)
table.style.to_latex("cpu_analysis.tex")
#get_branches(perf_load[0]) - get_branches(perf_baseline[0])
#get_branch_misses(perf_load[0]) - get_branch_misses(perf_baseline[0])
#( - get_instructions(perf_baseline[0])) / iops[0]
#
#
#instructions = [
#    get_instructions(perf_baseline[0]) / get_time_measured(perf_baseline[0]) * runtime,
#    get_instructions(perf_load[0]) / get_time_measured(perf_load[0]) * runtime
#]

                      NFS      VNFS
Instructions/OP  0.000087  0.000087
IPC              0.570000  0.570000
Branch missrate  1.980000  1.980000


In [45]:
target = "nulldev"
# TODO
folder = "./nfs/73cc3a3_zac15_2023-02-25_00:21:44/"

cols = ["RW", "BS", "QD", "P", "avg", "stderr"]

for rw in ["randread", "randwrite"]:
    for bs in ["1", "4k", "8k", "16k", "32k", "64k", "128k"]:
        for qd in ["1", "2", "4", "8", "16", "32", "64", "128"]:
            p="1"
            path = "fio_" + rw + "_" + bs + "_" + qd + "_" + p + ".out"
            f = open(folder + path)
            matches = re.findall(fio_iops_avg_regex, f.read(), re.MULTILINE)
            print(np.array(matches).astype(float)[0])

IndexError: index 0 is out of bounds for axis 0 with size 0