In [1]:
import os
import re
import pandas as pd

In [2]:
# Path components of the directory holding the benchmark logs, kept as a list
# so they can be joined portably with os.path.join(*results_folder).
# Other result sets that can be swapped in here:
#   [os.pardir, "Benchmark results", "halloc_opts", "halloc_results_1"]
#   [os.pardir, "Benchmark results", "halloc_opts", "halloc_results_crash"]
#   [os.pardir, "Benchmark results"]
results_folder = [os.pardir, "Benchmark results", "halloc_opts"]

results_path = os.path.join(*results_folder)
print(results_path)

../Benchmark results/halloc_opts


In [3]:
# Parameter name -> tag that introduces that parameter's value inside a
# results file name (the text after the last "-" of the tagged part is the
# value). MAX_BLOCK_SZ2 ("mabs-") is a second tag for MAX_BLOCK_SZ; the file
# parsing loop stores its value under MAX_BLOCK_SZ.
parsable_keys = dict(
    BATCH_SIZE="bat-",
    HALLOC_FRACTION="hf-",
    BUSY_FRACTION="bu-",
    ROOMY_FRACTION="ro-",
    SPARSE_FRACTION="sp-",
    MAX_BLOCK_SZ="mb-",
    MAX_NSIZES="ns-",
    MAX_NCHUNK_IDS="nc-",
    BLOCK_STEP="bst-",
    NUNITS="nu-",
    MIN_BLOCK_SZ="mibs-",
    MAX_BLOCK_SZ2="mabs-",
)

In [4]:
# Value assumed for each parameter when a results file name does not carry a
# tag for it. Every parsed run starts from a copy of this mapping, and its
# keys also supply the parameter columns of the results DataFrame.
default_params = dict(
    BATCH_SIZE=32,
    HALLOC_FRACTION=0.25,
    BUSY_FRACTION=0.835,
    ROOMY_FRACTION=0.6,
    SPARSE_FRACTION=0.012,
    MAX_BLOCK_SZ=3072,
    MAX_NSIZES=16,
    MAX_NCHUNK_IDS=8,
    BLOCK_STEP=16,
    NUNITS=8,
    MIN_BLOCK_SZ=16,
)

In [5]:
# Target dtype for each results column, applied with DataFrame.astype once the
# frame has been filled. IMAGES_SEC is deliberately absent, so astype leaves
# that column as it is.
df_types = {
    **dict.fromkeys(("ALLOCATOR", "NETWORK"), "category"),
    "PROFILING": bool,
    "BATCH_SIZE": int,
    **dict.fromkeys(
        (
            "HALLOC_FRACTION",
            "BUSY_FRACTION",
            "ROOMY_FRACTION",
            "SPARSE_FRACTION",
        ),
        float,
    ),
    **dict.fromkeys(
        (
            "MAX_BLOCK_SZ",
            "MAX_NSIZES",
            "MAX_NCHUNK_IDS",
            "BLOCK_STEP",
            "NUNITS",
            "MIN_BLOCK_SZ",
        ),
        int,
    ),
}

In [6]:
# Matches a "total images/sec: <number>" log line and captures the number.
# The number is digits with at most one fractional part. The previous `\.*`
# accepted any run of dots, so a token such as "1..5" was captured whole and
# the later float() conversion raised ValueError.
expr = r"total\s+images/sec:\s+(\d+(?:\.\d+)?)"
r = re.compile(expr)

In [7]:
# Column layout of the results table: four fixed columns followed by one
# column per tunable parameter, in default_params order.
cols = ["ALLOCATOR", "NETWORK", "IMAGES_SEC", "PROFILING", *default_params]

# Start from an empty frame with that layout and show it.
df = pd.DataFrame(columns=cols)
print(df)

Empty DataFrame
Columns: [ALLOCATOR, NETWORK, IMAGES_SEC, PROFILING, BATCH_SIZE, HALLOC_FRACTION, BUSY_FRACTION, ROOMY_FRACTION, SPARSE_FRACTION, MAX_BLOCK_SZ, MAX_NSIZES, MAX_NCHUNK_IDS, BLOCK_STEP, NUNITS, MIN_BLOCK_SZ]
Index: []


In [8]:
# Build the results table from every ".txt" log in the results folder.
#
# A file name is "<allocator>_<network>[_<tag><value>...][_prof]": the first
# two "_"-separated parts give ALLOCATOR and NETWORK, a "prof" part sets
# PROFILING, and any part containing a tag from parsable_keys overrides that
# parameter's default with the number after the part's last "-".
#
# Each "total images/sec" reading found in a log becomes one row. A log with
# no reading still contributes one row with IMAGES_SEC = NaN, so runs that
# produced no throughput remain visible.
#
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and was quadratic when used
# row by row. Building `df` from scratch also makes the cell safe to re-run
# without duplicating rows.
results_dir = os.path.join(*results_folder)
records = []

for file in os.listdir(results_dir):
    stem, extension = os.path.splitext(file)
    if extension != ".txt":
        continue

    params = default_params.copy()

    file_split = stem.split("_")
    params["ALLOCATOR"] = file_split.pop(0)
    params["NETWORK"] = file_split.pop(0)
    params["PROFILING"] = "prof" in file_split
    for part in file_split:
        for key, tag in parsable_keys.items():
            if tag in part:
                # "mabs-" is a second tag for MAX_BLOCK_SZ.
                column = "MAX_BLOCK_SZ" if key == "MAX_BLOCK_SZ2" else key
                params[column] = float(part.split("-")[-1])

    # Read the log inside a context manager so the handle is always closed.
    # Only the first reading on each line is kept.
    matches = []
    with open(os.path.join(results_dir, file)) as log:
        for line in log:
            found = r.search(line)
            if found:
                matches.append(float(found.group(1)))

    if not matches:
        matches = [float("NaN")]
    for match in matches:
        records.append({**params, "IMAGES_SEC": match})

df = pd.DataFrame(records, columns=cols)

In [9]:
# Cast every column listed in df_types to its declared dtype.
df = df.astype(dtype=df_types)

In [26]:
# Rows without any missing value, ordered from lowest to highest throughput.
complete_rows = df.dropna()
complete_rows.sort_values(by="IMAGES_SEC")

Unnamed: 0,ALLOCATOR,NETWORK,IMAGES_SEC,PROFILING,BATCH_SIZE,HALLOC_FRACTION,BUSY_FRACTION,ROOMY_FRACTION,SPARSE_FRACTION,MAX_BLOCK_SZ,MAX_NSIZES,MAX_NCHUNK_IDS,BLOCK_STEP,NUNITS,MIN_BLOCK_SZ
159,bfc,resnet50,5.82,False,16,0.25,0.835,0.60,0.012,3072,16,8,16,8,16
160,bfc,resnet50,5.82,False,16,0.25,0.835,0.60,0.012,3072,16,8,16,8,16
0,halloc,alexnet,33.84,False,32,0.25,0.900,0.60,0.012,3072,16,8,16,8,16
180,halloc,alexnet,33.92,False,32,0.25,0.835,0.50,0.012,3072,16,8,16,8,16
56,halloc,alexnet,34.14,False,32,0.25,0.835,0.55,0.012,3072,16,8,16,8,16
140,halloc,alexnet,34.71,False,32,0.25,0.835,0.60,0.012,3072,16,8,16,8,16
165,halloc,alexnet,35.07,False,32,0.25,0.850,0.60,0.012,3072,16,8,16,8,16
8,halloc,alexnet,35.23,False,32,0.25,0.835,0.60,0.012,1024,16,8,16,8,16
37,halloc,alexnet,35.57,False,32,0.25,0.800,0.60,0.012,3072,16,8,16,8,16
90,halloc,alexnet,35.78,False,32,0.40,0.835,0.60,0.012,3072,16,8,16,8,16


In [11]:
# Write the full table next to the logs it was parsed from.
results_csv = os.path.join(*results_folder, "results.csv")
df.to_csv(results_csv)

In [12]:
# Summary statistics of the numeric columns across all rows.
summary_all = df.describe()
summary_all

Unnamed: 0,IMAGES_SEC,BATCH_SIZE,HALLOC_FRACTION,BUSY_FRACTION,ROOMY_FRACTION,SPARSE_FRACTION,MAX_BLOCK_SZ,MAX_NSIZES,MAX_NCHUNK_IDS,BLOCK_STEP,NUNITS,MIN_BLOCK_SZ
count,75.0,198.0,198.0,198.0,198.0,198.0,198.0,198.0,198.0,198.0,198.0,198.0
mean,35.3192,24.0,0.277778,0.833232,0.603535,0.021899,3754.505051,19.707071,8.717172,17.282828,7.909091,19.707071
std,5.027033,8.020279,0.131802,0.017688,0.029593,0.034808,2996.477043,27.031192,6.232452,12.572664,1.291054,27.031192
min,5.82,16.0,0.0,0.7,0.5,0.012,0.0,1.0,1.0,1.0,1.0,1.0
25%,35.975,16.0,0.25,0.835,0.6,0.012,3072.0,16.0,8.0,16.0,8.0,16.0
50%,36.11,24.0,0.25,0.835,0.6,0.012,3072.0,16.0,8.0,16.0,8.0,16.0
75%,36.225,32.0,0.25,0.835,0.6,0.012,3072.0,16.0,8.0,16.0,8.0,16.0
max,41.78,32.0,1.0,0.9,0.8,0.2,16384.0,256.0,64.0,128.0,16.0,256.0


In [13]:
# alexnet rows whose SPARSE_FRACTION differs from the default value.
is_alexnet = df.NETWORK == "alexnet"
sparse_changed = df.SPARSE_FRACTION != default_params["SPARSE_FRACTION"]
df[is_alexnet & sparse_changed]

Unnamed: 0,ALLOCATOR,NETWORK,IMAGES_SEC,PROFILING,BATCH_SIZE,HALLOC_FRACTION,BUSY_FRACTION,ROOMY_FRACTION,SPARSE_FRACTION,MAX_BLOCK_SZ,MAX_NSIZES,MAX_NCHUNK_IDS,BLOCK_STEP,NUNITS,MIN_BLOCK_SZ
39,halloc,alexnet,36.13,False,32,0.25,0.835,0.6,0.2,3072,16,8,16,8,16
48,halloc,alexnet,35.97,False,32,0.25,0.835,0.6,0.14,3072,16,8,16,8,16
50,halloc,alexnet,36.25,False,32,0.25,0.835,0.6,0.02,3072,16,8,16,8,16
55,halloc,alexnet,36.05,False,32,0.25,0.835,0.6,0.18,3072,16,8,16,8,16
57,halloc,alexnet,36.04,False,32,0.25,0.835,0.6,0.12,3072,16,8,16,8,16
95,halloc,alexnet,36.07,False,32,0.25,0.835,0.6,0.08,3072,16,8,16,8,16
96,halloc,alexnet,35.96,False,32,0.25,0.835,0.6,0.16,3072,16,8,16,8,16
132,halloc,alexnet,36.12,False,32,0.25,0.835,0.6,0.1,3072,16,8,16,8,16
161,halloc,alexnet,36.01,False,32,0.25,0.835,0.6,0.04,3072,16,8,16,8,16
168,halloc,alexnet,35.99,False,32,0.25,0.835,0.6,0.06,3072,16,8,16,8,16


In [21]:
# Summary statistics restricted to the alexnet rows.
alexnet_rows = df.loc[df["NETWORK"].eq("alexnet")]
alexnet_rows.describe()

Unnamed: 0,IMAGES_SEC,BATCH_SIZE,HALLOC_FRACTION,BUSY_FRACTION,ROOMY_FRACTION,SPARSE_FRACTION,MAX_BLOCK_SZ,MAX_NSIZES,MAX_NCHUNK_IDS,BLOCK_STEP,NUNITS,MIN_BLOCK_SZ
count,73.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0
mean,36.127397,32.0,0.277778,0.833232,0.603535,0.021899,3754.505051,19.707071,8.717172,17.282828,7.909091,19.707071
std,1.066996,0.0,0.132137,0.017733,0.029669,0.034896,3004.111391,27.100061,6.248331,12.604697,1.294343,27.100061
min,33.84,32.0,0.0,0.7,0.5,0.012,0.0,1.0,1.0,1.0,1.0,1.0
25%,35.99,32.0,0.25,0.835,0.6,0.012,3072.0,16.0,8.0,16.0,8.0,16.0
50%,36.12,32.0,0.25,0.835,0.6,0.012,3072.0,16.0,8.0,16.0,8.0,16.0
75%,36.23,32.0,0.25,0.835,0.6,0.012,3072.0,16.0,8.0,16.0,8.0,16.0
max,41.78,32.0,1.0,0.9,0.8,0.2,16384.0,256.0,64.0,128.0,16.0,256.0


In [22]:
# Rows measured with either of the two non-halloc allocators.
# (Append .describe() to the selection for summary statistics instead.)
df[df.ALLOCATOR.isin(["cumalloc", "bfc"])]

Unnamed: 0,ALLOCATOR,NETWORK,IMAGES_SEC,PROFILING,BATCH_SIZE,HALLOC_FRACTION,BUSY_FRACTION,ROOMY_FRACTION,SPARSE_FRACTION,MAX_BLOCK_SZ,MAX_NSIZES,MAX_NCHUNK_IDS,BLOCK_STEP,NUNITS,MIN_BLOCK_SZ
100,bfc,alexnet,41.65,False,32,0.25,0.835,0.6,0.012,3072,16,8,16,8,16
101,bfc,alexnet,41.78,False,32,0.25,0.835,0.6,0.012,3072,16,8,16,8,16
159,bfc,resnet50,5.82,False,16,0.25,0.835,0.6,0.012,3072,16,8,16,8,16
160,bfc,resnet50,5.82,False,16,0.25,0.835,0.6,0.012,3072,16,8,16,8,16


In [23]:
from sklearn import linear_model
from numpy import isnan

# Regression sample: halloc runs of alexnet that produced a throughput value.
is_halloc_alexnet = (df.ALLOCATOR == "halloc") & (df.NETWORK == "alexnet")
has_throughput = ~isnan(df.IMAGES_SEC)
df2 = df[is_halloc_alexnet & has_throughput]

# Explanatory columns, in the order their coefficients are printed below.
regressors = [
    "PROFILING",
    "HALLOC_FRACTION",
    "BUSY_FRACTION",
    "ROOMY_FRACTION",
    "SPARSE_FRACTION",
    "MAX_BLOCK_SZ",
    "MAX_NSIZES",
    "MAX_NCHUNK_IDS",
    "BLOCK_STEP",
    "NUNITS",
    "MIN_BLOCK_SZ",
]

# Ordinary least-squares fit of IMAGES_SEC on the parameters.
reg = linear_model.LinearRegression()
reg.fit(df2[regressors], df2["IMAGES_SEC"])

print(reg.coef_)
print(reg.intercept_)

[ 0.00000000e+00  3.14582033e-01 -4.23883393e+00  4.51848471e+00
  1.50301267e+00 -6.75545215e-05  1.47764885e-03  7.59503761e-03
  1.61029345e-03  0.00000000e+00  2.10705025e-03]
36.642822834440125


In [24]:
import statsmodels.formula.api as smf

# OLS of IMAGES_SEC on the tunable parameters. PROFILING and NUNITS are not
# part of this model.
# NOTE(review): presumably they were dropped because they do not vary within
# df2 - confirm.
predictors = [
    "HALLOC_FRACTION",
    "BUSY_FRACTION",
    "ROOMY_FRACTION",
    "SPARSE_FRACTION",
    "MAX_BLOCK_SZ",
    "MAX_NSIZES",
    "MAX_NCHUNK_IDS",
    "BLOCK_STEP",
    "MIN_BLOCK_SZ",
]
formula = "IMAGES_SEC ~ " + " + ".join(predictors)

result = smf.ols(formula=formula, data=df2).fit()

print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             IMAGES_SEC   R-squared:                       0.159
Model:                            OLS   Adj. R-squared:                  0.035
Method:                 Least Squares   F-statistic:                     1.278
Date:                Thu, 20 Dec 2018   Prob (F-statistic):              0.267
Time:                        17:26:20   Log-Likelihood:                -45.420
No. Observations:                  71   AIC:                             110.8
Df Residuals:                      61   BIC:                             133.5
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          36.6428      2.561     

In [25]:
import statsmodels.formula.api as smf

# Reduced OLS model: IMAGES_SEC explained by the four *_FRACTION parameters only.
fraction_predictors = [
    "HALLOC_FRACTION",
    "BUSY_FRACTION",
    "ROOMY_FRACTION",
    "SPARSE_FRACTION",
]
formula = "IMAGES_SEC ~ " + " + ".join(fraction_predictors)

result = smf.ols(formula=formula, data=df2).fit()

print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             IMAGES_SEC   R-squared:                       0.115
Model:                            OLS   Adj. R-squared:                  0.061
Method:                 Least Squares   F-statistic:                     2.135
Date:                Thu, 20 Dec 2018   Prob (F-statistic):             0.0862
Time:                        17:26:20   Log-Likelihood:                -47.232
No. Observations:                  71   AIC:                             104.5
Df Residuals:                      66   BIC:                             115.8
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          36.5211      2.516     