In [1]:
import json
from operator import index
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import pickle
import time

from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import ExtraTreesRegressor
import xgboost as xgb
import tensorflow as tf

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings('once', '.*sliced data.*', )
warnings.filterwarnings('once', '.*Maximum iterations*', )

2022-03-10 11:53:16.876028: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-10 11:53:16.876095: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
def prepare_dataset(path : str, workload_name : str):
    dataset = {}
    files = os.listdir(path)

    for file in files:
        with open(path+"/"+file, "r") as f:
            raw_str = f.read()
        data = json.loads(raw_str)

        if "layers" in data.keys():
            del data["layers"]

        if "relay" in data.keys():
            del data["relay"]

        if "conv2d" in workload_name:
            data["kernel_0"] = data["kernel"][0]
            data["kernel_1"] = data["kernel"][1]
            del data["kernel"]
            data["dilation_0"] = data["dilation"][0]
            data["dilation_1"] = data["dilation"][1]
            del data["dilation"]

            if not "kernel layout" in data.keys():
                data["kernel layout"] = "OIHW"

        elif workload_name == "dense":
            data["features"] = data["input shape"][1]
            del data["input shape"]
            del data["output shape"]
            #print()

        elif workload_name in ["max_pool2d", "avg_pool2d"]:
            data["pool_0"] = data["pool_size"][0]
            data["pool_1"] = data["pool_size"][1]
            del data["pool_size"]

        if workload_name in ["max_pool2d", "avg_pool2d", "conv2d", "dilated_conv2d", "depthwise_conv2d"]:
            del data["padding"]
            data["C_I"] = data["input shape"][3]
            data["H_I"] = data["input shape"][1]
            data["W_I"] = data["input shape"][2]
            del data["input shape"]

            data["C_O"] = data["output shape"][3]
            data["H_O"] = data["output shape"][1]
            data["W_O"] = data["output shape"][2]
            del data["output shape"]

            key = "strides"
            if "stride" in data.keys():
                key = "stride"
            #print(key)
            data["strides_0"] = data[key][0]
            data["strides_1"] = data[key][1]
            del data["strides"]
            if "stride" in data.keys():
                del data["stride"]

        dataset[file] = data

    return dataset

def create_dataframe(dataset : dict, workload_name : str, labels = ["time", "power", "memory"]):
    df = pd.DataFrame.from_dict(dataset, orient='index')
    categoricals = [
        "output dtype",
        "compute dtype",
        "workload",
        ]
    if workload_name in ["conv2d", "max_pool2d", "avg_pool2d", "depthwise_conv2d", "dilated_conv2d"]:
        if "data_layout" in list(df.columns):
            categoricals += [
                #"padding",
                "data_layout",
            ]
        else:    
            categoricals += [
                #"padding",
                "data layout",
            ]
        

    if "conv2d" in workload_name:
        if "kernel_layout" in list(df.columns):
            categoricals += [
                "kernel_layout",
            ]
        if "kernel layout" in list(df.columns):
            categoricals += [
                "kernel layout",
            ]

    for col in categoricals:
        oh = pd.get_dummies(df[col], prefix=col, drop_first=False)
        df = pd.concat([df, oh], axis=1).drop(col, axis=1)
    
    features = list(df.columns)
    #labels = ["time", "power", "memory"]
    for label in labels:
        idx = features.index(label)
        del features[idx]
    del idx
    df = df.drop_duplicates(subset=features)

    label_data = []
    for label in labels:
        label_data.append(df[label])
        df.pop(label)

    output = pd.concat(label_data, axis=1)

    return df, output

In [3]:
dataset_base = "./dataset"
targets = os.listdir(dataset_base)

layer_targets = list()
for target in targets:
    target_path = dataset_base + "/" + target
    layers = os.listdir(target_path)
    
    for layer in layers:
        dataset_path = target_path + "/" + layer + "/"
        print(dataset_path)
        layer_targets.append(dataset_path)

print("found {0} folders with samples, going to train models for each of these targets".format(len(layer_targets)))
print()

./dataset/cuda_980ti/avg_pool2d/
./dataset/cuda_980ti/conv2d/
./dataset/cuda_980ti/dense/
./dataset/cuda_980ti/depthwise_conv2d/
./dataset/cuda_980ti/dilated_conv2d/
./dataset/cuda_980ti/max_pool2d/
./dataset/cuda_A100/avg_pool2d/
./dataset/cuda_A100/conv2d/
./dataset/cuda_A100/dense/
./dataset/cuda_A100/depthwise_conv2d/
./dataset/cuda_A100/dilated_conv2d/
./dataset/cuda_A100/max_pool2d/
./dataset/cuda_K80/avg_pool2d/
./dataset/cuda_K80/conv2d/
./dataset/cuda_K80/dense/
./dataset/cuda_K80/depthwise_conv2d/
./dataset/cuda_K80/dilated_conv2d/
./dataset/cuda_K80/max_pool2d/
./dataset/latest_rasp4b/rasp4b_densenet121-conv1-conv_1.json/
./dataset/latest_rasp4b/rasp4b_densenet121-conv1-conv_16.json/
./dataset/latest_rasp4b/rasp4b_densenet121-conv1-conv_32.json/
./dataset/latest_rasp4b/rasp4b_densenet121-conv1-conv_64.json/
./dataset/latest_rasp4b/rasp4b_densenet121-conv1-conv_8.json/
./dataset/latest_rasp4b/rasp4b_densenet121-conv2_block1_1_conv_1.json/
./dataset/latest_rasp4b/rasp4b_densen

In [9]:
path = "./dataset/latest_rasp4b/"
targets = os.listdir(path)
print(len(targets))

print(target)
tmp = path.split("/")
workload_name = "conv2d"
device_name = "rasp4b"
files = os.listdir(path)
dataset = {}

labels = ["time", "power", "memory"]
if device_name == "rasp4b":
    labels = ["time", "power", "ws_size", "io_size"]

print("{} : {}\t:\t contains {} samples".format(workload_name, device_name, len(files)))
dataset = prepare_dataset(path, workload_name)
#print("\tLoading data into memory:\tcompleted")
df, output = create_dataframe(dataset, workload_name, labels=labels)

9938
rasp4b
conv2d : rasp4b	:	 contains 9938 samples


In [12]:
df.dilation_0.unique()

array([1, 2, 4])

In [13]:
len(df)

2816

In [11]:
output

Unnamed: 0,time,power,ws_size,io_size
rasp4b_densenet121-conv1-conv_1.json,0.093949,4.29605,3085888,3211264
rasp4b_densenet121-conv1-conv_16.json,0.993068,4.09810,48808768,51380224
rasp4b_densenet121-conv1-conv_32.json,1.873140,3.94830,97579840,102760448
rasp4b_densenet121-conv1-conv_64.json,3.884650,4.17835,195121984,205520896
rasp4b_densenet121-conv1-conv_8.json,0.513361,4.26930,24423232,25690112
...,...,...,...,...
rasp4b_xception-conv2d_3_dilation_rate_4_16.json,0.479192,3.95365,7641408,6553600
rasp4b_xception-conv2d_64.json,0.747828,4.11415,134611072,179437568
rasp4b_xception-conv2d_8.json,0.125812,4.40305,16855168,22429696
rasp4b_xception-conv2d_dilation_rate_2_1.json,0.017046,3.91620,1434752,2803712
