In [None]:
import os

import ipywidgets as widgets
import pandas as pd
from pandas.api.types import is_numeric_dtype
from sklearn.linear_model import LinearRegression

class ToggleButtons:
    """Grid of per-column Keep/Normalise/Drop selectors built from ipywidgets.

    One ``Row`` is created for every entry of ``columns`` and the rows are
    stacked in ``self.rows`` (a ``widgets.VBox``).  The ``conf*`` methods apply
    preset selections; ``get_labels`` reads the current selection back.
    """

    class Row(widgets.HBox):
        """A fixed-width text label followed by one toggle-button group."""

        def __init__(self, name, options):
            """Create the row labelled ``name`` offering the choices ``options``."""
            self.OPTIONS = options
            self.toggle_buttons = widgets.ToggleButtons(options=options)
            self.label = widgets.Label(value=name, layout={"width": "400px"})
            children = [self.label, self.toggle_buttons]
            super().__init__(children)

        @property
        def name(self):
            """Text shown in the row's label."""
            return self.label.value

        @property
        def value(self):
            """Option currently selected in the toggle-button group."""
            return self.toggle_buttons.value

        @value.setter
        def value(self, option):
            # Only options this row was built with may be selected.
            assert option in self.OPTIONS
            self.toggle_buttons.value = option

        @property
        def index(self):
            """Index of the selected option, as reported by the toggle buttons."""
            return self.toggle_buttons.index

        def is_raw(self):
            """Return True if the name starts with ``0x`` or is CNTVCT/PMCCNTR.

            NOTE(review): presumably these are raw hardware event codes and
            counter registers -- confirm against the data source.
            """
            label = self.name
            if label.startswith("0x"):
                return True
            return label in ["CNTVCT", "PMCCNTR"]

        def is_absolute(self):
            """Return True if the name has no relative marker and is not raw.

            A name counts as relative when it contains ``%``, ``rate`` or
            ``throughput``.
            """
            label = self.name
            for marker in ["%", "rate", "throughput"]:
                if marker in label:
                    return False
            return not self.is_raw()

    def __init__(self, columns):
        """Build one row per entry of ``columns`` and apply the ``conf1`` preset."""
        self.columns = columns
        self.OPTIONS = ['Keep', 'Normalise', 'Drop']

        # Every row receives the same OPTIONS list object.
        built_rows = []
        for column in columns:
            built_rows.append(self.Row(column, self.OPTIONS))
        self.rows = widgets.VBox(built_rows)

        # Start from the default preset.
        self.conf1()

    def conf1(self):
        """Preset: drop raw rows, normalise absolute rows, then drop a fixed list.

        The steps run in this order, so a later step overrides an earlier one
        for any row matched by both.
        """
        row_cls = self.Row
        self.set_if_true("Drop", row_cls.is_raw)
        self.set_if_true("Normalise", row_cls.is_absolute)

        always_dropped = [
            "Statistics::Execution time (s)",
            "Difference",
            "Gem5 time",
            "A64fx time",
        ]
        self.set_if_in_list("Drop", always_dropped)

    def conf2(self):
        """Preset: drop absolute rows and normalise raw rows."""
        row_cls = self.Row
        self.set_if_true("Drop", row_cls.is_absolute)
        self.set_if_true("Normalise", row_cls.is_raw)

    def conf_drop_all_except(self, label):
        """Set every row whose name differs from ``label`` to "Drop"."""

        def is_other(row):
            return row.name != label

        self.set_if_true("Drop", is_other)

    def get_num_repr(self):
        """Encode the current selection as a single integer.

        Each row's selected index is one digit in base ``len(self.OPTIONS)``,
        with the first row as the most significant digit.
        """
        base = len(self.OPTIONS)
        encoded = 0
        for row in self.rows.children:
            encoded *= base
            encoded += row.index
        return encoded

    def get_labels(self, option: str):
        """Return the names of all rows currently set to ``option``, in row order."""
        assert option in self.OPTIONS
        selected = []
        for row in self.rows.children:
            if row.value == option:
                selected.append(row.name)
        return selected

    def set_if_true(self, option: str, fn):
        """Set ``option`` on every row for which ``fn(row)`` is truthy."""
        matching = (row for row in self.rows.children if fn(row))
        for row in matching:
            row.value = option

    def set_if_in_list(self, option: str, lst):
        """Set ``option`` on every row whose name is contained in ``lst``."""
        matching = (row for row in self.rows.children if row.name in lst)
        for row in matching:
            row.value = option
        


In [None]:
import linreg

# Input tables; both paths are passed to linreg.get_data unchanged.
all_tsv = "~/Sync/all.tsv"
polybench_tsv = "~/Sync/polybench.tsv"
data = linreg.get_data(all_tsv, polybench_tsv)

# One Keep/Normalise/Drop selector per column of the loaded data.
tbs = ToggleButtons(data.columns)

# Bare last expression so the notebook renders the selector grid.
tbs.rows

In [None]:
# Read the current widget selection and build the regression inputs from it.
norm_cols = tbs.get_labels("Normalise")
drop_cols = tbs.get_labels("Drop")
X, y = linreg.normalise(data, norm_cols, drop_cols)

# Fit a linear model; the score is computed on the same X, y used for fitting.
reg = LinearRegression().fit(X, y)
score = reg.score(X, y)
print(f"Score: {score}")

# Pair each feature with its coefficient, largest coefficient first.
coefs = sorted(zip(X.columns, reg.coef_), key=lambda t: t[1], reverse=True)
print(f"intercept: {reg.intercept_}")
coefs_df = pd.DataFrame(coefs)

# The file name embeds the integer encoding of the widget selection, so each
# distinct selection is logged to its own file.
log_path = f"logs/simaccuracy-logreg-{tbs.get_num_repr()}"
# Create the target directory first: to_csv does not create missing parent
# directories and would otherwise fail after the model has been fitted.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
coefs_df.to_csv(log_path, sep="\t")
coefs_df

In [None]:
%matplotlib notebook

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

top = "Busy::L1 busy rate (%)"
bot = "Cache::L2 miss hardware prefetch rate (%) (/L2 miss)"

plt.plot(X[bot], y, "o")

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Report which matplotlib backend is active.
print(matplotlib.get_backend())

# New figure holding a single 3D axes.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection="3d")

# The two selected feature columns are passed as the first two coordinates
# and y as the third.
xs = X[top]
ys = X[bot]
ax.scatter(xs, ys, y, marker="o")