In [None]:
import pandas as pd
import ipywidgets as widgets
from pandas.api.types import is_numeric_dtype
from sklearn.linear_model import LinearRegression

class ToggleButtons:
    """A stack of Keep / Normalise / Drop selectors, one per column name.

    Every column name gets a ``Row`` (a text label next to an ``ipywidgets``
    ``ToggleButtons`` control).  On construction each row is pre-set from
    name-based heuristics; the selections can then be changed in the rendered
    widget and read back with ``get_labels``.

    Attributes:
        OPTIONS: the option labels offered by every row, in display order.
        columns: the iterable of column names passed to the constructor,
            stored unchanged.
        rows: a ``widgets.VBox`` whose children are the ``Row`` objects, in
            the same order as ``columns``; display this to show the selector.
    """

    # Shared by every instance and every row.  Kept as a list so the attribute
    # has the same type it had when it was assigned per instance.
    # NOTE(review): ipywidgets appears to preselect the first option, so rows
    # that no heuristic touches should read "Keep" -- confirm.
    OPTIONS = ["Keep", "Normalise", "Drop"]

    class Row(widgets.HBox):
        """One selector line: a fixed-width name label plus the option buttons."""

        # Names classified as "raw": anything starting with _RAW_PREFIX or
        # exactly equal to one of _RAW_NAMES.
        _RAW_PREFIX = "0x"
        _RAW_NAMES = ("CNTVCT", "PMCCNTR")
        # Substrings that mark a name as relative (i.e. not absolute).
        _RELATIVE_MARKERS = ("%", "rate", "throughput")
        # Exact names that are dropped regardless of the other heuristics.
        _DROP_NAMES = ("Statistics::Execution time (s)", "Difference")

        def __init__(self, name, options):
            """Build the label/button pair for a single column.

            Args:
                name: column name shown in the label; also what ``name``
                    returns afterwards.
                options: option labels handed to ``widgets.ToggleButtons``.
            """
            self.toggle_buttons = widgets.ToggleButtons(options=options)
            self.label = widgets.Label(value=name, layout={"width": "400px"})
            super().__init__([self.label, self.toggle_buttons])

        @property
        def name(self):
            """The column name, read back from the label widget."""
            return self.label.value

        @property
        def value(self):
            """The option currently selected on this row's buttons."""
            return self.toggle_buttons.value

        @value.setter
        def value(self, value):
            self.toggle_buttons.value = value

        def is_raw(self):
            """Return True if the name starts with ``0x`` or is a listed raw name."""
            return (
                self.name.startswith(self._RAW_PREFIX)
                or self.name in self._RAW_NAMES
            )

        def is_absolute(self):
            """Return True if the name is not raw and has no relative marker.

            NOTE(review): markers are matched as case-sensitive substrings
            anywhere in the name, so e.g. a name containing "generated" also
            matches "rate" -- confirm that this is intended.
            """
            if self.is_raw():
                return False
            return not any(marker in self.name for marker in self._RELATIVE_MARKERS)

        def should_drop(self):
            """Return True if the name is exactly one of the always-drop names."""
            return self.name in self._DROP_NAMES

    def __init__(self, columns):
        """Create one row per column and apply the default selections.

        Args:
            columns: iterable of column names; each one becomes a ``Row``.
        """
        self.columns = columns
        self.rows = widgets.VBox([self.Row(col, self.OPTIONS) for col in columns])

        # Order matters: a later call overwrites an earlier one on the same row.
        # Raw rows are dropped first (is_absolute never matches a raw row, so
        # the second call leaves them alone); the explicit drop list runs last
        # so that it wins over "Normalise".
        self.set_if_true("Drop", self.Row.is_raw)
        self.set_if_true("Normalise", self.Row.is_absolute)
        self.set_if_true("Drop", self.Row.should_drop)

    def get_labels(self, option: str) -> list:
        """Return the names of all rows currently set to ``option``.

        Args:
            option: one of ``OPTIONS``.

        Returns:
            Row names in display order; empty if no row has that selection.

        Raises:
            AssertionError: if ``option`` is not one of ``OPTIONS``.
        """
        assert option in self.OPTIONS, (
            f"unknown option {option!r}; expected one of {self.OPTIONS}"
        )
        return [row.name for row in self.rows.children if row.value == option]

    def set_if_true(self, option: str, fn) -> None:
        """Select ``option`` on every row for which ``fn(row)`` is truthy.

        Args:
            option: one of ``OPTIONS``.
            fn: callable taking a ``Row`` and returning a truthy/falsy value,
                e.g. the unbound method ``Row.is_raw``.

        Raises:
            AssertionError: if ``option`` is not one of ``OPTIONS``.
        """
        assert option in self.OPTIONS, (
            f"unknown option {option!r}; expected one of {self.OPTIONS}"
        )
        for row in self.rows.children:
            if fn(row):
                row.value = option


In [None]:
# Load the data set through the project-local `linreg` module.
# NOTE(review): both TSV paths are hard-coded under the home directory; whether
# `~` is expanded depends on linreg.get_data -- confirm before running elsewhere.
import linreg
data = linreg.get_data("~/Sync/all.tsv", "~/Sync/polybench.tsv")
# One selector row per entry of data.columns (presumably `data` is a pandas
# DataFrame; only `.columns` is relied on in this cell).
tbs = ToggleButtons(data.columns)
# Bare last expression so the notebook renders the VBox of selector rows.
# The next cell reads the toggle state, so adjust the selections here first.
tbs.rows

In [None]:
# Read the column selections from the current state of the toggle widget.
norm_cols, drop_cols = tbs.get_labels("Normalise"), tbs.get_labels("Drop")

# linreg.normalise hands back the design matrix and the target as a pair.
# NOTE(review): what it does with norm_cols / drop_cols is not visible here.
X, y = linreg.normalise(data, norm_cols, drop_cols)

# Fit, then score on the very same X, y the model was fitted on.
reg = LinearRegression()
reg.fit(X, y)
print(f"Score: {reg.score(X, y)}")

# Pair every feature column with its coefficient, largest coefficient first.
coefs = list(zip(X.columns, reg.coef_))
coefs.sort(key=lambda pair: pair[1], reverse=True)
linreg.print_results(coefs)
print(f"intercept: {reg.intercept_}")
