In [None]:
import pandas as pd
import ipywidgets as widgets
from pandas.api.types import is_numeric_dtype
from sklearn.linear_model import LinearRegression

class Checkboxes:
    """Two columns of checkboxes ("Normalise" and "Drop"), one box per data column.

    ``self.checkboxes`` maps:
      * ``"norm"`` -> VBox holding a header Label followed by one Checkbox per column
      * ``"drop"`` -> VBox with the same layout
      * ``"both"`` -> HBox placing the two VBoxes side by side

    In each VBox the child at position 0 is the header Label, so the checkbox
    for the i-th column sits at child position ``i + 1``.
    """

    def __init__(self, columns):
        """Build both checkbox columns and pre-tick the "raw" ones under "norm".

        Args:
            columns: iterable of column labels (e.g. ``DataFrame.columns``).
                It is iterated more than once, so it must not be a one-shot
                iterator.
        """
        self.columns = columns
        self.checkboxes = {}
        self.checkboxes["norm"] = self.create_cols("Normalise")
        self.checkboxes["drop"] = self.create_cols("Drop")
        # Built from exactly the two columns created above.
        self.checkboxes["both"] = widgets.HBox(
            [self.checkboxes["norm"], self.checkboxes["drop"]]
        )
        # Extra labels that is_raw() treats as raw besides the "0x"-prefixed ones.
        self.SPEC_RAW = []
        for cb in self.get_raw("norm"):
            cb.value = True

    def create_cols(self, name: str):
        """Return a VBox: a header Label showing ``name``, then one unticked
        Checkbox per entry of ``self.columns`` (in the same order)."""
        header = widgets.Label(value=name)
        boxes = [
            widgets.Checkbox(
                value=False,
                # The widget description is text; str() keeps non-string
                # column labels (e.g. integer column names) from being rejected.
                description=str(label),
                disabled=False,
                indent=False,
            )
            for label in self.columns
        ]
        return widgets.VBox([header] + boxes)

    def get_index(self, value: str):
        """Return the child position of the checkbox for column ``value``.

        The result is the column's position in ``self.columns`` plus one,
        because child 0 of each VBox is the header Label. For a duplicated
        label this is the first occurrence.

        Raises:
            ValueError: if ``value`` is not one of ``self.columns``.
        """
        # adjust for the + 1 label!
        return list(self.columns).index(value) + 1

    def get_widget(self, cb_col: str, label: str):
        """Return the checkbox for column ``label`` in checkbox column
        ``cb_col`` (``"norm"`` or ``"drop"``)."""
        return self.checkboxes[cb_col].children[self.get_index(label)]

    def get_column_names(self, cb_col: str):
        """Return the column labels whose checkbox in ``cb_col`` is ticked.

        Columns are paired with their checkboxes by position rather than
        looked up by label. This way duplicated labels each report their own
        checkbox (a by-label lookup would always hit the first duplicate),
        and the whole pass is linear in the number of columns.
        """
        # Skip child 0: it is the header Label, not a checkbox.
        boxes = self.checkboxes[cb_col].children[1:]
        return [col for col, cb in zip(self.columns, boxes) if cb.value]

    def show(self):
        """Return the HBox containing both checkbox columns, for display."""
        return self.checkboxes["both"]

    def is_raw(self, cb):
        """Return True if ``cb.description`` starts with ``"0x"`` or is listed
        in ``self.SPEC_RAW``."""
        return cb.description.startswith("0x") or cb.description in self.SPEC_RAW

    def get_raw(self, cb_col: str):
        """Return the children of checkbox column ``cb_col`` for which
        ``is_raw`` is true.

        The header Label is scanned as well; create_cols() sets no
        description on it, so it is presumably never matched - TODO confirm
        against the widget's default description.
        """
        cboxes = self.checkboxes[cb_col].children
        return [cb for cb in cboxes if self.is_raw(cb)]


In [None]:
import linreg
# Load the data set from the two TSV inputs. How get_data combines them is
# defined in the linreg module, not in this notebook.
tsv_paths = ("~/Sync/all.tsv", "~/Sync/polybench.tsv")
data = linreg.get_data(*tsv_paths)

# One "Normalise" and one "Drop" checkbox per column of the loaded data.
checkboxes = Checkboxes(data.columns)

# Last expression of the cell: the combined widget is rendered as its output.
checkboxes.show()

In [None]:
# Read the current checkbox selections. These reflect the widget state at the
# moment this cell runs, so re-run the cell after changing any checkbox.
norm_cols, drop_cols = (
    checkboxes.get_column_names(kind) for kind in ("norm", "drop")
)

# linreg.normalise returns the feature table and the target from the data.
X, y = linreg.normalise(data, norm_cols, drop_cols)

# Fit on X, y and report the score on the same X, y it was fitted on.
reg = LinearRegression()
reg.fit(X, y)
print(f"Score: {reg.score(X, y)}")

# Pair every feature column with its coefficient, largest coefficient first.
coefs = list(zip(X.columns, reg.coef_))
coefs.sort(key=lambda pair: pair[1], reverse=True)
linreg.print_results(coefs)
print(reg.intercept_)
