In [None]:
# Point the working directory at the project root so that the `py_scripts`
# package and the relative `data/` paths used further down resolve.
import os

# Only step up when we are not already at the root: an unconditional
# `os.chdir('../')` climbs one more level every time this cell is re-run
# (or when the kernel was started from the root), which breaks the cells below.
# Assumes the root is the directory that contains `py_scripts/` — TODO confirm.
if not os.path.isdir('py_scripts'):
    os.chdir('../')

os.getcwd()

In [None]:
# imports
import numpy as np
import pandas as pd
from celer import GroupLasso

from py_scripts.processing import adapt_to_group_lasso
from py_scripts.analytics import score_features

In [None]:
# read the features (`X`) and the target (`y`) from their CSV files, in that order
X, y = (
    pd.read_csv(csv_path)
    for csv_path in ("data/automobile-X.csv", "data/automobile-y.csv")
)

In [None]:
# Group-lasso fit.

# Pre-processing: `adapt_to_group_lasso` returns the frame handed to the solver
# together with the value passed as `groups` below.
X_dummies, groups_sizes = adapt_to_group_lasso(X)

# Build the estimator, then fit it on the raw arrays behind the two frames.
grp_lasso = GroupLasso(groups=groups_sizes, alpha=1.0)

grp_lasso.fit(X_dummies.values, y.values)

In [None]:
# pair each column of the pre-processed frame with its fitted coefficient
[(column, coef) for column, coef in zip(X_dummies.columns, grp_lasso.coef_)]

In [None]:
# Score the variables, then rank the entries by their second field,
# highest first.
scores = score_features(X, y)

sorted_scores = list(scores)
sorted_scores.sort(key=lambda entry: entry[1], reverse=True)

In [None]:
# Bar chart of the scores, in ranked order.
import plotly.graph_objects as go
# NOTE(review): `itemgetter` is not used anywhere in this cell.
from operator import itemgetter

# first field of each entry goes on the x axis, second field on the y axis
fig = go.Figure(
    data=[
        go.Bar(
            x=[entry[0] for entry in sorted_scores],
            y=[entry[1] for entry in sorted_scores],
        )
    ]
)

# the y axis is log-scaled
fig.update_layout(
    title="Scores of variables",
    yaxis=dict(type="log", title="score"),
)

fig.show()

In [None]:
# show the first K entries of the ranked list
K = 5

print(sorted_scores[0:K])