In [1]:
from importlib import reload

import polars as pl
from plotly import graph_objects as go

import coolsearch.models as cmodel
import coolsearch.search as csearch
import coolsearch.plotting as cplt

reload(csearch)
reload(cmodel)

cplt.set_plotly_template()

SEED = 137

## classification problem


In [2]:
from sklearn import datasets, model_selection
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier

X, y = datasets.make_classification(1000, 5, random_state=SEED)
X_train, X_val, y_train, y_val = model_selection.train_test_split(
    X, y, random_state=SEED
)

In [3]:
clf = DecisionTreeClassifier(random_state=SEED)
clf.fit(X_train, y_train)

print(classification_report(y_val, clf.predict(X_val), digits=4))

              precision    recall  f1-score   support

           0     0.9322    0.9091    0.9205       121
           1     0.9167    0.9380    0.9272       129

    accuracy                         0.9240       250
   macro avg     0.9244    0.9235    0.9239       250
weighted avg     0.9242    0.9240    0.9240       250



## Grid search DT-model


In [4]:
params = {
    "max_depth": (1, 30),
    "min_samples_leaf": (1, 50),
    "min_samples_split": (2, 8),
}

search = csearch.CoolSearch.model_validate(
    DecisionTreeClassifier(random_state=SEED),
    parameters=params,
    data=(X_train, X_val, y_train, y_val),
    metrics=accuracy_score,
)

_ = search.grid_search(steps=10)
print(search)
display(search.samples.sort(pl.col("value"))[0, :])

Searching 700 new parameter points


700it [00:01, 378.80it/s]

Sum of runtime: 1.83 s. Elapsed time 1.86 s.
Overhead: 0.0232 s.
3 dimensional search
  - has 700 samples





max_depth,min_samples_leaf,min_samples_split,runtime,value
i32,i32,i32,f64,f64
1,1,2,0.003274,0.904


### Compare to sklearn


## Visualizations


In [5]:
s = search.samples

go.Figure(
    go.Scatter(
        x=s["max_depth"],
        y=s["min_samples_leaf"],
        marker_color=(s["value"]),
        mode="markers",
    )
)

### polynomials & marginals


In [8]:
from coolsearch import models

polymod = models.PolynomialModel(search.samples, 2)
polyval = polymod.predict(search.get_grid(100))

margpoly = {}
for k in search.params:
    margpoly[k] = (
        polyval.group_by(k)
        .agg(
            pl.col("y_pred").mean().alias("mean"),
        )
        .sort(k)
    )

AttributeError: 'PolynomialModel' object has no attribute 'features'

In [None]:
marg = search.marginals()
margpoly = polymod.poly_marginals()


In [None]:
def marg_plot(
    marginal: pl.DataFrame,
    mean=True,
    median=False,
    intervals=False,
):
    """Plot a marginal distribution"""

    shade = "rgba(255,200,200,0.2)"
    stat_cols = ["mean", "std", "median"]

    feat = next(col for col in marginal.columns if col not in stat_cols)
    fig = go.Figure(
        layout=dict(
            title=feat,
            yaxis_title="value",
            margin=dict(t=50, l=20, r=10, b=10),
            width=400,
            height=200,
        ),
    )

    mu = marginal["mean"]
    std = marginal["std"]
    # TODO PROPER INTERVALS
    upper = mu + std
    lower = mu - std

    if mean:
        fig.add_trace(go.Scatter(x=marginal[feat], y=mu, name="mean"))
    if median:
        fig.add_trace(go.Scatter(x=marginal[feat], y=marginal["median"], name="median"))

    if intervals:
        fig.add_traces(
            [
                go.Scatter(
                    x=marginal[feat],
                    y=lower,
                    name="lower",
                    mode="lines",
                    line_color=shade,
                ),
                go.Scatter(
                    x=marginal[feat],
                    y=upper,
                    name="upper",
                    mode="lines",
                    fill="tonexty",
                    line_color=shade,
                    fillcolor=shade,
                ),
            ]
        )
    return fig


for k in marg.keys():
    marg_plot(margpoly[k], mean=True, median=True, intervals=True).show()