In [219]:
from importlib import reload

import numpy as np
from plotly import express as px
from plotly import graph_objects as go
from plotly import io as pio
from sklearn import linear_model, metrics, datasets, preprocessing, model_selection

import cool_search as cool

# Re-import the local module so edits to cool_search are picked up
# without restarting the kernel.
reload(cool)


# Make the dark plotly theme, pinned to a fixed 700x500 canvas, the default
# template for the figures created after this cell.
plot_temp = pio.templates["plotly_dark"]
plot_temp.layout.update(width=700, height=500, autosize=False)
pio.templates.default = plot_temp

## Linear regression: one-dimensional search over the ElasticNet `alpha`

In [220]:
# Seeded generator: every noise draw in this section comes from it, so the
# synthetic data depends on the order in which it is consumed.
rng = np.random.default_rng(137)

# Column vectors of shape (n, 1): 80 training inputs on [0, 20] and
# 30 validation inputs on [3, 22] (the latter extends past the training range).
X_train = np.linspace(0, 20, num=80)[:, np.newaxis]
X_val = np.linspace(3, 22, num=30)[:, np.newaxis]


def fun(x, sigma):
    """Evaluate the line ``3 + 0.8 * x`` with additive Gaussian noise.

    Parameters
    ----------
    x : numpy.ndarray
        Input values; the result has the same shape.
    sigma : float
        Factor applied to the standard-normal noise (0 gives the clean line).

    Returns
    -------
    numpy.ndarray
        ``3 + 0.8 * x + sigma * noise``.

    Notes
    -----
    The noise is drawn from the module-level ``rng`` on every call, even when
    ``sigma`` is 0, so each call advances the generator state.
    """
    noise = rng.normal(size=x.shape)
    return 3 + 0.8 * x + sigma * noise


# Order matters: each call to fun() draws from the shared rng (also when
# sigma is 0), so reordering these lines would change the sampled noise.
Y_true = fun(X_train, sigma=0)
Y_train = fun(X_train, sigma=2)
Y_val = fun(X_val, sigma=2)


# ElasticNet with l1_ratio=1 and a hand-picked alpha as a baseline fit.
linreg = linear_model.ElasticNet(alpha=3.3, l1_ratio=1).fit(X_train, Y_train)

# X_train is already a column vector, so it can be passed to predict() as is.
y_linreg_default = linreg.predict(X_train).flatten()


# Overlay the noisy training data, the noise-free line, the ElasticNet fit
# and the validation points in a single figure.
fig = go.Figure(
    data=[
        go.Scatter(x=X_train.ravel(), y=Y_train.ravel(), mode="markers", name="data"),
        go.Scatter(x=X_train.ravel(), y=Y_true.ravel(), mode="lines", name="GT"),
        go.Scatter(
            x=X_train.ravel(), y=y_linreg_default.ravel(), mode="lines", name="linreg"
        ),
        go.Scatter(x=X_val.ravel(), y=Y_val.ravel(), mode="markers", name="validation"),
    ]
)

# Title and axis labels so the plot is readable on its own; update_layout
# returns the figure, so it is still rendered as the cell output.
fig.update_layout(
    title="ElasticNet fit on noisy linear data",
    xaxis_title="x",
    yaxis_title="y",
)

In [221]:
# Pick up any edits made to the cool_search module since the last import.
reload(cool)

# One-dimensional search over the ElasticNet `alpha` on the range
# (0.0001, 10), with l1_ratio held fixed at 1 and "MSE" as the metric.
search = cool.search_one_dim(
    estimator=linear_model.ElasticNet,
    param_name="alpha",
    param_range=(1e-4, 10),
    metric="MSE",
    fixed_params=dict(l1_ratio=1),
    X_train=X_train,
    Y_train=Y_train,
    X_val=X_val,
    Y_val=Y_val,
)

# Print the search summary before the grid call below.
print(search)

# grid(100) returns two sequences, plotted against each other as a line.
pp, score = search.grid(100)
px.line(x=pp, y=score)

Cool search:
  - <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>
  - training data   (80, 1), (80, 1)
  - validation data (30, 1), (30, 1)
  - parameter       alpha (0.0001, 10)
  - samples         0


## Classification: one-dimensional search over the LogisticRegression `C`

In [222]:
# Synthetic 3-class problem: 1000 samples, 8 features of which 4 are informative.
X, Y = datasets.make_classification(
    n_samples=1_000, n_features=8, n_informative=4, n_classes=3, random_state=42
)

# 80/20 train/validation split.
# NOTE: this rebinds X_train / X_val / Y_train / Y_val, which held the
# regression data in the previous section.
X_train, X_val, Y_train, Y_val = model_selection.train_test_split(
    X, Y, test_size=0.2, random_state=42
)


# Quick look at the first two features of the training set, coloured by class.
# Labels are cast to str so plotly treats them as discrete categories; the
# `labels` mapping replaces the generic "x" / "y" / "color" titles.
px.scatter(
    x=X_train[:, 0],
    y=X_train[:, 1],
    color=[str(c) for c in Y_train],
    labels={"x": "feature 0", "y": "feature 1", "color": "class"},
    width=300,
    height=300,
)


In [223]:
print("X_train", X_train.shape)
print("Y_train", Y_train.shape)

# Baseline classifier with a hand-picked regularisation strength.
logreg = linear_model.LogisticRegression(C=0.3)
logreg.fit(X_train, Y_train)

# Predict once and reuse the result for both reports instead of calling
# predict() twice on the same data.
Y_val_pred = logreg.predict(X_val)
print(metrics.classification_report(Y_val, Y_val_pred, digits=5))
# Cross-check: the accuracy helper of cool_search on the same predictions.
print(cool.search_one_dim.accuracy_score(Y_val, Y_val_pred))


X_train (800, 8)
Y_train (800,)
              precision    recall  f1-score   support

           0    0.81538   0.82812   0.82171        64
           1    0.59420   0.56164   0.57746        73
           2    0.48485   0.50794   0.49612        63

    accuracy                        0.63000       200
   macro avg    0.63148   0.63257   0.63176       200
weighted avg    0.63053   0.63000   0.63000       200

0.63


In [228]:
# Pick up any edits made to the cool_search module since the last import.
reload(cool)

# One-dimensional search over the LogisticRegression `C` on the range
# (0.01, 2), using "accuracy" as the metric and no extra fixed parameters.
search = cool.search_one_dim(
    estimator=linear_model.LogisticRegression,
    param_name="C",
    param_range=(0.01, 2),
    metric="accuracy",
    fixed_params=dict(),
    X_train=X_train,
    Y_train=Y_train,
    X_val=X_val,
    Y_val=Y_val,
)


In [236]:
# Print the search summary first, so it reflects the state before the grid
# call below.
print(search)

# grid(100) returns two sequences; they are plotted against each other.
# NOTE(review): presumably pp holds the sampled values of C and score the
# validation accuracy for each - confirm against cool_search.
pp, score = search.grid(100)
px.scatter(x=pp, y=score)


Cool search:
  - <class 'sklearn.linear_model._logistic.LogisticRegression'>
  - training data   (800, 8), (800,)
  - validation data (200, 8), (200,)
  - parameter       C (0.01, 2)
  - samples         307
computed 0 new samples
