In [7]:
from sail.pipeline import SAILPipeline
import numpy as np
import pandas as pd
from river import datasets, metrics, optim, stats
from sail.transformers.river.compose import Select
from sail.transformers.river.feature_extraction import TargetAgg
from sail.models.river.linear_model import LinearRegression
from sail.transformers.river.preprocessing import AdaptiveStandardScaler, StandardScaler
from sail.transformers.datetime import EncodeDateTransformer
from sail.transformers.column import ColumnNamePrefixTransformer
from sail.transformers.number import Polar2CartTransformer

### Loading the dataset


In [8]:
# Materialise the river `Bikes` stream as (features, label) pairs so that it
# can be sliced into batches with pandas further down.
dataset = datasets.Bikes()
x, y = [], []
for data, label in dataset:
    x.append(data)
    y.append(label)

# `df` keeps everything (features, timestamp and label) for inspection.
df = pd.DataFrame(x)
df["target"] = y
# Hour of day derived from the `moment` timestamp column.
df["hour"] = df["moment"].dt.hour

# Feature matrix: drop the raw timestamp AND the label. Leaving "target" in X
# would leak the label into any model that is fed X without an explicit
# column selection step.
X = df.drop(columns=["moment", "target"])
y = df["target"]

In [9]:
# Incremental pipeline: column selection -> scaler -> linear regressor trained
# with SGD (optim.SGD(0.001)).
steps = [
    ("Select", Select("clouds", "humidity", "pressure", "temperature", "wind")),
    ("Scaler", StandardScaler()),
    ("regressor", LinearRegression(optimizer=optim.SGD(0.001))),
]

pipeline = SAILPipeline(steps=steps, scoring=metrics.MAE(), verbosity=0)

# MAE tracked by hand, independently of the pipeline's own `scoring` metric.
metric = metrics.MAE()

batch_size = 1
log_every = 20000  # report progress whenever `start` is a multiple of this

# Progressive (test-then-train) validation: each batch is first predicted with
# the model fitted so far, then used to update that model.
for start in range(0, X.shape[0], batch_size):
    end = start + batch_size
    # Slice the prepared feature frame rather than `df`, so the raw frame
    # (which also holds the label column) is never handed to the pipeline.
    X_train = X.iloc[start:end]
    y_train = y.iloc[start:end]

    if start > 0:
        # Predicting (skipped for the very first batch: nothing is fitted yet)
        yhat = pipeline.predict(X_train)

        # Update the metric one (truth, prediction) pair at a time. Passing
        # whole arrays makes the metric hold an array (it printed as
        # `[5.56...]`) and is not a scalar MAE once batch_size > 1.
        for y_true, y_pred in zip(y_train.to_numpy(), np.ravel(yhat)):
            metric.update(y_true, y_pred)

    # Partially fitting the model
    model = pipeline.partial_fit(X_train, y_train)

    if start % log_every == 0:
        print("MAE after", start, "iterations", metric.get())

print("Finally, MAE:", metric.get())

MAE after 0 iterations 0.0
MAE after 20000 iterations [4.91369848]
MAE after 40000 iterations [5.33356474]
MAE after 60000 iterations [5.33099467]
MAE after 80000 iterations [5.39232983]
MAE after 100000 iterations [5.42310781]
MAE after 120000 iterations [5.54129902]
MAE after 140000 iterations [5.61305014]
MAE after 160000 iterations [5.62248674]
MAE after 180000 iterations [5.5678413]
Finally, MAE: [5.56392979]


In [10]:
# Score tracked by the pipeline itself (it was built with scoring=metrics.MAE()).
# In the recorded run this value equals the final MAE of the hand-rolled loop.
pipeline.get_progressive_score

5.56392979004822

In [None]:
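# NOTE: draft of an extended pipeline, currently disabled.
# `ColumnTransformer` is not among the imports at the top of this notebook
# (presumably sklearn.compose.ColumnTransformer -- confirm) and must be
# imported before this cell is uncommented.
# new_steps = [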
#     (
#         "aggregation",
#         ColumnTransformer(
#             [
#                 (
#                     "Select",
#                     "passthrough",
#                     ["clouds", "humidity", "pressure", "temperature", "wind"],
#                 ),
#                 (
#                     "TargetAgg",
#                     TargetAgg(
#                         by=["station", "hour"],
#                         how=stats.Mean(),
#                     ),
#                     ["station", "hour"],
#                 ),
#             ],
#             verbose_feature_names_out=False,
#             remainder="drop",
#         ),
#     ),
#     ("Scaler", StandardScaler()),
#     ("regressor", LinearRegression(optimizer=optim.SGD(0.001))),
# ]