In [1]:
import numpy as np
import pandas as pd
from sail.pipeline import SAILPipeline
from sail.models.keras import OSELM
from tensorflow import keras
import tensorflow as tf
from sklearn.impute import SimpleImputer
from sail.transformers.river.preprocessing import StandardScaler

#### Load Data


In [2]:
# Read the NYC taxi CSV, relabel its columns and discard incomplete rows.
# `header=0` combined with `names` swaps the file's own header row for ours.
# The three numeric columns are then converted with a float downcast.
df = (
    pd.read_csv(
        "../datasets/nyc_taxi.csv",
        names=["time", "passenger_count", "timeofday", "dayofweek"],
        header=0,
    )
    .dropna()
    .assign(
        timeofday=lambda frame: pd.to_numeric(frame["timeofday"], downcast="float"),
        dayofweek=lambda frame: pd.to_numeric(frame["dayofweek"], downcast="float"),
        passenger_count=lambda frame: pd.to_numeric(
            frame["passenger_count"], downcast="float"
        ),
    )
)

# Features: every column except the target and the raw "time" column.
X = df.drop(columns=["passenger_count", "time"])
# Target: the passenger count.
y = df["passenger_count"]

#### Model Definition


In [3]:
# Configure the OSELM regressor (imported from sail.models.keras).
model = OSELM(
    # Model hyperparameters
    num_hidden_nodes=25,
    num_output_nodes=1,
    hidden_layer_activation=tf.nn.sigmoid,
    forgetting_factor=0.5,
    # Loss, optimizer and fitting options
    optimizer=keras.optimizers.Adam,  # the optimizer class itself, not an instance
    loss="mae",
    metrics=["mse"],
    epochs=1,
    verbose=0,
)

#### Create SAIL Pipeline


In [4]:
# Two-stage pipeline: mean-impute NaN values, then hand the data to the regressor.
steps = [
    ("Imputer", SimpleImputer(strategy="mean", missing_values=np.nan)),
    ("regressor", model),
]
sail_pipeline = SAILPipeline(
    steps=steps,
    scoring="R2",
    verbosity_level=1,
    verbosity_interval=50,
)

#### Start Incremental Training


In [5]:
# Test-then-train loop: every batch after the first is predicted with the
# current pipeline state before the pipeline is updated with that same batch.
# Predictions and matching targets are collected for later inspection.
y_preds = []
y_true = []
batch_size = 1

# Train on at most 500 rows, but never walk past the end of the data: an
# out-of-range `iloc` slice is silently empty and would otherwise be handed
# to predict / partial_fit.
n_samples = min(500, len(X))

for start in range(0, n_samples, batch_size):
    # Clamp so the final batch cannot overrun the cap when batch_size does
    # not divide n_samples evenly.
    end = min(start + batch_size, n_samples)

    Xt = X.iloc[start:end]
    yt = y.iloc[start:end]

    # Skip prediction on the very first batch: nothing has been fitted yet.
    if start != 0:
        preds = sail_pipeline.predict(Xt)
        y_preds.extend(preds)
        y_true.extend(yt)

    sail_pipeline.partial_fit(Xt, yt)


    
>> Epoch: 1 | Samples Seen: 0 -------------------------------------------------------------------------------------
SAIL Pipeline Partial fit [regressor]:  50%=====      [Steps: 1/2, ETA: 00:00<00:00, Elapsed:0.007s, Batch Size=1]

Cause: mangled names are not yet supported
Cause: mangled names are not yet supported


2023-10-02 02:01:58.919023: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz

    
>> Epoch: 50 | Samples Seen: 49 -------------------------------------------------------------------------------------

    
>> Epoch: 100 | Samples Seen: 99 -------------------------------------------------------------------------------------

    
>> Epoch: 150 | Samples Seen: 149 -------------------------------------------------------------------------------------

    
>> Epoch: 200 | Samples Seen: 199 -------------------------------------------------------------------------------------

    
>> Epoch: 250 | Samples Seen: 249 -------------------------------------------------------------------------------------

    
>> Epoch: 300 | Samples Seen: 299 -------------------------------------------------------------------------------------

    
>> Epoch: 350 | Samples Seen: 349 -------------------------------------------------------------------------------------



#### Final Score


In [6]:
# Show the pipeline's progressive score (the pipeline was built with scoring="R2").
# Accessed as an attribute, not called: the recorded output is a plain float.
sail_pipeline.get_progressive_score

0.8307954112664475

In [7]:
import plotly.express as px

# Compare ground truth with predictions for the first 100 evaluated batches.
# The frame gets its own name so the dataset frame `df` loaded earlier is not
# overwritten by a frame with a completely different meaning.
results_df = pd.DataFrame({"y_true": y_true, "y_preds": y_preds}).head(100)

fig = px.line(
    results_df,
    y=["y_true", "y_preds"],
    title="OSELM on NYC taxi data: actual vs. predicted passenger_count",
    # Wide-form axis/legend names; unmatched keys are simply ignored by plotly.
    labels={"index": "evaluation step", "value": "passenger_count", "variable": "series"},
)
fig.show()