In [19]:
import numpy as np
import pandas as pd
import plotly.express as px
from river import evaluate
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

from sail.models.river.forest import AdaptiveRandomForestRegressor
from sail.pipeline import SAILPipeline
from sail.transformers.river.preprocessing import StandardScaler
from sail.utils.logging import SAILVerbosity

#### Load Data


In [20]:
# Load the first 2000 rows of the HDWF2 dataset. The raw frame is kept under its
# own name and never mutated, so it stays available for inspection.
hdwf_raw = pd.read_csv("../datasets/HDWF2.csv").head(2000)

# Target is the "power" column; features are every column except "power" and "time".
y = hdwf_raw["power"]
X = hdwf_raw.drop(columns=["power", "time"])

#### Model Definition


In [21]:
# Adaptive random forest regressor (sail.models.river.forest), constructed with
# no arguments; it is used as the final step of the SAIL pipeline.
# NOTE(review): no random seed is passed here — confirm whether one is needed
# for reproducible scores.
random_forest = AdaptiveRandomForestRegressor()

#### Create SAIL Pipeline


In [22]:
# Pipeline stages in execution order: mean-imputation of NaN values, SAIL's
# StandardScaler, then the adaptive random forest estimator.
# NOTE(review): the last step is labelled "classifier" although the estimator is
# a regressor; the label is kept unchanged because step names are runtime strings.
steps = [
    ("Imputer", SimpleImputer(strategy="mean", missing_values=np.nan)),
    ("standard_scalar", StandardScaler()),
    ("classifier", random_forest),
]

# The pipeline is scored with "R2"; logging is configured through SAILVerbosity.
sail_pipeline = SAILPipeline(
    steps=steps,
    scoring="R2",
    verbosity=SAILVerbosity(log_interval=4, verbosity=1),
)

#### Train Test Split


In [23]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs. `train_test_split` is imported in the top import cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

#### Start Incremental Training


In [24]:
# Feed the training split to the pipeline one mini-batch at a time.
batch_size = 100

for start in range(0, X_train.shape[0], batch_size):
    end = start + batch_size  # iloc clamps the final, possibly shorter, slice

    # Batch-local names: reusing `X`/`y` here would overwrite the full dataset
    # and make a re-run of the train/test split cell operate on the last batch.
    X_batch = X_train.iloc[start:end]
    y_batch = y_train.iloc[start:end]

    sail_pipeline.partial_fit(X_batch, y_batch)



#### Save SAIL Pipeline


In [7]:
# Persist the trained pipeline under the current directory; the recorded output
# shows it being written to ./sail_pipeline.
sail_pipeline.save(".")

[2023-07-11 21:30:53:547] - INFO - SAIL (SAILPipeline) : making directory tree ./sail_pipeline


'./sail_pipeline'

#### Load SAIL Pipeline


In [8]:
# Restore the pipeline from the same location it was saved to ("."). It is bound
# to a new name so the following cells run on the reloaded copy rather than the
# in-memory `sail_pipeline`.
sail_new_pipeline = SAILPipeline.load(".")



#### Make Predictions on the Hold-Out Set


In [9]:
# Predict the hold-out split in mini-batches with the reloaded pipeline,
# collecting predictions and ground-truth values in matching order.
y_preds = []
y_true = []
batch_size = 100

for start in range(0, X_test.shape[0], batch_size):
    end = start + batch_size  # iloc clamps the final, possibly shorter, slice

    # Batch-local names so the full-dataset `X`/`y` globals are not overwritten.
    X_batch = X_test.iloc[start:end]
    y_batch = y_test.iloc[start:end]

    # `extend` consumes any iterable, so no intermediate list copies are needed.
    y_preds.extend(sail_new_pipeline.predict(X_batch))
    y_true.extend(y_batch)

#### Final Progressive Score


In [12]:
# Progressive score reported by the reloaded pipeline (it was built with
# scoring="R2"). Accessed as an attribute, without a call.
# NOTE(review): presumably this reflects the scores accumulated during
# partial_fit rather than the hold-out predictions — confirm.
sail_new_pipeline.get_progressive_score

0.7752859135848691

In [11]:
# Compare the first 100 hold-out targets with the corresponding predictions.
# `px` (plotly.express) is imported in the top import cell.
holdout_preview = pd.DataFrame({"y_true": y_true, "y_preds": y_preds}).head(100)

# A title and axis labels make the figure readable on its own.
fig = px.line(
    holdout_preview,
    y=["y_true", "y_preds"],
    title="Hold-out set: true vs. predicted power (first 100 samples)",
    labels={"index": "sample", "value": "power", "variable": "series"},
)
fig.show()