In [1]:
import numpy as np
import pandas as pd
from river import evaluate
from sail.models.river.forest import AdaptiveRandomForestRegressor
from sail.pipeline import SAILPipeline
from sklearn.impute import SimpleImputer
from sail.transformers.river.preprocessing import StandardScaler

#### Load Data


In [2]:
X = pd.read_csv("../datasets/HDWF2.csv").head(2000)

y = X["power"]
X.drop(["power", "time"], axis=1, inplace=True)

#### Model Definition


In [3]:
random_forest = AdaptiveRandomForestRegressor()

#### Create SAIL Pipeline


In [4]:
steps = [
    ("Imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ("standard_scalar", StandardScaler()),
    ("classifier", random_forest),
]
sail_pipeline = SAILPipeline(steps=steps, scoring="R2", verbosity_level=1, verbosity_interval=2)

#### Train Test Split


In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

#### Start Incremental Training


In [6]:
y_preds = []
y_true = []
batch_size = 100

for start in range(0, 1000, batch_size):

    end = start + batch_size

    X = X_train.iloc[start:end]
    y = y_train.iloc[start:end]

    sail_pipeline.partial_fit(X, y)


    
>> Epoch: 1 | Samples Seen: 0 -------------------------------------------------------------------------------------

    
>> Epoch: 2 | Samples Seen: 100 -------------------------------------------------------------------------------------

    
>> Epoch: 4 | Samples Seen: 300 -------------------------------------------------------------------------------------

    
>> Epoch: 6 | Samples Seen: 500 -------------------------------------------------------------------------------------

    
>> Epoch: 8 | Samples Seen: 700 -------------------------------------------------------------------------------------

    
>> Epoch: 10 | Samples Seen: 900 -------------------------------------------------------------------------------------


In [7]:
sail_pipeline._scorer

<sail.common.scorer.SAILModelScorer at 0x29e51a860>

#### Save SAIL Pipeline


In [8]:
sail_pipeline.save(".")

[2023-10-02 00:51:24:702] - INFO - SAIL (SAILModel) - Model saved successfully.


'./sail_pipeline'

#### Load SAIL Pipeline


In [9]:
sail_new_pipeline = SAILPipeline.load(".")

[2023-10-02 00:51:24:776] - INFO - SAIL (SAILModel) - Model loaded successfully.


In [10]:
y_preds = []
y_true = []
batch_size = 100

for start in range(1001, 1400, batch_size):

    end = start + batch_size

    X = X_train.iloc[start:end]
    y = y_train.iloc[start:end]

    sail_new_pipeline.partial_fit(X, y)


    
>> Epoch: 12 | Samples Seen: 1100 -------------------------------------------------------------------------------------

    
>> Epoch: 14 | Samples Seen: 1300 -------------------------------------------------------------------------------------


#### Make Prediction on Hold out set


In [11]:
y_preds = []
y_true = []
batch_size = 100

for start in range(0, X_test.shape[0], batch_size):

    end = start + batch_size

    X = X_test.iloc[start:end]
    y = y_test.iloc[start:end]

    preds = sail_new_pipeline.predict(X)
    y_preds.extend(list(preds))
    y_true.extend(list(y))

#### Final Score


In [12]:
sail_new_pipeline.get_progressive_score

0.8186081075135777

In [13]:
import plotly.express as px

df = pd.DataFrame({"y_true": y_true, "y_preds": y_preds}).head(100)
fig = px.line(df, y=["y_true", "y_preds"], title='')
fig.show()