In [2]:
import mlflow
import bentoml 

import pandas as pd
import matplotlib.pyplot as plt

from darts import TimeSeries

from darts.models import LightGBMModel, XGBModel, Prophet

from darts.dataprocessing.transformers import Scaler, StaticCovariatesTransformer
from darts.dataprocessing import Pipeline

from darts.metrics import mape

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the daily absence counts and wrap them in a darts TimeSeries on a
# daily grid (missing dates are filled in by darts).
absences_df = pd.read_csv('../../data/processed/absences_daily.csv')
series = TimeSeries.from_dataframe(
    absences_df,
    time_col='date',
    value_cols='count',
    fill_missing_dates=True,
    freq='D',
)

# Everything strictly before 2023-01-01 is training data; the rest is held out.
single_train_ts, single_test_ts = series.split_before(pd.Timestamp('2023-01-01'))

# The scaler is fitted on the training split only and then re-used for the
# test split. A Pipeline([Scaler(), StaticCovariatesTransformer()]) variant
# is available from the imports but is not used here.
pipeline = Scaler()
train_dataset_ts_prepared = pipeline.fit_transform(single_train_ts)
test_dataset_ts_prepared = pipeline.transform(single_test_ts)

# Forecast as many steps as there are points in the held-out split.
forecast_horizons = len(single_test_ts)

In [8]:
# Darts XGBoost forecaster trained on the scaled training series.
model_XG = XGBModel(
    lags=2,
    # NOTE(review): 356 may be a typo for 365 — confirm the intended chunk length.
    output_chunk_length=356,
    random_state=0,
    multi_models=False,
)
model_XG.fit(series=train_dataset_ts_prepared)

# Forecast the held-out horizon in scaled space, then map the result back
# through the fitted scaler to the original units.
model_XG_predict = pipeline.inverse_transform(
    model_XG.predict(n=forecast_horizons, series=train_dataset_ts_prepared)
)

In [9]:
# Load a model artifact from a local MLflow run directory via the lightgbm
# flavor. The path is relative to the working directory and pins one
# specific experiment/run ID, so it only resolves where that run exists.
loaded_model = mlflow.lightgbm.load_model('./mlruns/985497283900259339/1bb7d01fa8fc4bf8b8c83d9634ff93c7/artifacts/model')

In [74]:
# Store the MLflow-loaded model in the BentoML model store under the name
# 'light_gbm', exposing its `predict` method as the runner signature.
# NOTE(review): `batchable=True` enables adaptive batching of inputs; the
# calls below pass `n=` and `series=` keyword arguments — confirm that
# batching is actually meaningful for this predict signature.
bentoml.picklable_model.save_model(
    'light_gbm',
    loaded_model,
    signatures={"predict": {"batchable": True}}
)

Model(tag="light_gbm:j722pmjyqoblvo6i", path="C:\Users\jeany\bentoml\models\light_gbm\j722pmjyqoblvo6i\")

In [75]:
# Read the latest 'light_gbm' version back from the BentoML store.
# NOTE(review): the annotation claims a darts LightGBMModel; that holds only
# if the object logged to MLflow was a darts model — confirm.
model: LightGBMModel = bentoml.picklable_model.load_model("light_gbm:latest")

In [76]:
# Sanity check: forecast directly with the reloaded model. The input is the
# scaled training series, and the result is not passed back through
# `pipeline.inverse_transform` here.
model.predict(n=forecast_horizons, series=train_dataset_ts_prepared)

In [77]:
# Look up the stored model entry (not the unpickled object) for the latest
# 'light_gbm' version.
bento_model: bentoml.Model = bentoml.models.get("light_gbm:latest")

In [78]:
# Wrap the stored model in a BentoML runner.
my_runner: bentoml.Runner = bento_model.to_runner()

In [79]:
# Initialise the runner inside this process. BentoML itself warns that
# init_local is for debugging and testing only and must be removed before
# deploying to production.
my_runner.init_local()

'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.


In [84]:
# Display the runner's repr to inspect its registered methods and settings.
my_runner

Runner(name='light_gbm', models=[Model(tag="light_gbm:j722pmjyqoblvo6i", path="C:\Users\jeany\bentoml\models\light_gbm\j722pmjyqoblvo6i")], resource_config=None, runnable_class=<class 'bentoml._internal.frameworks.picklable.get_runnable.<locals>.PicklableRunnable'>, embedded=False, runner_methods=[RunnerMethod(runner=..., name='predict', config=RunnableMethodConfig(batchable=True, batch_dim=(0, 0), input_spec=None, output_spec=None), max_batch_size=100, max_latency_ms=60000)], scheduling_strategy=<class 'bentoml._internal.runner.strategy.DefaultStrategy'>, workers_per_resource=1, runnable_init_params={}, _runner_handle=<bentoml._internal.runner.runner_handle.local.LocalRunnerRef object at 0x00000229FCA8C1F0>)

In [85]:
# Invoke the runner through its top-level `run` entry point.
# NOTE(review): presumably this dispatches to the runner's only method,
# `predict` — confirm against the BentoML Runner documentation.
my_runner.run(n=forecast_horizons, series=train_dataset_ts_prepared)

In [87]:
# Same forecast, addressed explicitly through the `predict` runner method.
my_runner.predict.run(n=forecast_horizons, series=train_dataset_ts_prepared)

# XGBoost Native 

In [43]:
import xgboost as xgb

In [44]:
# Re-read the daily absences for the native XGBoost workflow and convert the
# `date` column to datetimes (this rebinds `absences_df`).
absences_df = (
    pd.read_csv('../../data/processed/absences_daily.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Rows dated on or before the split date are used for training; later rows
# form the test set. Copies are taken so later column additions do not
# touch `absences_df`.
split_date = '01-Jan-2022'
ab_train = absences_df[absences_df['date'] <= split_date].copy()
ab_test = absences_df[absences_df['date'] > split_date].copy()

In [45]:
def create_features(df, label=None):
    """
    Creates calendar features from the datetime ``date`` column of ``df``.

    The six feature columns (``dayofweek``, ``quarter``, ``month``, ``year``,
    ``dayofyear``, ``dayofmonth``) are written onto ``df`` itself, so the
    caller's frame gains these columns as a side effect. Pass a copy if the
    original frame must stay untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column that supports the ``.dt`` accessor.
    label : hashable, optional
        Column label of the target. When given (including falsy labels such
        as ``0``), the target column is returned alongside the features.

    Returns
    -------
    X : pandas.DataFrame
        The six calendar feature columns, in the order listed above.
    (X, y) : tuple
        Returned instead of ``X`` when ``label`` is not None; ``y`` is
        ``df[label]``.
    """
    dates = df['date'].dt
    df['dayofweek'] = dates.dayofweek
    df['quarter'] = dates.quarter
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['dayofmonth'] = dates.day

    X = df[['dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth']]

    # Compare against None rather than relying on truthiness, so that a
    # valid but falsy column label (e.g. the integer 0) is not ignored.
    if label is not None:
        return X, df[label]
    return X

In [46]:
# Build calendar-feature matrices and `count` targets for both splits.
# create_features also writes the feature columns onto ab_train / ab_test.
X_train, y_train = create_features(ab_train, label='count')
X_test, y_test = create_features(ab_test, label='count')

In [47]:
# Gradient-boosted regressor on the calendar features.
# `early_stopping_rounds` is set on the estimator (requires xgboost >= 1.6):
# passing it to fit() is deprecated since 1.6 and rejected from 2.0 onwards.
model_xg = xgb.XGBRegressor(n_estimators=1000, early_stopping_rounds=50)

# NOTE(review): the test split is one of the evaluation sets, so early
# stopping is tuned on the same data later used for prediction — consider a
# separate validation split.
model_xg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=False,  # set to True to print the evaluation metric per round
)



In [48]:
# Store the fitted regressor in the BentoML model store under the name
# "model_xg_native" using the xgboost flavor.
bentoml.xgboost.save_model("model_xg_native", model_xg)

Model(tag="model_xg_native:d647pbbyqg7g7o6i", path="C:\Users\jeany\bentoml\models\model_xg_native\d647pbbyqg7g7o6i\")

In [65]:
# Look up the latest stored "model_xg_native" entry. This rebinds
# `bento_model`, which previously referred to the 'light_gbm' entry.
bento_model: bentoml.Model = bentoml.models.get("model_xg_native:latest")

In [66]:
# Create a runner for the native XGBoost model; this rebinds `my_runner`,
# replacing the 'light_gbm' runner created earlier.
my_runner: bentoml.Runner = bento_model.to_runner()

In [67]:
# Display the runner's repr to inspect its registered methods and settings.
my_runner

Runner(name='model_xg_native', models=[Model(tag="model_xg_native:d647pbbyqg7g7o6i", path="C:\Users\jeany\bentoml\models\model_xg_native\d647pbbyqg7g7o6i")], resource_config=None, runnable_class=<class 'bentoml._internal.frameworks.xgboost.get_runnable.<locals>.XGBoostRunnable'>, embedded=False, runner_methods=[RunnerMethod(runner=..., name='predict', config=RunnableMethodConfig(batchable=False, batch_dim=(0, 0), input_spec=None, output_spec=None), max_batch_size=100, max_latency_ms=60000)], scheduling_strategy=<class 'bentoml._internal.runner.strategy.DefaultStrategy'>, workers_per_resource=1, runnable_init_params={}, _runner_handle=<bentoml._internal.runner.runner_handle.DummyRunnerHandle object at 0x00000229FCF0C670>)

In [55]:
# Initialise the runner inside this process. BentoML itself warns that
# init_local is for debugging and testing only and must be removed before
# deploying to production.
my_runner.init_local()

'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.


In [58]:
# Predict on the test-split calendar features through the runner's
# `predict` method.
my_runner.predict.run(X_test)

array([ 64.76235 ,  68.60445 ,  74.94446 ,  74.94446 ,  74.94446 ,
        74.94446 ,  74.94446 ,  70.001175,  64.97466 ,  64.97466 ,
        64.97466 ,  64.97466 ,  64.97466 ,  64.97466 ,  33.797756,
        64.97466 ,  64.97466 ,  64.97466 ,  64.97466 ,  64.97466 ,
        64.97466 ,  33.797756,  64.97466 ,  64.97466 ,  64.97466 ,
        64.97466 ,  64.97466 ,  64.97466 ,  29.101082,  64.97466 ,
        58.63465 ,  58.63465 ,  58.63465 ,  64.97466 ,  64.97466 ,
        33.797756,  79.57553 ,  79.57553 ,  79.57553 ,  79.57553 ,
        79.57553 ,  87.14403 ,  33.259224,  90.24242 ,  90.24242 ,
        90.24242 ,  90.24242 ,  90.24242 ,  94.08136 ,  33.259224,
        90.24242 ,  90.24242 ,  90.24242 , 114.4979  , 114.4979  ,
       118.336845,  47.187515, 114.4979  , 114.4979  , 114.4979  ,
       114.4979  , 114.4979  , 118.336845,  49.575153, 114.4979  ,
       114.4979  , 114.4979  , 114.4979  , 114.4979  , 118.336845,
        44.940216, 114.4979  , 114.4979  , 114.4979  , 136.867