<a href="https://colab.research.google.com/github/RahulDas-dev/SKtime-Examples/blob/master/Pycaret.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install PyCaret (base package, then again with its optional `models` extra)
# and statsforecast, which the setup cell selects as the auto_arima engine.
!pip install pycaret -q
!pip install statsforecast -q
!pip install pycaret[models] -q

**Step 1** Created an instance called `exp_auto` of the `TSForecastingExperiment` class.

**Step 2** Set up the experiment using the provided arguments by executing `exp_auto.setup(**args)`.

**Step 3** Compared different models using `exp_auto.compare_models(**args)` and obtained the best model.

**Step 4** Further tuned the best model using `exp_auto.tune_model(best_model)` and obtained the best-tuned model.

**Step 5** Finalized the best-tuned model using `exp_auto.finalize_model(best_tuned_model)`.

**Step 6** Finally, I saved the `final_best_tuned_model` as `model_123` using `exp_auto.save_model(final_best_tuned_model, 'model_123')`.

Now, for future forecasting, I encountered an issue at **Step 7**.

In this step, I attempted to load the model using `exp_auto.load_model('model_123')`. However, in the future I won't have the `exp_auto` instance available; I will only have the saved pickle file, `model_123.pkl`. I am unsure how to load the model in that situation.

In [None]:
from typing import List, Union


# show_versions()
import numpy as np
import pandas as pd

from pycaret.time_series import TSForecastingExperiment
from pycaret.datasets import get_data
import statsforecast
from pycaret import show_versions

from sktime.utils import plotting

In [None]:
# Load the raw power-consumption CSV from the mounted Drive folder.
dataframe = pd.read_csv("./drive/MyDrive/datasets/Tetuan_City_power_consumption.csv")

# Alternative data source (disabled): PyCaret's bundled "airquality" dataset.
# dataframe = get_data("airquality", verbose=False)
print(dataframe.shape)

# The commented-out lines below are preprocessing for other datasets and are
# not used for the CSV loaded above.
# NOTE(review): the second `cols_sel` looks like it belongs to yet another
# (pollution) dataset — confirm before re-enabling any of this.
# dataframe['Date_Time'] = dataframe['Date']+dataframe['Time'].apply(lambda x: str(' ')+x)
# dataframe['Date_Time'] = pd.to_datetime(dataframe['Date_Time'])
# dataframe.drop(columns=['Date','Time'], inplace=True)


# cols_sel = ['Date_Time','CO(GT)','PT08.S1(CO)','NMHC(GT)','C6H6(GT)','PT08.S2(NMHC)','NOx(GT)','T','RH']

# cols_sel = ['date', 'pollution','dewp','temp','press','wnd_spd','snow','rain']

# dataframe = dataframe[cols_sel].copy(deep=True)

# dataframe.replace(-200, np.nan, inplace=True)

# Preview the first rows (displayed as the cell output).
dataframe.head()

In [None]:
def replace_null(dataframe: pd.DataFrame, method: str = "ffill") -> pd.DataFrame:
    """Return a deep copy of ``dataframe`` with missing values propagated.

    The input frame is never modified.

    Args:
        dataframe: Frame whose missing values should be filled.
        method: Fill direction. ``"ffill"``/``"pad"`` carry the last valid
            observation forward; ``"bfill"``/``"backfill"`` pull the next
            valid observation backward.

    Returns:
        A new DataFrame. Leading (forward fill) or trailing (backward fill)
        gaps that have no neighbour to copy from remain missing.

    Raises:
        ValueError: If ``method`` is not one of the supported names and the
            frame actually contains missing values.
    """
    dataframe_ = dataframe.copy(deep=True)
    if dataframe_.isna().sum().sum() == 0:
        print("No Null Value Found")
        return dataframe_
    # ``fillna(method=...)`` is deprecated in recent pandas releases; the
    # dedicated ``ffill``/``bfill`` methods behave the same on old and new
    # versions alike.
    if method in ("ffill", "pad"):
        dataframe_ = dataframe_.ffill()
    elif method in ("bfill", "backfill"):
        dataframe_ = dataframe_.bfill()
    else:
        raise ValueError(
            f"Invalid fill method {method!r}; expected 'ffill', 'pad', "
            "'bfill' or 'backfill'"
        )
    print(f"replace_null: DF Shape {dataframe_.shape}")
    return dataframe_


def create_index(
    dataframe: pd.DataFrame, col: str, format: Union[str, None] = None
) -> pd.DataFrame:
    """Return a deep copy of ``dataframe`` indexed by the datetimes parsed from ``col``.

    The parsed values become the index, named ``"Index"``; the source column
    ``col`` is kept as a regular column. The input frame is not modified.

    Args:
        dataframe: Frame containing the date/time column.
        col: Name of the column to parse with ``pd.to_datetime``.
        format: Optional format string forwarded to ``pd.to_datetime``;
            ``None`` leaves format detection to pandas.

    Returns:
        A new DataFrame with the parsed datetimes as its index.
    """
    dataframe_ = dataframe.copy(deep=True)
    # Stage the parsed datetimes in a temporary "Index" column, then promote
    # that column to the index (set_index removes it from the columns).
    dataframe_["Index"] = pd.to_datetime(dataframe_[col], format=format)
    dataframe_.set_index(keys="Index", inplace=True)
    # Log label now matches the function name (it used to say
    # "format_datetime", which no function in this file is called).
    print(f"create_index: DF Shape {dataframe_.shape}")
    return dataframe_


def resample_data(dataframe: pd.DataFrame, freq: str = "D") -> pd.DataFrame:
    """Aggregate ``dataframe`` into ``freq``-sized bins using the mean.

    Only numeric columns are averaged (``numeric_only=True``). The result is a
    new frame and the input is left untouched.

    Args:
        dataframe: Frame to resample with ``DataFrame.resample``.
        freq: Resampling frequency alias handed to ``resample``.

    Returns:
        The resampled DataFrame, one row per bin.
    """
    resampler = dataframe.resample(freq)
    averaged = resampler.mean(numeric_only=True)
    print(f"resample_Data: DF Shape {averaged.shape}")
    return averaged


def index_to_column(dataframe: pd.DataFrame, col: str = "DateTime") -> pd.DataFrame:
    """Return a deep copy of ``dataframe`` with its index mirrored into column ``col``.

    The index itself is kept; an existing column named ``col`` is overwritten.
    The input frame is not modified.

    Args:
        dataframe: Frame whose index should also be available as a column.
        col: Name of the column that receives the index values.

    Returns:
        A new DataFrame containing the extra (or replaced) column ``col``.
    """
    dataframe_ = dataframe.copy(deep=True)
    dataframe_[col] = dataframe_.index
    # Log label fixed: it used to read "resample_Data" (copied from the
    # resampling step), which made the pipeline output misleading.
    print(f"index_to_column: DF Shape {dataframe_.shape}")
    return dataframe_


# Preprocessing pipeline:
#   1. parse the "DateTime" column and use it as the index,
#   2. average the numeric columns into hourly ("H") bins,
#   3. copy the resampled index back into a "DateTime" column, which the
#      experiment setup later refers to via index="DateTime".
dataframe_ = (
    dataframe.pipe(create_index, "DateTime")
    .pipe(resample_data, freq="H")
    .pipe(index_to_column, "DateTime")
)


# Sanity check of the resampled frame.
print(dataframe_.shape)
dataframe_.head()

In [None]:
# Count the missing values per column of the resampled frame.
dataframe_.isna().sum()

In [None]:
# Figure options handed to the experiment through `fig_kwargs` below.
global_fig_settings = {
    # "renderer": "notebook",
    "renderer": "png",
    "width": 1500,
    "height": 500,
}

exp_auto = TSForecastingExperiment()

# Forecast horizon passed to setup() as `fh`; the data is hourly after
# resampling, so this is 48 hourly steps.
FH = 48
# NOTE(review): `metric` is not referenced by any later cell visible here
# (compare_models sorts by "MAE") — confirm whether it is still needed.
metric = "mase"
# Target column name; note the two consecutive spaces after "Zone 3" —
# presumably this mirrors the CSV header exactly; verify against the file.
target = "Zone 3  Power Consumption"
exp_auto.setup(
    data=dataframe_,
    target=target,
    index="DateTime",
    engine={"auto_arima": "statsforecast"},
    use_gpu=True,
    fh=FH,
    enforce_exogenous=True,
    # Missing values in both the target and the remaining columns are
    # forward-filled by the experiment.
    numeric_imputation_target="ffill",
    numeric_imputation_exogenous="ffill",
    fig_kwargs=global_fig_settings,
    session_id=42,
)

In [None]:
# Compare the candidate models, ranked by MAE, leaving out the two ARIMA
# variants listed in `exclude`; the result is kept as `best` (Step 3).
exclude = ["auto_arima", "arima"]
best = exp_auto.compare_models(sort="MAE", turbo=True, exclude=exclude)

In [None]:
# Tune the best model from the comparison (Step 4).
best_tuned_model = exp_auto.tune_model(best)

In [None]:
# Plot the tuned model's forecast and keep the plotted data
# (return_data=True) for the custom plot in the next cell.
# NOTE(review): the horizon here is 36 while setup() was given FH = 48 —
# confirm the difference is intentional.
forcasted_values = exp_auto.plot_model(
    best_tuned_model, plot="forecast", data_kwargs={"fh": 36}, return_data=True
)
# print(forcasted_values)

In [None]:
# forcasted_values['original_data']
# forcasted_values['overlay_data']

# Report how many rows each returned data set holds. This used to be a bare
# tuple expression, which a notebook never displays unless it is the last
# statement of the cell, so it is printed explicitly.
print(len(forcasted_values["original_data"]), len(forcasted_values["overlay_data"]))
# Plot the last 360 rows of the original data together with the overlay data
# returned by plot_model.
# NOTE(review): the labels say "Train"/"Test", but the overlay data comes from
# the forecast plot — confirm the labels describe the series correctly.
plotting.plot_series(
    forcasted_values["original_data"].tail(360),
    forcasted_values["overlay_data"],
    labels=["Train", "Test"],
)

In [None]:
# Same as the forecast plot, but requesting the "insample" plot of the tuned
# model; the returned data is kept in `insample_forcasted_values`.
insample_forcasted_values = exp_auto.plot_model(
    best_tuned_model, plot="insample", data_kwargs={"fh": 36}, return_data=True
)

In [None]:
# Finalize the tuned model (Step 5).
final_best = exp_auto.finalize_model(best_tuned_model)

In [None]:
# Forecast with the finalized model using its default arguments.
final_best.predict()

In [None]:
# Print the current working directory of the notebook runtime.
!pwd