In [None]:
#| default_exp target_transforms

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| export
import abc
import reprlib
from typing import Iterable

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin, clone

from mlforecast.grouped_array import GroupedArray, _apply_difference

In [None]:
#| export
class BaseTargetTransform(abc.ABC, BaseEstimator):
    """Abstract base class for transformations of the target column.

    Subclasses must implement `fit_transform` and `inverse_transform`.
    The names of the id, time and target columns are not constructor
    arguments; they are supplied afterwards through `set_column_names`.
    """

    def set_column_names(self, id_col: str, time_col: str, target_col: str):
        """Store the names of the id, time and target columns on the instance."""
        self.id_col, self.time_col, self.target_col = id_col, time_col, target_col

    @abc.abstractmethod
    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fit the transformation on `df` and return the transformed dataframe."""
        raise NotImplementedError

    @abc.abstractmethod
    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Undo the transformation on `df` and return the restored dataframe."""
        raise NotImplementedError

In [None]:
#| export
class Differences(BaseTargetTransform):
    """Applies one or more differences to the target of every series.

    Parameters
    ----------
    differences : iterable of int
        Lags to subtract, applied in the given order (e.g. ``[1, 24]``).

    Attributes
    ----------
    original_values_ : list of GroupedArray
        One entry per difference, holding ``d`` values per series that
        `inverse_transform` uses to undo that difference.
    """

    def __init__(self, differences: Iterable[int]):
        self.differences = list(differences)

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Difference the target column and keep what is needed to undo it.

        Parameters
        ----------
        df : pd.DataFrame
            Input data. It is passed to `GroupedArray.from_sorted_df`, so it is
            presumably expected to be sorted by id (and time) -- confirm
            against the caller.

        Returns
        -------
        pd.DataFrame
            Copy of `df` whose target column holds the differenced values.

        Raises
        ------
        ValueError
            If any series has fewer observations than ``sum(differences)``.
        """
        ga = GroupedArray.from_sorted_df(df, self.id_col, self.target_col)
        uids = df[self.id_col].unique()
        # indptr stores the cumulative boundaries of the series (n_series + 1
        # entries), so the per-series lengths are its consecutive differences.
        original_sizes = np.diff(ga.indptr)
        total_diffs = sum(self.differences)
        small_series = uids[original_sizes < total_diffs]
        if small_series.size:
            msg = reprlib.repr(small_series.tolist())
            raise ValueError(f'The following series are too short for the differences: {msg}')
        self.original_values_ = []
        n_series = len(ga.indptr) - 1
        for d in self.differences:
            # d slots per series to hold the values required to invert this difference
            new_data = np.empty_like(ga.data, shape=n_series * d)
            new_indptr = d * np.arange(n_series + 1, dtype=np.int32)
            # NOTE(review): the return value is unused, so _apply_difference is
            # expected to difference ga.data in place and fill new_data.
            _apply_difference(ga.data, ga.indptr, new_data, new_indptr, d)
            self.original_values_.append(GroupedArray(new_data, new_indptr))
        df = df.copy()
        df[self.target_col] = ga.data
        return df

    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Undo the differences on every column that is not the id or time column.

        Parameters
        ----------
        df : pd.DataFrame
            Dataframe with the id column, the time column and one column per model.

        Returns
        -------
        pd.DataFrame
            Copy of `df` with the model columns restored.
        """
        model_cols = df.columns.drop([self.id_col, self.time_col])
        df = df.copy()
        for model in model_cols:
            model_preds = df[model].values.copy()
            # differences are undone in the reverse order they were applied
            for d, ga in zip(reversed(self.differences), reversed(self.original_values_)):
                # NOTE(review): return value unused -- restore_difference is
                # expected to update model_preds in place.
                ga.restore_difference(model_preds, d)
            df[model] = model_preds
        return df

In [None]:
#| export
class GlobalSklearnTransformer(BaseTargetTransform):
    """Applies a single scikit-learn transformer to the target of all series together.

    Parameters
    ----------
    transformer : TransformerMixin
        Transformer to use. It is cloned on fit, so the supplied instance is
        not fitted.

    Attributes
    ----------
    transformer_ : TransformerMixin
        Fitted clone of `transformer`.
    """

    def __init__(self, transformer: TransformerMixin):
        self.transformer = transformer

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fit a clone of the transformer on the target column and transform it.

        Returns a copy of `df` with the target column replaced by its
        transformed values.
        """
        df = df.copy()
        self.transformer_ = clone(self.transformer)
        # the transformer is fitted on a single feature: the target as an (n, 1) array
        df[self.target_col] = self.transformer_.fit_transform(df[[self.target_col]].values)
        return df

    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Invert the transformation on every column that is not the id or time column.

        Returns a copy of `df` with those columns mapped back to the original scale.
        """
        df = df.copy()
        cols_to_transform = df.columns.drop([self.id_col, self.time_col])
        # The transformer was fitted on exactly one feature, so each column is
        # inverted on its own as an (n, 1) array. Passing all columns at once
        # fails for transformers that validate the number of features when
        # there is more than one model column.
        for col in cols_to_transform:
            restored = self.transformer_.inverse_transform(df[[col]].values)
            df[col] = np.asarray(restored).reshape(-1)
        return df

In [None]:
class LocalSklearnTransformer(BaseTargetTransform):
    """Placeholder for a transformer applied to each series separately.

    NOTE(review): the per-series logic is not implemented yet. Both methods
    currently return an unmodified copy of their input.

    Parameters
    ----------
    transformer : TransformerMixin
        Transformer to use. It is cloned on fit, so the supplied instance is
        not fitted.
    """

    def __init__(self, transformer: TransformerMixin):
        self.transformer = transformer

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Clone the transformer and return a copy of `df`.

        TODO: fit and apply the transformer to the target; the clone is
        stored in `transformer_` but is not fitted or used yet.
        """
        df = df.copy()
        self.transformer_ = clone(self.transformer)
        return df

    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of `df`.

        TODO: invert the transformation on the columns other than the id and
        time columns; they are currently returned unchanged.
        """
        return df.copy()

In [None]:
from mlforecast import MLForecast
from mlforecast.utils import generate_daily_series
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PowerTransformer

# Synthetic daily data produced by the library helper.
series = generate_daily_series(10)

# Box-Cox is fitted once on the target column and inverted on the predictions.
boxcox_transformer = GlobalSklearnTransformer(PowerTransformer(method='box-cox'))

fcst = MLForecast(
    models=[LinearRegression()],
    freq='D',
    lags=[1, 2],
    target_transforms=[boxcox_transformer],
)
fcst.fit(series)
# Forecast two steps ahead; the returned frame is the cell's displayed output.
fcst.predict(2)

Unnamed: 0,unique_id,ds,LinearRegression
0,id_0,2000-08-10,3.177458
1,id_0,2000-08-11,2.655017
2,id_1,2000-04-07,3.333926
3,id_1,2000-04-08,3.021444
4,id_2,2000-06-16,3.327134
5,id_2,2000-06-17,3.343061
6,id_3,2000-08-30,3.129761
7,id_3,2000-08-31,2.215409
8,id_4,2001-01-08,3.449457
9,id_4,2001-01-09,3.656765
