In [None]:
!pip install -U lightautoml

In [None]:
# Standard python libraries
import os

# Installed libraries
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

# Imports from our package
from lightautoml.tasks import Task
from lightautoml.addons.autots.base import AutoTS
from lightautoml.dataset.roles import DatetimeRole
from lightautoml.automl.base import AutoML
from lightautoml.ml_algo.boost_cb import BoostCB
from lightautoml.ml_algo.linear_sklearn import LinearLBFGS
from lightautoml.pipelines.features.lgb_pipeline import LGBSeqSimpleFeatures
from lightautoml.pipelines.features.linear_pipeline import LinearTrendFeatures
from lightautoml.pipelines.ml.base import MLPipeline
from lightautoml.reader.base import DictToPandasSeqReader
from lightautoml.automl.blend import WeightedBlender
from lightautoml.ml_algo.random_forest import RandomForestSklearn

# Disable warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the three raw tables (sales, calendar, prices) in one pass.
sales, dates, prices = map(
    pd.read_csv,
    (
        "data/shop_sales.csv",
        "data/shop_sales_dates.csv",
        "data/shop_sales_prices.csv",
    ),
)

In [None]:

class LamaTSA:
    """Single-item sales forecaster built around LightAutoML's ``AutoTS``.

    The constructor loads the sales, calendar and price CSV files, keeps the
    ``item_id`` / ``date`` / ``cnt`` columns of one store, and the remaining
    methods split, train, evaluate, persist and predict for one item of that
    store.
    """

    def __init__(self, sales, dates, prices, store_id, item_id, period='7'):
        """Load and preprocess the data for one store.

        Args:
            sales: Path (or buffer) of the sales CSV, passed to ``pd.read_csv``.
            dates: Path (or buffer) of the calendar CSV.
            prices: Path (or buffer) of the prices CSV.
            store_id: Full store id, or a numeric id that is expanded to
                ``"STORE_<id>"``.
            item_id: Full item id, or a numeric id that is expanded to
                ``"<store_id>_<id>"``.
            period: Number of observations used both as the hold-out size and
                as the ``n_target`` / ``history`` of the model; anything
                accepted by ``int()``.

        Raises:
            ValueError: If ``period`` is not a positive integer.
        """
        self.store_id = self._qualify(store_id, "STORE")
        self.item_id = self._qualify(item_id, self.store_id)
        self.period = int(period)
        # A non-positive period would make ``[-period]`` select the first row
        # (or index from the front), silently producing a meaningless split.
        if self.period <= 0:
            raise ValueError(f"period must be a positive integer, got {period!r}")

        print("preprocessing loaded data...")
        self.df = self.preprocess_data(self.load_data(sales, dates, prices))
        print("LamaTSA init complete!")

    @staticmethod
    def _qualify(raw_id, prefix):
        """Return ``"<prefix>_<raw_id>"`` for numeric ids, else ``raw_id`` unchanged."""
        # ``str()`` lets non-str numeric ids (e.g. NumPy integers) through
        # instead of failing on a missing ``.isnumeric`` attribute.
        if isinstance(raw_id, int) or str(raw_id).isnumeric():
            return f"{prefix}_{raw_id}"
        return raw_id

    def _select_item(self, df):
        """Return the rows of ``self.item_id`` from ``df`` without the ``item_id`` column.

        A frame that has no ``item_id`` column is assumed to be already
        filtered and is returned unchanged.
        """
        if 'item_id' not in df.columns:
            return df
        return df[df['item_id'] == self.item_id].drop("item_id", axis=1)

    def load_data(self, sales, dates, prices):
        """Read the three CSV files and join them into one frame.

        Sales are merged with the calendar on ``date_id`` and then with the
        prices on ``store_id`` / ``item_id`` / ``wm_yr_wk`` (pandas' default
        inner joins).
        """
        sales = pd.read_csv(sales)
        dates = pd.read_csv(dates)
        prices = pd.read_csv(prices)

        df = pd.merge(sales, dates, on='date_id')
        df = pd.merge(df, prices, on=['store_id', 'item_id', 'wm_yr_wk'])
        return df

    def preprocess_data(self, df):
        """Parse dates, keep one store and order the rows chronologically.

        Args:
            df: Merged frame with at least ``store_id``, ``item_id``, ``date``
                and ``cnt`` columns. Its ``date`` column is converted in place.

        Returns:
            Frame with the columns ``item_id``, ``date``, ``cnt`` for
            ``self.store_id``, sorted by ``date``.
        """
        df['date'] = pd.to_datetime(df['date'])
        df = df.loc[df['store_id'] == self.store_id, ['item_id', 'date', 'cnt']]
        # The split below is positional, so the rows must be in time order.
        # A stable sort is a no-op for input that is already date-ordered.
        df = df.sort_values('date', kind='stable')
        # todo fill empty periods

        print(f"data shape: {df.shape}")
        return df

    def split_train_test(self):
        """Split ``self.df`` into train and test frames (simple backtest).

        The cut-off is the date of the selected item's ``period``-th
        observation from the end; every row (of any item) before it goes to
        train, the rest to test.

        Returns:
            Tuple ``(train, test)`` of independent copies.

        Raises:
            IndexError: If the selected item has fewer than ``period`` rows.
        """
        item_dates = self.df.loc[self.df['item_id'] == self.item_id, 'date']
        if len(item_dates) < self.period:
            raise IndexError(
                f"item {self.item_id!r} has {len(item_dates)} observations, "
                f"need at least {self.period}"
            )
        test_start = item_dates.iloc[-self.period]
        train = self.df[self.df['date'] < test_start].copy()
        test = self.df[self.df['date'] >= test_start].copy()
        return train, test

    def define_training_task(self):
        """Build an unfitted ``AutoTS`` configured for ``self.period``.

        The task is ``multi:reg`` with MAE as both metric and loss; the
        sequence reader uses ``self.period`` for ``n_target`` and ``history``,
        the trend model is disabled and 30 lag / lag-time features are
        requested.
        """
        # define task
        task = Task("multi:reg", greater_is_better=False, metric="mae", loss="mae")

        # configure model
        seq_params = {
            "seq0": {
                "case": "next_values",
                "params": {
                    "n_target": self.period,
                    "history": self.period,
                    "step": 1,
                    "from_last": True,
                    "test_last": True,
                },
            }
        }

        transformers_params = {
            "lag_features": 30,
            "lag_time_features": 30,
        }

        automl = AutoTS(
            task,
            reader_params={
                "seq_params": seq_params
            },
            time_series_trend_params={
                "trend": False,
            },
            time_series_pipeline_params=transformers_params,
        )
        return automl

    def train_model(self, train_dataset):
        """Fit an ``AutoTS`` model on the selected item's rows of ``train_dataset``.

        Args:
            train_dataset: Training frame, e.g. the first element returned by
                ``split_train_test``. ``None`` falls back to the full
                ``self.df`` (which includes any hold-out window).

        Returns:
            The fitted model.
        """
        # Train on the frame the caller passed in; fitting on ``self.df``
        # would leak the hold-out window into training.
        if train_dataset is None:
            train_dataset = self.df
        univariate_train = self._select_item(train_dataset)

        # define roles
        univariate_roles = {
            "target": 'cnt',
            DatetimeRole(seasonality=('d', 'm', 'wd'), base_date=True): 'date',
        }

        # train model
        model = self.define_training_task()
        model.fit_predict(univariate_train, univariate_roles, verbose=4)

        return model

    def eval_model(self, model, train_dataset, test_dataset):
        """Forecast from the training history and score it against the test window.

        Args:
            model: Fitted model returned by ``train_model`` / ``load_model``.
            train_dataset: History frame the forecast is produced from.
            test_dataset: Frame holding the actual ``cnt`` values to compare with.

        Returns:
            Mean absolute error between the actual values and the forecast.
        """
        # The model was fitted on a single-item ``date``/``cnt`` frame, so the
        # history must be reduced to the same item and columns before predicting.
        history = self._select_item(train_dataset)
        actual = self._select_item(test_dataset)
        fcst, _ = model.predict(history)

        mae = mean_absolute_error(actual.cnt.values, fcst)
        print(f"MAE: {mae}")
        return mae

    def save_model(self, model, path):
        """Serialize ``model`` to ``path`` with joblib and return ``path``."""
        joblib.dump(model, path)
        return path

    def load_model(self, path):
        """Load a model previously written by ``save_model``.

        Raises:
            ValueError: If ``path`` is empty or does not exist.
        """
        if not path:
            raise ValueError("path to the saved model must not be empty")
        if not os.path.exists(path):
            raise ValueError(f"model file not found: {path}")

        # NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
        model = joblib.load(path)
        return model

    def predict(self, model, pred_df):
        """Forecast for the selected item from ``pred_df`` and print the result.

        Args:
            model: Fitted model.
            pred_df: Frame with ``date`` and ``cnt`` columns (and optionally
                ``item_id``) used as the forecasting history.

        Returns:
            The forecast returned by ``model.predict``.
        """
        pred = self._select_item(pred_df)
        fcst, _ = model.predict(pred)

        print(fcst, "\n")
        # NOTE(review): this compares the forecast with the ``cnt`` values of
        # the input history itself and requires both to have the same length
        # -- confirm this is the intended diagnostic.
        print(f"MAE: {mean_absolute_error(pred.cnt.values, fcst)}")
        return fcst
        

In [None]:
# Build the forecaster for item 586 of store 2; the constructor reads and
# preprocesses the three CSV files.
i = LamaTSA(
    "data/shop_sales.csv",
    "data/shop_sales_dates.csv",
    "data/shop_sales_prices.csv",
    store_id='2',
    item_id='586',
)

In [None]:
# Split the store frame at the date found `period` rows from the end of the
# selected item's history (simple backtest).
train, test = i.split_train_test()

In [None]:
# Fit the AutoTS model for the selected item, then persist it to disk with joblib.
model = i.train_model(train)
i.save_model(model, 'model.pkl')

In [None]:
# Forecast from `train` and report the MAE against the selected item's `cnt`
# values in `test`.
i.eval_model(model, train, test)