In [1]:
!pip install etna[all]

Collecting etna[all]
  Using cached etna-1.15.1-py3-none-any.whl (273 kB)




Installing collected packages: etna
Successfully installed etna-1.15.1


In [2]:
# импортируем необходимые библиотеки, модули, классы и функции
import pandas as pd
import numpy as np
import re
from typing import List

from lightgbm import LGBMRegressor

from etna.pipeline import Pipeline
from etna.datasets.tsdataset import TSDataset
from etna.transforms import (
    StandardScalerTransform,
    MeanTransform, 
    LagTransform,
    DateFlagsTransform)
from etna.analysis import plot_forecast
from etna.metrics import SMAPE
from etna.models.base import (BaseAdapter, 
                              NonPredictionIntervalContextIgnorantAbstractModel)
from etna.models.mixins import (PerSegmentModelMixin,
                                MultiSegmentModelMixin,
                                NonPredictionIntervalContextIgnorantModelMixin)

# отключаем предупреждения
import warnings
warnings.filterwarnings('ignore')



In [3]:
# пишем функцию построения модели ETNA и оценки ее качества
def train_and_evaluate_model(ts, 
                             model,
                             transforms,
                             horizon,
                             metrics,
                             print_metrics=False,
                             print_plots=False,
                             n_train_samples=None):
    """
    Обучает модель, вычисляет прогнозы для 
    тестовой выборки и строит график прогнозов.  
    
    Параметры
    ----------
    ts: pandas.DataFrame
        Временной ряд.
    model: instance of class etna
        Экземпляр класса библиотеки etna.
    transforms: list
        Список преобразований.
    horizon: int
        Горизонт прогнозирования.
    metrics: instance of class etna.metrics.SMAPE/
        MAE/R2/MAPE/MedAE/MSLE   
        Метрика качества.
    print_metrics: bool, по умолчанию False
        Печать метрик.
    print_plots: bool, по умолчанию False
        Печать графиков прогнозов.
    n_train_sample: int
        n последних наблюдений обучающей выборки 
        на графике прогнозов.
    """
    if not print_plots and n_train_samples is not None:
        raise ValueError(
            "Параметр n_train_samples задается при print_plots=True")
        
    # разбиваем набор на обучающую и тестовую выборки 
    # с учетом временной структуры, размер тестовой 
    # выборки задаем равным горизонту
    train_ts, test_ts = ts.train_test_split(test_size=horizon)
    # создаем конвейер
    pipe = Pipeline(model=model,
                    transforms=transforms,
                    horizon=horizon)
    # обучаем конвейер
    pipe.fit(train_ts)
    # получаем прогнозы
    forecast_ts = pipe.forecast()
    # оцениваем качество прогнозов по сегментам
    segment_metrics = metrics(test_ts, forecast_ts)
    segment_metrics = pd.Series(segment_metrics)
    
    if print_metrics:
        print(segment_metrics.to_string())
        print("")
        # оцениваем качество прогнозов в среднем
        print(f"Усредненная метрика:"
              f"{sum(segment_metrics) / len(segment_metrics)}")
    
    if print_plots:
        # визуализируем прогнозы, здесь n_train_samples
        # - n последних наблюдений в обучающей выборке
        plot_forecast(forecast_ts, test_ts, 
                      train_ts, n_train_samples=n_train_samples)

# пишем ядро - внутренний класс _LGBMAdapter,
# внутри - класс LGBMRegressor
class _LGBMAdapter(BaseAdapter):
    def __init__(
        self,
        boosting_type='gbdt',
        num_leaves=31,
        max_depth=-1,
        learning_rate=0.1,
        n_estimators=100,
        **kwargs
    ):
        self.model = LGBMRegressor(
            boosting_type=boosting_type,
            num_leaves=num_leaves,
            max_depth=max_depth,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            **kwargs
        )
        self._categorical = None

    def fit(self, df: pd.DataFrame, regressors: List[str]):
        df = df.rename(columns=lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
        features = df.drop(columns=['timestamp', 'target'])
        self._categorical = features.select_dtypes(
            include=['category']).columns.to_list()
        target = df['target']
        self.model.fit(X=features, y=target, 
                       categorical_feature=self._categorical)
        return self

    def predict(self, df: pd.DataFrame):
        features = df.drop(columns=['timestamp', 'target'])
        pred = self.model.predict(features)
        return pred
    
    def get_model(self) -> LGBMRegressor:
        return self.model

# пишем класс - модель LGBMMultiSegmentModel
class LGBMMultiSegmentModel(
    MultiSegmentModelMixin,
    NonPredictionIntervalContextIgnorantModelMixin,
    NonPredictionIntervalContextIgnorantAbstractModel):
    def __init__(
        self,
        boosting_type='gbdt',
        num_leaves=31,
        max_depth=-1,
        learning_rate=0.1,
        n_estimators=100,
        **kwargs
    ):
        self.boosting_type = boosting_type
        self.num_leaves = num_leaves
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.kwargs = kwargs
        super().__init__(
            base_model=_LGBMAdapter(
                boosting_type=boosting_type,
                num_leaves=num_leaves,
                max_depth=max_depth,
                learning_rate=learning_rate,
                n_estimators=n_estimators,
                **kwargs
            )
        )

In [4]:
# загружаем исторический набор
df = pd.read_csv('/kaggle/input/demand-forecasting-kernels-only/train.csv',
                 parse_dates=['date'])
# переименовываем date в timestamp, sales в target
df.rename(columns={'date': 'timestamp', 
                   'sales': 'target'}, inplace=True)
df

Unnamed: 0,timestamp,store,item,target
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10
...,...,...,...,...
912995,2017-12-27,10,50,63
912996,2017-12-28,10,50,59
912997,2017-12-29,10,50,74
912998,2017-12-30,10,50,62


In [5]:
# загружаем набор новых данных
df_new = pd.read_csv('/kaggle/input/demand-forecasting-kernels-only/test.csv', 
                     parse_dates=['date'])
# переименовываем date в timestamp
df_new.rename(columns={'date': 'timestamp'}, inplace=True)
ident = df_new['id']
df_new.drop('id', inplace=True, axis=1)
df_new

Unnamed: 0,timestamp,store,item
0,2018-01-01,1,1
1,2018-01-02,1,1
2,2018-01-03,1,1
3,2018-01-04,1,1
4,2018-01-05,1,1
...,...,...,...
44995,2018-03-27,10,50
44996,2018-03-28,10,50
44997,2018-03-29,10,50
44998,2018-03-30,10,50


In [6]:
# создаем сегменты - комбинации продуктовой 
# группы и номера магазина
df['segment'] = (df['store'].astype(str) + ' + ' 
                 + df['item'].astype(str))
df_new['segment'] = (df_new['store'].astype(str) + ' + ' 
                     + df_new['item'].astype(str))
df.head()

Unnamed: 0,timestamp,store,item,target,segment
0,2013-01-01,1,1,13,1 + 1
1,2013-01-02,1,1,11,1 + 1
2,2013-01-03,1,1,14,1 + 1
3,2013-01-04,1,1,13,1 + 1
4,2013-01-05,1,1,10,1 + 1


In [7]:
# присваиваем тип category столбцам store и item
for col in ['store', 'item']:
    df[col] = df[col].astype('category')
    df_new[col] = df_new[col].astype('category')

In [8]:
# формируем набор экзогенных переменных store и item
# для исторического периода
regressor_df = df[['timestamp', 'segment', 'store', 'item']].copy()
regressor_df

Unnamed: 0,timestamp,segment,store,item
0,2013-01-01,1 + 1,1,1
1,2013-01-02,1 + 1,1,1
2,2013-01-03,1 + 1,1,1
3,2013-01-04,1 + 1,1,1
4,2013-01-05,1 + 1,1,1
...,...,...,...,...
912995,2017-12-27,10 + 50,10,50
912996,2017-12-28,10 + 50,10,50
912997,2017-12-29,10 + 50,10,50
912998,2017-12-30,10 + 50,10,50


In [9]:
# формируем набор экзогенных переменных store и item
# для прогнозируемого периода
regressor_df_new = df_new.copy()
regressor_df_new

Unnamed: 0,timestamp,store,item,segment
0,2018-01-01,1,1,1 + 1
1,2018-01-02,1,1,1 + 1
2,2018-01-03,1,1,1 + 1
3,2018-01-04,1,1,1 + 1
4,2018-01-05,1,1,1 + 1
...,...,...,...,...
44995,2018-03-27,10,50,10 + 50
44996,2018-03-28,10,50,10 + 50
44997,2018-03-29,10,50,10 + 50
44998,2018-03-30,10,50,10 + 50


In [10]:
# сортируем признаки для последующей конкатенации
regressor_df = regressor_df.sort_index(axis=1, ascending=False)
regressor_df_new = regressor_df_new.sort_index(axis=1, ascending=False)

In [11]:
# конкатенируем набор с экзогенными переменными 
# для исторического периода и набор с экзогенными 
# переменными для прогнозируемого периода
regressor_df = pd.concat([regressor_df, regressor_df_new], axis=0)
# создаем новые внешние переменные
regressor_df['quarter'] = regressor_df['timestamp'].dt.quarter
regressor_df['quarter_start'] = regressor_df['timestamp'].dt.is_quarter_start
regressor_df['quarter_end'] = regressor_df['timestamp'].dt.is_quarter_end
regressor_df

Unnamed: 0,timestamp,store,segment,item,quarter,quarter_start,quarter_end
0,2013-01-01,1,1 + 1,1,1,True,False
1,2013-01-02,1,1 + 1,1,1,False,False
2,2013-01-03,1,1 + 1,1,1,False,False
3,2013-01-04,1,1 + 1,1,1,False,False
4,2013-01-05,1,1 + 1,1,1,False,False
...,...,...,...,...,...,...,...
44995,2018-03-27,10,10 + 50,50,1,False,False
44996,2018-03-28,10,10 + 50,50,1,False,False
44997,2018-03-29,10,10 + 50,50,1,False,False
44998,2018-03-30,10,10 + 50,50,1,False,False


In [12]:
# подготавливаем исторический набор эндогенных переменных
df.drop(['store', 'item'], axis=1, inplace=True)
df

Unnamed: 0,timestamp,target,segment
0,2013-01-01,13,1 + 1
1,2013-01-02,11,1 + 1
2,2013-01-03,14,1 + 1
3,2013-01-04,13,1 + 1
4,2013-01-05,10,1 + 1
...,...,...,...
912995,2017-12-27,63,10 + 50
912996,2017-12-28,59,10 + 50
912997,2017-12-29,74,10 + 50
912998,2017-12-30,62,10 + 50


In [13]:
# переводим исторический набор эндогенных 
# переменных в формат TSDataset
df = TSDataset.to_dataset(df)
df

segment,1 + 1,1 + 10,1 + 11,1 + 12,1 + 13,1 + 14,1 + 15,1 + 16,1 + 17,1 + 18,...,9 + 46,9 + 47,9 + 48,9 + 49,9 + 5,9 + 50,9 + 6,9 + 7,9 + 8,9 + 9
feature,target,target,target,target,target,target,target,target,target,target,...,target,target,target,target,target,target,target,target,target,target
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2013-01-01,13,37,37,33,37,22,42,14,13,38,...,34,6,28,11,9,36,29,30,45,27
2013-01-02,11,34,43,35,31,35,33,11,18,51,...,28,14,38,16,11,44,33,24,43,36
2013-01-03,14,32,34,41,50,26,45,12,15,42,...,41,18,24,20,8,29,19,35,34,25
2013-01-04,13,45,52,45,45,32,39,15,19,50,...,41,15,30,19,15,43,33,35,41,31
2013-01-05,10,35,45,46,49,31,47,22,16,56,...,42,13,33,16,13,53,36,28,49,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-27,14,55,43,47,49,42,62,16,29,54,...,49,19,41,26,18,52,39,44,52,44
2017-12-28,19,63,64,49,68,51,82,24,13,69,...,42,23,36,37,18,73,56,54,76,48
2017-12-29,15,56,60,58,73,42,65,11,27,66,...,58,17,48,15,20,68,56,59,73,54
2017-12-30,27,78,66,52,70,57,77,28,32,67,...,49,24,55,31,21,62,54,67,74,59


In [14]:
# переводим получившийся набор с экзогенными 
# переменными в формат TSDataset
regressor_df = TSDataset.to_dataset(regressor_df)
regressor_df

segment,1 + 1,1 + 1,1 + 1,1 + 1,1 + 1,1 + 10,1 + 10,1 + 10,1 + 10,1 + 10,...,9 + 8,9 + 8,9 + 8,9 + 8,9 + 8,9 + 9,9 + 9,9 + 9,9 + 9,9 + 9
feature,item,quarter,quarter_end,quarter_start,store,item,quarter,quarter_end,quarter_start,store,...,item,quarter,quarter_end,quarter_start,store,item,quarter,quarter_end,quarter_start,store
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2013-01-01,1,1,False,True,1,10,1,False,True,1,...,8,1,False,True,9,9,1,False,True,9
2013-01-02,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
2013-01-03,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
2013-01-04,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
2013-01-05,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-03-27,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
2018-03-28,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
2018-03-29,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9
2018-03-30,1,1,False,False,1,10,1,False,False,1,...,8,1,False,False,9,9,1,False,False,9


In [15]:
# создаем объединенный набор
ts = TSDataset(df=df, freq='D', df_exog=regressor_df, 
               known_future='all')
ts

segment,1 + 1,1 + 1,1 + 1,1 + 1,1 + 1,1 + 1,1 + 10,1 + 10,1 + 10,1 + 10,...,9 + 8,9 + 8,9 + 8,9 + 8,9 + 9,9 + 9,9 + 9,9 + 9,9 + 9,9 + 9
feature,item,quarter,quarter_end,quarter_start,store,target,item,quarter,quarter_end,quarter_start,...,quarter_end,quarter_start,store,target,item,quarter,quarter_end,quarter_start,store,target
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2013-01-01,1,1,False,True,1,13.0,10,1,False,True,...,False,True,9,45.0,9,1,False,True,9,27.0
2013-01-02,1,1,False,False,1,11.0,10,1,False,False,...,False,False,9,43.0,9,1,False,False,9,36.0
2013-01-03,1,1,False,False,1,14.0,10,1,False,False,...,False,False,9,34.0,9,1,False,False,9,25.0
2013-01-04,1,1,False,False,1,13.0,10,1,False,False,...,False,False,9,41.0,9,1,False,False,9,31.0
2013-01-05,1,1,False,False,1,10.0,10,1,False,False,...,False,False,9,49.0,9,1,False,False,9,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-27,1,4,False,False,1,14.0,10,4,False,False,...,False,False,9,52.0,9,4,False,False,9,44.0
2017-12-28,1,4,False,False,1,19.0,10,4,False,False,...,False,False,9,76.0,9,4,False,False,9,48.0
2017-12-29,1,4,False,False,1,15.0,10,4,False,False,...,False,False,9,73.0,9,4,False,False,9,54.0
2017-12-30,1,4,False,False,1,27.0,10,4,False,False,...,False,False,9,74.0,9,4,False,False,9,59.0


In [16]:
# создаем экземпляр класса StandardScalerTransform
scaler = StandardScalerTransform(in_column='target')

# создаем экземпляр класса LagTransform для генерации лагов
lags = LagTransform(in_column='target', 
                    lags=[90, 120, 150, 180, 210, 240, 
                          270, 300, 330, 360], 
                    out_column='lag')

# создаем экземпляры класса MeanTransform для 
# вычисления среднего по заданному окну
mean90 = MeanTransform(in_column='target', window=90, 
                        out_column='mean90')
mean180 = MeanTransform(in_column='target', window=180, 
                        out_column='mean180')
mean210 = MeanTransform(in_column='target', window=210, 
                        out_column='mean210')
mean240 = MeanTransform(in_column='target', window=240, 
                        out_column='mean240')
mean270 = MeanTransform(in_column='target', window=270, 
                        out_column='mean270')
mean360 = MeanTransform(in_column='target', window=360, 
                        out_column='mean360')

# создаем экземпляр класса DateFlagsTransform 
# для генерации признаков на основе дат
d_flags = DateFlagsTransform(day_number_in_year=True,
                             day_number_in_week=True,
                             day_number_in_month=True,
                             week_number_in_month=True,
                             week_number_in_year=True,
                             month_number_in_year=True,
                             season_number=True,
                             is_weekend=True,
                             out_column='datetime')

In [17]:
# задаем горизонт прогнозирования
HORIZON = 90

# создаем экземпляр класса SMAPE
smape = SMAPE()

# задаем список преобразований/признаков
preprocess = [scaler, 
              lags, mean90, mean180, mean210, mean240, 
              mean270, 
              mean360, 
              d_flags]

# создаем модель LGBMMultiSegmentModel
lgbm_model = LGBMMultiSegmentModel(n_estimators=150, 
                                   learning_rate=0.1,
                                   num_leaves=10,
                                   min_data_in_leaf=120,
                                   subsample=0.8)
# оцениваем качество модели
train_and_evaluate_model(
    ts=ts,
    model=lgbm_model,
    transforms=preprocess,
    horizon=HORIZON,
    metrics=smape,
    print_plots=False,
    print_metrics=True,
    n_train_samples=None)

9 + 11      9.092804
2 + 16     15.079856
10 + 40    14.445171
7 + 21     14.513560
5 + 30     14.754667
9 + 13      8.103392
1 + 37     18.309010
9 + 32     11.832658
9 + 50      9.410367
8 + 47     15.191545
2 + 27     14.634253
6 + 44     17.098676
9 + 29     11.789622
1 + 11     11.068192
9 + 39     11.638852
7 + 31     11.142977
9 + 7      10.484969
8 + 19     11.721682
9 + 10      9.477145
4 + 42     12.545830
7 + 3      17.563190
5 + 37     18.800366
4 + 44     16.313746
6 + 19     15.042179
8 + 10      8.097014
5 + 45     10.357621
2 + 22      7.286091
2 + 8       8.325744
9 + 49     13.782158
4 + 25     10.116195
5 + 14     12.474302
8 + 20     10.475089
8 + 24      9.139679
8 + 8       9.467614
10 + 5     19.606624
7 + 2      12.858358
9 + 35     10.123747
1 + 33     10.444240
7 + 35     11.917004
5 + 46     13.535718
6 + 25      9.908239
10 + 13     7.784364
5 + 39     15.119828
4 + 10      9.326127
4 + 21     14.184020
4 + 1      17.582014
7 + 30     13.992863
8 + 29      8

In [18]:
# создаем конвейер
pipe = Pipeline(
    model=lgbm_model,
    transforms=preprocess,
    horizon=HORIZON)
        
# находим метрики моделей по сегментам 
# по итогам перекрестной проверки
metrics_df, _, _ = pipe.backtest(
    mode='expand', 
    n_folds=4,
    ts=ts, 
    metrics=[smape], 
    aggregate_metrics=True)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.




[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   29.6s remaining:    0.0s




[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   59.2s remaining:    0.0s




[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  1.5min remaining:    0.0s




[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  2.0min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  2.0min finished


In [19]:
# смотрим метрики по 4 тестовым выборках 
# для первых 2 сегментов
n_folds = 4
n_segments = 2
n = n_folds * n_segments
metrics_df.head(n)

Unnamed: 0,segment,SMAPE
0,1 + 1,18.315264
1,1 + 10,10.273194
2,1 + 11,10.049248
3,1 + 12,10.030497
4,1 + 13,9.160326
5,1 + 14,10.528913
6,1 + 15,9.019105
7,1 + 16,17.475485


In [20]:
# смотрим значение SMAPE, усредненное по сегментам
metric = metrics_df['SMAPE'].mean()
print(f'mean SMAPE: {metric:.4f}')

mean SMAPE: 12.1794


In [21]:
# создаем модель LGBMMultiSegmentModel
full_lgbm_model = LGBMMultiSegmentModel(
    n_estimators=150, 
    learning_rate=0.09,
    num_leaves=10,
    min_data_in_leaf=120,
    subsample=0.8)
# выполняем преобразования всего исторического набора
ts.fit_transform(preprocess)
full_lgbm_model.fit(ts)
# формируем набор, для которого нужно получить прогнозы,
# длина набора определяется горизонтом прогнозирования,
# по сути мы формируем набор новых данных
future_ts = ts.make_future(HORIZON, preprocess)
# получаем прогнозы для новых данных
forecast_ts = full_lgbm_model.forecast(
    future_ts)
forecast_ts.inverse_transform(preprocess)



In [22]:
# превращаем в обычный плоский датафрейм
forecast_ts = forecast_ts.to_pandas(flatten=True)
forecast_ts

Unnamed: 0,timestamp,segment,target,datetime_day_number_in_month,datetime_day_number_in_week,datetime_day_number_in_year,datetime_is_weekend,datetime_month_number_in_year,datetime_season_number,datetime_week_number_in_month,...,mean180,mean210,mean240,mean270,mean360,mean90,quarter,quarter_end,quarter_start,store
0,2018-01-01,1 + 1,12.351105,1,0,1,False,1,1,1,...,0.460990,0.568661,0.600874,0.584531,0.342744,0.054243,1,False,True,1
1,2018-01-02,1 + 1,13.373818,2,1,2,False,1,1,1,...,0.454386,0.569948,0.596523,0.583928,0.346177,0.063243,1,False,False,1
2,2018-01-03,1 + 1,13.737442,3,2,3,False,1,1,1,...,0.446869,0.566229,0.596518,0.584432,0.347135,0.063921,1,False,False,1
3,2018-01-04,1 + 1,14.981487,4,3,4,False,1,1,1,...,0.443482,0.564635,0.591482,0.586613,0.348932,0.066341,1,False,False,1
4,2018-01-05,1 + 1,15.689501,5,4,5,False,1,1,1,...,0.435817,0.570264,0.595876,0.589371,0.353664,0.063580,1,False,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,2018-03-27,9 + 9,50.482068,27,1,87,False,3,2,5,...,0.131325,0.228783,0.379544,0.562556,0.718444,-0.089464,1,False,False,9
44996,2018-03-28,9 + 9,52.223485,28,2,88,False,3,2,5,...,0.120750,0.222652,0.379913,0.559538,0.718684,0.015022,1,False,False,9
44997,2018-03-29,9 + 9,55.429529,29,3,89,False,3,2,5,...,0.110661,0.219666,0.378985,0.550685,0.723535,0.026021,1,False,False,9
44998,2018-03-30,9 + 9,59.886414,30,4,90,False,3,2,5,...,0.108328,0.209541,0.379793,0.553401,0.724526,-0.270939,1,False,False,9


In [23]:
# формируем посылку
subm = (
    df_new.drop(['store', 'item'], axis=1)
    .merge(forecast_ts, on=['timestamp', 'segment'])
    ['target'].reset_index()
    .rename({'index': 'id', 'target': 'sales'}, axis=1)
    )
subm

Unnamed: 0,id,sales
0,0,12.351105
1,1,13.373818
2,2,13.737442
3,3,14.981487
4,4,15.689501
...,...,...
44995,44995,69.229315
44996,44996,71.421153
44997,44997,76.222190
44998,44998,82.150592


In [24]:
# записываем посылку в виде CSV-файла
subm.to_csv('kaggle_store_item_demand_submission.csv', index=False)