# Решение

### Рекурсивный подход

In [1]:
import os
os.chdir(os.path.dirname(os.getcwd()))
from tqdm import tqdm
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta

In [2]:
def add_master_data_mappings(root_path: str,df: pd.DataFrame) -> pd.DataFrame:
    """Left-join the client, freight and station reference tables onto ``df``.

    The reference files ``client_mapping.csv``, ``freight_mapping.csv`` and
    ``station_mapping.csv`` are read from ``root_path`` (semicolon separated,
    decimal comma, windows-1251 encoded).

    Args:
        root_path: Directory that holds the reference CSV files.  It may be
            given with or without a trailing path separator.
        df: Frame with the join keys ``client_sap_id``, ``freight_id``,
            ``sender_station_id`` and ``recipient_station_id``.

    Returns:
        A new frame with the columns of all reference tables appended.  The
        station table is joined twice, once with every column prefixed by
        ``sender_`` and once with ``recipient_``.
    """

    def _read_mapping(file_name: str) -> pd.DataFrame:
        # os.path.join (instead of string concatenation) keeps the path valid
        # when root_path does not end with a separator.
        return pd.read_csv(
            os.path.join(root_path, file_name),
            sep=";",
            decimal=",",
            encoding="windows-1251",
        )

    # Client -> holding
    df = pd.merge(df, _read_mapping("client_mapping.csv"), how="left", on="client_sap_id")

    # Freight
    df = pd.merge(df, _read_mapping("freight_mapping.csv"), how="left", on="freight_id")

    # Stations: the same table describes both ends of the route, so it is
    # joined once per role with a role-specific column prefix.
    station_mapping = _read_mapping("station_mapping.csv")
    for role in ("sender_", "recipient_"):
        df = pd.merge(
            df,
            station_mapping.add_prefix(role),
            how="left",
            on=role + "station_id",
        )

    return df


def evaluate(
    fact: pd.DataFrame,
    forecast: pd.DataFrame,
    public: bool = True,
    root_path: str = "",
) -> float:
    """Score a forecast against the fact as a weighted monthly forecast accuracy.

    Args:
        fact: Actual shipments; must contain ``real_wagon_count`` plus the
            keys needed by ``add_master_data_mappings``.
        forecast: Predicted shipments; must contain ``forecast_wagon_count``
            plus the same keys.
        public: Selects the month weights: ``True`` scores only the second
            month, ``False`` uses weights 0.1 / 0.6 / 0.1 / 0.1 / 0.1.
        root_path: Directory with the reference CSV files, forwarded to
            ``add_master_data_mappings``.  The default ``""`` resolves the
            files relative to the current working directory.

    Returns:
        The weighted sum of the per-month accuracies.

    Raises:
        ValueError: If the merged data does not cover exactly as many periods
            as there are month weights (five).
    """
    # = Metric parameters =
    accuracy_granularity = [
        "period",
        "rps",
        "holding_name",
        "sender_department_name",
        "recipient_department_name",
    ]
    fact_value, forecast_value = "real_wagon_count", "forecast_wagon_count"
    if public:
        metric_weight = np.array([0.0, 1.0, 0.0, 0.0, 0.0])
    else:
        metric_weight = np.array([0.1, 0.6, 0.1, 0.1, 0.1])

    # = Metric calculation =
    # 1. Add the higher-level entities from the reference tables.
    #    add_master_data_mappings requires the reference directory as its
    #    first argument; calling it with the frame only raised TypeError.
    fact = add_master_data_mappings(root_path, fact)
    forecast = add_master_data_mappings(root_path, forecast)

    # 2. KPI calculation
    compare_data = pd.merge(
        fact.groupby(accuracy_granularity, as_index=False)[fact_value].sum(),
        forecast.groupby(accuracy_granularity, as_index=False)[forecast_value].sum(),
        how="outer",
        on=accuracy_granularity,
    ).fillna(0)
    # Guard against gaming the metric: fractional wagon counts are rounded.
    compare_data[fact_value] = np.around(compare_data[fact_value]).astype(int)
    compare_data[forecast_value] = np.around(compare_data[forecast_value]).astype(int)

    # 3. Per-month metrics
    compare_data["ABS_ERR"] = abs(
        compare_data[forecast_value] - compare_data[fact_value]
    )
    compare_data["MAX"] = abs(compare_data[[forecast_value, fact_value]].max(axis=1))
    summary = compare_data.groupby("period")[
        [forecast_value, fact_value, "ABS_ERR", "MAX"]
    ].sum()
    summary["Forecast Accuracy"] = 1 - summary["ABS_ERR"] / summary["MAX"]

    # 4. Weight the monthly metrics into a single score.  The weights are
    #    applied positionally to the chronologically sorted months, so the
    #    number of months has to match the number of weights.
    accuracy = summary["Forecast Accuracy"].sort_index(ascending=True)
    if len(accuracy) != len(metric_weight):
        raise ValueError(
            f"expected {len(metric_weight)} periods to score, got {len(accuracy)}"
        )
    score = (accuracy * metric_weight).sum()

    return score


def calc_score_public(
    fact: pd.DataFrame, forecast: pd.DataFrame, root_path: str = ""
) -> float:
    """Return the public score (only the second month is weighted)."""
    return evaluate(fact, forecast, public=True, root_path=root_path)


def calc_score_private(
    fact: pd.DataFrame, forecast: pd.DataFrame, root_path: str = ""
) -> float:
    """Return the private score (all five months are weighted)."""
    return evaluate(fact, forecast, public=False, root_path=root_path)



In [3]:
# Validation start date / forecast-horizon start date (ISO date strings):
# rows before test_period[0] are used for training, rows in
# [test_period[0], test_period[1]) for validation, and test_period[1] is the
# first month of the forecast horizon.
test_period = ['2023-03-01','2023-04-01']
# Rows with a period earlier than this date are dropped after loading.
min_data_date = '2017-01-01'
# NOTE(review): not referenced anywhere in the visible part of the notebook — confirm it is still needed.
val_size = 1

In [4]:

root_path = 'D:\\DataWagon\\'
# Load the fact table, parse the period column and attach the reference data.
data = pd.read_csv(
    root_path+"fact_train_test.csv",
    encoding="windows-1251",
    sep=";",
    decimal=",",
)
data["period"] = data["period"].astype("datetime64[ns]")
data = add_master_data_mappings(root_path,data)
# Missing holdings get one dedicated sentinel id so the column can be integer.
data['holding_name'] = data['holding_name'].fillna(-111111).astype('int')
# Drop everything older than min_data_date.
data = data.loc[data['period']>=min_data_date]
# Columns whose combination identifies a single time series.
FcID_columns = [
    'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id',
    'sender_station_id', 'recipient_station_id', 'sender_organisation_id',
]
# FcID is the running number of each distinct series.
FcID_dict = data[FcID_columns].drop_duplicates().assign(
    FcID=lambda unique_keys: list(range(len(unique_keys)))
)
data = data.merge(FcID_dict,how='left',on=FcID_columns)

In [5]:
# Keep only the series that have at least one row between one month before
# the validation start and the start of the forecast horizon.
fc_list = data.loc[
    (data['period'] < test_period[1])
    & (
        data['period']
        >= data.loc[data['period']==test_period[0], 'period'].max()
        - relativedelta(months=1)
    ),
    'FcID',
].drop_duplicates()
data = data[data['FcID'].isin(fc_list)]

In [6]:
# Split the series into groups of `window` series each, ordered by total
# wagon count (largest first); GroupID is the identifier of the group.
# A trailing group smaller than 75% of `window` is folded into the previous one.
window = 1000
groups_dict = data.groupby(by='FcID',as_index=False)['real_wagon_count'].sum().sort_values(by=['real_wagon_count'],ascending=False)


def _assign_group_ids(n_series, window, min_tail_share=0.75):
    """Return one group id per series position, ``window`` series per group.

    A short trailing group (fewer than ``window * min_tail_share`` series) is
    merged into the group before it.  Nothing is merged when the series count
    is an exact multiple of ``window`` (there is no tail) or when there is
    only a single group (there is no previous group to merge into).
    """
    ids = [position // window for position in range(n_series)]
    tail = n_series % window
    if n_series > window and 0 < tail < window * min_tail_share:
        ids[-tail:] = [ids[-1] - 1] * tail
    return ids


groups_ids = _assign_group_ids(len(groups_dict), window)
groups_dict['GroupID'] = groups_ids
data = pd.merge(data,groups_dict[['FcID','GroupID']],how='left',on=['FcID'])

In [7]:
# Helper that inserts the months missing from each series.
def FillMissingDates(data):
    """Return ``data`` with every (month, FcID) combination present.

    The frame is pivoted to one column per FcID, re-sampled to a month-start
    frequency between the first and last period, and turned back into long
    form sorted by FcID.  Combinations that were absent get the value 0.
    A numeric ``PeriodID`` column (year*1000000 + month*10000 + day*100) is
    added.  The input frame is not modified.
    """
    wide = data.set_index(['period', 'FcID']).unstack(fill_value=0)
    wide = wide.asfreq('MS', fill_value=0)
    filled = wide.stack().sort_index(level=1).reset_index()
    stamp = filled['period'].dt
    filled['PeriodID'] = stamp.year*1000000 + stamp.month*10000 + stamp.day*100
    return filled

In [8]:
# Helper that builds the lag and rolling-mean features.
def CreateFeas(df,lags,means,stds,target):
    """Add per-series lag and rolling-mean columns of ``target`` to ``df``.

    For every ``lag`` a column ``lag_t<lag>`` holds the target shifted by that
    many rows within the same FcID.  For every ``mn`` a column ``mean_t<mn>``
    holds the mean of the ``mn`` rows preceding the current one within the
    same FcID.  ``df`` is modified in place; ``stds`` is accepted but unused.

    Returns:
        ``(predictors, df)`` where ``predictors`` lists the new column names,
        lags first and means second.
    """
    per_series = df.groupby(['FcID'])[target]
    predictors = []

    for lag in lags:
        column = f'lag_t{lag}'
        df[column] = per_series.transform(lambda values: values.shift(lag))
        predictors.append(column)

    for mn in means:
        column = f'mean_t{mn}'
        # shift(1) keeps the current row out of its own average
        df[column] = per_series.transform(
            lambda values: values.shift(1).rolling(mn).mean()
        )
        predictors.append(column)

    return predictors, df

In [9]:
# Helper that builds the calendar features.
def CreateDateFeas(df):
    """Add ``Month`` and ``Year`` columns derived from ``df['period']``.

    ``df`` is modified in place.  Returns ``(period_predictors, df)`` where
    ``period_predictors`` is the list of the two new column names.
    """
    calendar = df['period'].dt
    df['Month'], df['Year'] = calendar.month, calendar.year
    return ['Month', 'Year'], df

In [10]:
# модель 

from catboost import CatBoostRegressor
def FitModel(X_train,Y_train,X_val,Y_val,categorical_features_names,grp):
    """Fit a CatBoost regressor on the training set and return it.

    The validation set is passed as ``eval_set``; together with
    ``early_stopping_rounds`` it bounds the number of boosting iterations.
    ``categorical_features_names`` and ``grp`` are accepted but not used:
    no feature is declared categorical.
    """
    # Author's note: Poisson did not work out, RMSE performs best.
    regressor = CatBoostRegressor(
        iterations=100000,
        early_stopping_rounds=100,
        learning_rate=0.05,
        task_type='CPU',
        depth=5,
        random_seed=100,
        objective='RMSE',
        l2_leaf_reg=20,
    )
    regressor.fit(X_train, Y_train, eval_set=(X_val, Y_val), verbose=False)
    return regressor

In [11]:
# Forecasting helper: iteratively builds the features for each next point.

def Predict(df,model,start_date,end_date,target,lags,means,stds,predictors,fcid_predictors):
    """Forecast ``target`` month by month, feeding each forecast back as history.

    Args:
        df: History window with columns ``period``, ``FcID``, ``target`` and
            the predictor columns.
        model: Fitted model exposing ``predict``.
        start_date, end_date: First and last month to forecast (inclusive,
            month-start frequency).
        target: Name of the column being forecast.
        lags, means: Lag offsets and rolling-mean window sizes, in months.
        stds: Accepted but not used.
        predictors: Column names passed to ``model.predict``.
        fcid_predictors: Static per-series columns copied from the latest
            history month.

    Returns:
        The concatenation of one frame per forecast month.  Each frame holds
        the prediction in ``Predict_<target>`` and, because the frame is
        updated after it was appended, also in ``target``.
    """
    date_list = pd.date_range(start_date,end_date,freq='MS')
    predicts = []
    for curr_date in tqdm(date_list):
        # One row per series: take the static columns from the latest month of history.
        df_t = df[df['period'] == df['period'].max()].sort_values(by='FcID')[['FcID']+fcid_predictors].sort_values(by='FcID')
        df_t['period'] = curr_date
        # Target-derived features.
        # NOTE(review): values are assigned positionally after sorting by FcID,
        # which assumes every series has exactly one row in every month of
        # `df` — confirm the caller guarantees this.
        for lag in lags:
            df_t['lag_t'+str(lag)] = df[df['period']==curr_date - relativedelta(months=lag)][['FcID',target]].sort_values(by='FcID')[target].values
        for mn in means:
            df_t['mean_t'+str(mn)] = df[df['period']>=curr_date - relativedelta(months=mn)].groupby(by='FcID',as_index=False)[target].mean().sort_values(by='FcID')[target].values
        # Calendar features.
        df_t['Year'] = df_t['period'].dt.year
        df_t['Month'] = df_t['period'].dt.month
        df_t['Predict_'+target] = model.predict(df_t[predictors])
        predicts.append(df_t)
        # Treat the forecast as the actual value for the following iterations.
        df_t[target] = df_t['Predict_'+target]
        # Slide the history window: drop the oldest month, append the forecast month.
        df = df[df['period']> df['period'].min()]
        df = pd.concat([df,df_t[df.columns]])
    return(pd.concat(predicts))

In [12]:
# Quick estimate of how much of the recent fact the largest groups cover.
# Useful while testing the model: it tells how many groups have to be
# forecast to reach a given share (bnt_const) of the fact.
bnt_const = 0.98
# The table is built on both branches so the final display never fails.
# "Recent" means the last three months (3 * 28 days back from the latest
# period) before the forecast horizon.
_history = data[data['period'] < test_period[1]]
_recent = _history[_history['period'] > _history['period'].max() - datetime.timedelta(days=28*3)]
grp_bounth = _recent.groupby(by=['GroupID'],as_index=False)['real_wagon_count'].sum()
grp_bounth = grp_bounth.sort_values(by='real_wagon_count',ascending=False)
# Cumulative share of the wagon count, largest group first.
grp_bounth['Proc'] = (grp_bounth['real_wagon_count'] / grp_bounth['real_wagon_count'].sum()).cumsum()
if bnt_const < 1:
    _covered = grp_bounth.loc[grp_bounth['Proc'] <= bnt_const, 'GroupID']
    # If even the largest group exceeds the threshold, still forecast one
    # group instead of ending up with a NaN count.
    grp_count = int(_covered.max()) + 1 if len(_covered) else 1
else:
    # All groups; the number of groups, not the number of series.
    grp_count = groups_dict['GroupID'].nunique()
print('COUNT OF GROUPS: ',grp_count)
grp_bounth[:5]

COUNT OF GROUPS:  42


Unnamed: 0,GroupID,real_wagon_count,Proc
0,0,1481199,0.550468
1,1,311536,0.666247
2,2,175181,0.73135
3,3,115668,0.774337
4,4,83603,0.805407


In [13]:
# All lags and rolling-mean windows used by the model.
lags = list(range(1,12))
means = [2,3,4,6,12]
stds = []
results = []
# Fit a model and build a forecast for each of the first `grp_count` groups
# (with bnt_const = 0.98 these cover about 98% of the wagon count of the last three months).
for grp in groups_dict['GroupID'].drop_duplicates()[:grp_count]:
    print('GROUP: ',grp)
    df = data[data['GroupID']==grp]
    # Static per-series attributes used as model features.
    fcid_predictors = ['rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id','holding_name', 'freight_group_name','sender_department_name', 'sender_railway_name','recipient_department_name', 'recipient_railway_name']
    # Lookup tables used to re-attach attributes and weights after the gaps are filled.
    df_dict_fcid = df[fcid_predictors + ['FcID']].drop_duplicates()
    df_dict_weight = df[['FcID','period','real_weight']] 
    # Fill in the missing months (absent months get a wagon count of 0).
    df = FillMissingDates(df[['period','FcID','real_wagon_count']])

    df = pd.merge(df,df_dict_fcid,how='left',on='FcID')
    df = pd.merge(df,df_dict_weight,how='left',on=['FcID','period'])
    df['real_weight'] = df['real_weight'].fillna(0)
    period_predictors,df = CreateDateFeas(df)
    dfs= []
    # NOTE(review): `dfs` is iterated inside the target loop, so with more than
    # one target the earlier entries would be processed again — fine for the
    # single target used here.
    for target in ['real_wagon_count']:
        dfs.append(CreateFeas(df,lags,means,stds,target))
        for target_predictors,curr_df in dfs:
            # Assemble the list of predictors.
            predictors = target_predictors+ fcid_predictors + period_predictors
            print(predictors)
            # Train on everything before the validation month; rows whose lag
            # features are still NaN are dropped.
            train = curr_df[curr_df['period']<test_period[0]].dropna().sort_values(by='period')
            val   = curr_df[(curr_df['period']>=test_period[0]) & (curr_df['period']<test_period[1])].sort_values(by='period')
            # NOTE(review): `test` is not used in the visible part of the notebook.
            test  = curr_df[curr_df['period']>=test_period[1]].sort_values(by='period')
            # Forecast the five months that follow the last validation month.
            start_date = val['period'].max() + relativedelta(months=1)
            end_date   = val['period'].max() + relativedelta(months=5)
            print('Start predict for dates: ',start_date,' - ',end_date)
            model = FitModel(train[predictors],train[target],val[predictors],val[target],fcid_predictors,grp)
            # History window long enough for the largest lag / rolling window.
            hist_data_date = start_date -  relativedelta(months=max([max([0]+lags),max([0]+means),max([0]+stds)]))
            test_data = curr_df[ (curr_df['period']>=hist_data_date) & (curr_df['period']<start_date) ][['period','FcID',target]+predictors]
            result = Predict(test_data,model,start_date,end_date,target,lags,means,stds,predictors,fcid_predictors)
            results.append(result)
# Keep the forecast (and an untouched copy of it).
results = pd.concat(results)
results_sv = results.copy()

#feature_importance = model.feature_importances_
#sorted_idx = np.argsort(feature_importance)
#fig = plt.figure(figsize=(9, 16))
#plt.barh(range(len(sorted_idx)), feature_importance[sorted_idx], align='center')
#plt.yticks(range(len(sorted_idx)), np.array(predictors)[sorted_idx])
#plt.title('Feature Importance')
#plt.show()
                

GROUP:  0


['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.33it/s]


GROUP:  1
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.78it/s]


GROUP:  2
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.37it/s]


GROUP:  3
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.08it/s]


GROUP:  4
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.61it/s]


GROUP:  5
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.88it/s]


GROUP:  6
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.93it/s]


GROUP:  7
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.13it/s]


GROUP:  8
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.56it/s]


GROUP:  9
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 12.02it/s]


GROUP:  10
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.18it/s]


GROUP:  11
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.81it/s]


GROUP:  12
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.61it/s]


GROUP:  13
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.97it/s]


GROUP:  14
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.74it/s]


GROUP:  15
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.09it/s]


GROUP:  16
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.20it/s]


GROUP:  17
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.71it/s]


GROUP:  18
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.01it/s]


GROUP:  19
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.96it/s]


GROUP:  20
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.79it/s]


GROUP:  21
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.08it/s]


GROUP:  22
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.05it/s]


GROUP:  23
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.98it/s]


GROUP:  24
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.95it/s]


GROUP:  25
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.73it/s]


GROUP:  26
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.96it/s]


GROUP:  27
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.58it/s]


GROUP:  28
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.69it/s]


GROUP:  29
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.35it/s]


GROUP:  30
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.31it/s]


GROUP:  31
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.53it/s]


GROUP:  32
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.41it/s]


GROUP:  33
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.92it/s]


GROUP:  34
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.00it/s]


GROUP:  35
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.00it/s]


GROUP:  36
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.11it/s]


GROUP:  37
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.95it/s]


GROUP:  38
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.67it/s]


GROUP:  39
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.91it/s]


GROUP:  40
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.56it/s]


GROUP:  41
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.70it/s]


### shift модель

In [14]:
# Cumulative share of recent wagon volume that the selected groups must stay
# within; a value of 1 or more selects every group from groups_dict.
bnt_const = 0.98
if bnt_const < 1:
    # Rows strictly before the second test boundary.
    pre_test_rows = data[data['period'] < test_period[1]]
    # Keep only periods later than (latest pre-test period - 84 days).
    recent_cutoff = pre_test_rows['period'].max() - datetime.timedelta(days=28 * 3)
    recent_rows = data[
        (data['period'] < test_period[1]) & (data['period'] > recent_cutoff)
    ]

    # Wagon volume per group over that window, largest groups first.
    grp_bounth = recent_rows.groupby(by=['GroupID'], as_index=False)['real_wagon_count'].sum()
    grp_bounth = grp_bounth.sort_values(by='real_wagon_count', ascending=False)

    # 'Proc' is the running share of the total volume.
    volume_share = grp_bounth['real_wagon_count'] / grp_bounth['real_wagon_count'].sum()
    grp_bounth['Proc'] = volume_share
    grp_bounth['Proc'] = grp_bounth['Proc'].cumsum()

    # Largest GroupID still inside the share threshold, plus one.
    # NOTE(review): this presumably relies on GroupID being numbered by
    # descending volume - confirm where GroupID is assigned.
    within_share = grp_bounth['Proc'] <= bnt_const
    grp_count = grp_bounth[within_share]['GroupID'].max() + 1
else:
    grp_count = len(groups_dict)
print('COUNT OF GROUPS: ',grp_count)
grp_bounth[:5]

COUNT OF GROUPS:  42


Unnamed: 0,GroupID,real_wagon_count,Proc
0,0,1481199,0.550468
1,1,311536,0.666247
2,2,175181,0.73135
3,3,115668,0.774337
4,4,83603,0.805407


In [15]:
def Predict(df,model,start_date,end_date,target,lags,means,stds,predictors,fcid_predictors):
    """Predict ``target`` for every month start from ``start_date`` to ``end_date``.

    All lag and rolling-mean features are taken from the history in ``df``
    relative to its latest period; the only things that change between
    forecast months are ``period``, ``Year``, ``Month`` and the horizon
    number ``hor`` (1 for the first forecast month, 2 for the second, ...).
    ``df`` itself is not modified.

    Args:
        df: History with at least ``period``, ``FcID``, ``target`` and the
            ``fcid_predictors`` columns.
        model: Object exposing ``predict(frame)``.
        start_date: First forecast date (inclusive).
        end_date: Last forecast date (inclusive).
        target: Name of the column the lag/mean features are built from.
        lags: Lag numbers; ``lag_t{k}`` is the target value ``k - 1`` months
            before the latest period in ``df``.
        means: Window sizes; ``mean_t{k}`` is the per-FcID mean of the target
            over rows whose period is at most ``k - 1`` months before the
            latest period.
        stds: Unused; kept so existing callers do not have to change.
        predictors: Columns passed to ``model.predict``.
        fcid_predictors: Per-FcID attribute columns copied from the latest
            period into every forecast frame.

    Returns:
        One frame per forecast month, concatenated, with columns ``FcID``,
        ``fcid_predictors``, ``period``, the lag and mean features, ``Year``,
        ``Month``, ``hor`` and ``Predict_<target>``.

    Raises:
        ValueError: If the date range contains no month start (nothing to
            concatenate), or if an FcID occurs more than once within a lag
            period so its value cannot be looked up unambiguously.
    """
    date_list = pd.date_range(start_date, end_date, freq='MS')
    last_period = df['period'].max()

    # One row per FcID of the latest period, ordered by FcID.
    base = df[df['period'] == last_period][['FcID'] + fcid_predictors].sort_values(by='FcID')
    fc_ids = base['FcID'].to_numpy()

    # Feature construction. The history never changes between horizons, so
    # the features are computed once. Values are aligned to `base` by FcID
    # rather than by row position: an FcID without history in a given period
    # gets NaN instead of shifting every later value onto the wrong row.
    features = {}
    for lag in lags:
        lag_period = last_period - relativedelta(months=lag - 1)
        lag_values = df[df['period'] == lag_period].set_index('FcID')[target]
        features['lag_t' + str(lag)] = lag_values.reindex(fc_ids).to_numpy()
    for mn in means:
        window_start = last_period - relativedelta(months=mn - 1)
        window_mean = df[df['period'] >= window_start].groupby(by='FcID')[target].mean()
        features['mean_t' + str(mn)] = window_mean.reindex(fc_ids).to_numpy()

    predicts = []
    for horizon, curr_date in enumerate(tqdm(date_list), start=1):
        df_t = base.copy()
        df_t['period'] = curr_date
        for name, values in features.items():
            df_t[name] = values

        df_t['Year'] = df_t['period'].dt.year
        df_t['Month'] = df_t['period'].dt.month
        # Horizon feature: how many months ahead this forecast is.
        df_t['hor'] = horizon
        df_t['Predict_' + target] = model.predict(df_t[predictors])
        predicts.append(df_t)
    return pd.concat(predicts)

In [16]:
# Train and apply the "shift" model: one model per group that receives the
# forecast horizon `hor` as a feature, instead of feeding its own predictions
# back in as lags.
lags = list(range(1,12))   # lag features lag_t1 .. lag_t11
means = [2,3,4,6,12]       # window sizes for the mean_t* features
stds = []                  # no std features are requested
results = []
# Only the first grp_count distinct GroupIDs of groups_dict are modelled.
for grp in groups_dict['GroupID'].drop_duplicates()[:grp_count]:
    print('GROUP: ',grp)
    df = data[data['GroupID']==grp]
    fcid_predictors = ['rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id','holding_name', 'freight_group_name','sender_department_name', 'sender_railway_name','recipient_department_name', 'recipient_railway_name']
    # Keep the per-FcID attributes and the weights aside so they can be merged
    # back after FillMissingDates, which only receives period/FcID/target.
    df_dict_fcid = df[fcid_predictors + ['FcID']].drop_duplicates()
    df_dict_weight = df[['FcID','period','real_weight']] 
    # presumably adds rows for periods missing per FcID - verify in FillMissingDates
    df = FillMissingDates(df[['period','FcID','real_wagon_count']])
    df = pd.merge(df,df_dict_fcid,how='left',on='FcID')
    df = pd.merge(df,df_dict_weight,how='left',on=['FcID','period'])
    # Rows without a matching weight after the left merge get weight 0.
    df['real_weight'] = df['real_weight'].fillna(0)
    period_predictors,df = CreateDateFeas(df)
    dfs= []
    for target in ['real_wagon_count']:
        # CreateFeas returns a (feature column names, feature frame) pair.
        dfs.append(CreateFeas(df,lags,means,stds,target))
        # NOTE(review): this loop is nested inside the target loop and walks
        # every entry accumulated in dfs so far; with the single target used
        # here it runs exactly once. Revisit before adding more targets.
        for target_predictors,curr_df in dfs:
            all_dfs=[]
            # Replicate the data set once per horizon (5 points ahead are
            # forecast, so the data grows 5x). For horizon i the target is
            # shifted by -(i-1) rows within each FcID, so a row's features are
            # paired with the target found (i-1) rows later for the same FcID.
            for i in range(1,6):
                if(i==1):
                    # Horizon 1 reuses the frame itself; 'hor' is added in place.
                    curr_df['hor'] = i
                    all_dfs.append(curr_df)
                else:
                    curr_df_ = curr_df.copy()
                    curr_df_['hor'] = i
                    curr_df_[target] = curr_df_.groupby(['FcID'])[target].transform(lambda x: x.shift(- (i-1)))
                    all_dfs.append(curr_df_)
            curr_df = pd.concat(all_dfs)

            predictors = target_predictors+ fcid_predictors + period_predictors + ['hor']
            print(predictors)
            # Split on the row's own period; train/val drop rows with any NaN.
            # NOTE(review): for hor > 1 the target was shifted back from a later
            # row, so train rows just before test_period[0] appear to carry
            # targets from the validation window - confirm this is intended.
            train = curr_df[curr_df['period']<test_period[0]].dropna().sort_values(by='period')
            val   = curr_df[(curr_df['period']>=test_period[0]) & (curr_df['period']<test_period[1])].dropna().sort_values(by='period')
            # NOTE(review): `test` is not used again within this cell.
            test  = curr_df[curr_df['period']>=test_period[1]].sort_values(by='period')
            # Forecast the 5 months following the last validation period.
            start_date = val['period'].max() + relativedelta(months=1)
            end_date   = val['period'].max() + relativedelta(months=5)
            print('Start predict for dates: ',start_date,' - ',end_date)
            model = FitModel(train[predictors],train[target],val[predictors],val[target],fcid_predictors,grp)
            # History handed to Predict: go back by the largest lag/mean/std
            # window so every feature can be rebuilt.
            hist_data_date = start_date -  relativedelta(months=max([max([0]+lags),max([0]+means),max([0]+stds)]))
            # Only the hor == 1 copy is used, because its target is unshifted.
            test_data = curr_df[ (curr_df['hor']==1) &  (curr_df['period']>=hist_data_date) & (curr_df['period']<start_date) ][['period','FcID',target]+predictors]
            result = Predict(test_data,model,start_date,end_date,target,lags,means,stds,predictors,fcid_predictors)
            results.append(result)

# Stack the per-group forecasts and keep a separate copy of the result.
results = pd.concat(results)
results_sv_hor = results.copy()


GROUP:  0


['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 12.05it/s]


GROUP:  1
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 12.50it/s]


GROUP:  2
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  8.79it/s]


GROUP:  3
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.85it/s]


GROUP:  4
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.68it/s]


GROUP:  5
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.71it/s]


GROUP:  6
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 12.00it/s]


GROUP:  7
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.54it/s]


GROUP:  8
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.47it/s]


GROUP:  9
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.45it/s]


GROUP:  10
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  8.24it/s]


GROUP:  11
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.34it/s]


GROUP:  12
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.56it/s]


GROUP:  13
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.22it/s]


GROUP:  14
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.59it/s]


GROUP:  15
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.22it/s]


GROUP:  16
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.00it/s]


GROUP:  17
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.77it/s]


GROUP:  18
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.30it/s]


GROUP:  19
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.58it/s]


GROUP:  20
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.94it/s]


GROUP:  21
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.33it/s]


GROUP:  22
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.78it/s]


GROUP:  23
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.73it/s]


GROUP:  24
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.50it/s]


GROUP:  25
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 11.14it/s]


GROUP:  26
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.38it/s]


GROUP:  27
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.35it/s]


GROUP:  28
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.72it/s]


GROUP:  29
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.59it/s]


GROUP:  30
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.37it/s]


GROUP:  31
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00, 10.13it/s]


GROUP:  32
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.28it/s]


GROUP:  33
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  8.85it/s]


GROUP:  34
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.57it/s]


GROUP:  35
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.37it/s]


GROUP:  36
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.34it/s]


GROUP:  37
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  7.26it/s]


GROUP:  38
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  8.86it/s]


GROUP:  39
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.61it/s]


GROUP:  40
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.32it/s]


GROUP:  41
['lag_t1', 'lag_t2', 'lag_t3', 'lag_t4', 'lag_t5', 'lag_t6', 'lag_t7', 'lag_t8', 'lag_t9', 'lag_t10', 'lag_t11', 'mean_t2', 'mean_t3', 'mean_t4', 'mean_t6', 'mean_t12', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'holding_name', 'freight_group_name', 'sender_department_name', 'sender_railway_name', 'recipient_department_name', 'recipient_railway_name', 'Month', 'Year', 'hor']
Start predict for dates:  2023-04-01 00:00:00  -  2023-08-01 00:00:00


100%|██████████| 5/5 [00:00<00:00,  9.22it/s]


In [20]:
# Ensemble step: average the predictions stored in results_sv and results_sv_hor
# and export the result as a ';'-separated submission file.
# Relies on results_sv, results_sv_hor and target defined in earlier cells.
pred_col = 'Predict_' + target  # single source of truth for the prediction column name

result_ensemble = results_sv.copy()
# Element-wise mean of the two prediction columns. pandas aligns the two Series on
# index, so a row present in only one frame yields NaN here and the integer cast
# below will raise.
result_ensemble[pred_col] = (results_sv[pred_col] + results_sv_hor[pred_col]) / 2
result_ensemble['forecast_weight'] = 0
# NOTE: astype(int) truncates toward zero (it does not round); kept as in the
# original so the submitted numbers do not change.
result_ensemble['forecast_wagon_count'] = result_ensemble[pred_col].astype(int)

# Keep only the columns written to the submission file, in this order.
submission_columns = [
    'period',
    'rps',
    'podrod',
    'filial',
    'client_sap_id',
    'freight_id',
    'sender_station_id',
    'recipient_station_id',
    'sender_organisation_id',
    'forecast_weight',
    'forecast_wagon_count',
]
result_ensemble = result_ensemble[submission_columns]

# `sep` is passed by keyword: positional arguments after the path are deprecated
# in pandas 2.x and keyword-only starting with pandas 3.0.
result_ensemble.to_csv('forecast_ex_ensemble.csv', sep=';', index=False)