### Demand Restoration

In [1]:
import numpy as np
import pandas as pd
import math
import random


import optuna
import xgboost as xgb
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Ridge
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import restore_demand_beta as dem

import warnings
warnings.filterwarnings('ignore')

In [7]:
import preprocessing_methods as prepro

In [13]:
# ПЕРВЫЙ ВАРИАНТ ДАННЫХ


df = pd.read_csv('resultData.csv')
df.head()


Unnamed: 0,product_id,location_id,date,sales_qty,stock_qty,cost,promo
0,8000054507,5637146288,2020-06-09 00:00:00,1.0,0,717.5,0
1,8000054507,9000135750,2020-06-13 00:00:00,1.0,1,717.5,0
2,8000054507,5637146288,2020-06-08 00:00:00,1.0,1,717.5,0
3,8000145055,5637146303,2020-06-16 00:00:00,0.0,4,83.7728,0
4,9000883500,5637146282,2020-06-15 00:00:00,,4,,0


In [14]:
product_ids = df.product_id.unique()
location_ids = df.location_id.unique()

### Препроцессинг данных

In [15]:
def add_lag_features(df, product_list):
    df_train = pd.DataFrame()
    for product in product_list:
        
        df_model = df[df.product_id.isin([product])]
        df_model = prepro.df_preprocessing(df_model)
        ##preprocessed_df = prepro.df_preprocess_windows(df_model)
        
        df_train = df_train.append( df_model)

    return df_train

In [16]:
preprocessed_df = add_lag_features(df[df.product_id.isin(product_ids)], product_ids)

In [17]:
preprocessed_df.head()

Unnamed: 0,product_id,location_id,date,sales_qty,stock_qty,cost,promo,day,weekday,month,year,deficit
0,8000054507,5637146288,0,1.0,0,717.5,0,1,3,1,1970,1
1,8000054507,9000135750,1,1.0,1,717.5,0,1,3,1,1970,1
2,8000054507,5637146288,2,1.0,1,717.5,0,1,3,1,1970,1
11,8000054507,5637146303,11,0.0,1796,717.5,0,1,3,1,1970,0
12,8000054507,5637146303,12,0.0,1794,717.5,0,1,3,1,1970,0


In [18]:
preprocessed_df.to_csv('resultDataV2.csv', index=False)

## Стоп

In [8]:
df_train_demand = preprocessed_df[preprocessed_df.deficit == 0]
df_train_demand["real demand"] = df_train_demand['sales_qty']

In [9]:
df_train_demand.head()

Unnamed: 0,product_id,location_id,date,sales_qty,flg_spromo,stock_qty,day,weekday,month,year,deficit,s_qty win15,s_qty win15 m7,s_qty win15 p7,s_qty win7,s_qty win7 m7,s_qty win7 p7,real demand
1138926,555800,4600,1138926,2.0,0,66.0,1,3,1,1970,0,1.833333,1.857143,2.0,1.6,2.285714,2.0,2.0
1138927,555800,4600,1138927,1.0,0,64.0,1,3,1,1970,0,1.916667,1.846154,1.888889,1.8,2.142857,2.0,1.0
1138928,555800,4600,1138928,1.0,0,63.0,1,3,1,1970,0,1.916667,1.916667,1.8,1.833333,2.0,2.0,1.0
1138929,555800,4600,1138929,2.0,0,62.0,1,3,1,1970,0,2.076923,1.916667,1.818182,1.714286,2.0,2.25,2.0
1138930,555800,4600,1138930,3.0,0,60.0,1,3,1,1970,0,2.0,1.916667,1.909091,1.571429,2.166667,2.0,3.0


### Feature generation for ML algorithms

**Questions**

- What features should be added to the original data set?
- Calendar-dummy variables: holidays, day of week, day of month
Lagged-features
- x-sindicated features (e.g price x store_number**2)

#### Lagged Features Generation

In [None]:
4

### Создание модели

In [10]:
# Создание метрики
def my_smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

smape = make_scorer(my_smape, greater_is_better=False)

In [11]:
y = df_train_demand["real demand"]
X = df_train_demand.drop(columns=["real demand", "sales_qty", "stock_qty", "date","s_qty win15",
    "s_qty win15 m7",
    "s_qty win15 p7",
    "s_qty win7",
    "s_qty win7 m7",
    "s_qty win7 p7"])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=808)

In [16]:
numeric = [
]

categorical = [
    "product_id",
    "location_id",
    "day",
    "weekday",
    "month",
    "year",
    "deficit"
]

In [17]:
column_transformer = ColumnTransformer(
    [
        ("ohe", OneHotEncoder(handle_unknown="ignore"), categorical),
        ("scaling", StandardScaler(), numeric),
    ]
)

In [18]:
def objective(trial):
    y = df_train_demand["real demand"]
    X = df_train_demand.drop(columns=["real demand", "sales_qty", "stock_qty", "date","s_qty win15",
    "s_qty win15 m7",
    "s_qty win15 p7",
    "s_qty win7",
    "s_qty win7 m7",
    "s_qty win7 p7"], axis=1)
    
    

    pipeline = Pipeline(
        steps=[
            ("ohe_and_scaling", column_transformer),
        ]
    )

    X = pipeline.fit_transform(X, y)
    

    train_x, test_x, train_y, test_y = train_test_split(
        X, y, test_size=0.2, random_state=808
    )

    

    param = {
        "lambda": trial.suggest_loguniform("lambda", 1e-3, 10.0),
        "alpha": trial.suggest_loguniform("alpha", 1e-3, 10.0),
        "colsample_bytree": trial.suggest_categorical(
            "colsample_bytree", [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        ),
        "subsample": trial.suggest_categorical(
            "subsample", [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
        ),
        "learning_rate": trial.suggest_categorical(
            "learning_rate", [0.17, 0.2, 0.22, 0.26, 0.28, 0.31]
        ),
        "n_estimators": 4000,
        "max_depth": trial.suggest_categorical("max_depth", [3, 4, 5, 6]),
        "random_state": trial.suggest_categorical(
            "random_state", [24, 808, 2020]
        ),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 300),
    }
    model = xgb.XGBRegressor(**param)
    
    model.fit(
        train_x,
        train_y,
        eval_set=[(test_x, test_y)],
        early_stopping_rounds=100,
        verbose=False,
    )
    preds = model.predict(test_x)
    smape_error = my_smape(preds, test_y)

    return smape_error

In [19]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=5)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2023-05-07 17:48:14,944][0m A new study created in memory with name: no-name-7d07423a-e8f3-432b-a708-bd1765ec73e8[0m
[32m[I 2023-05-07 17:48:15,773][0m Trial 0 finished with value: 53.205798922649016 and parameters: {'lambda': 0.6959848230651667, 'alpha': 0.12513812657355064, 'colsample_bytree': 0.9, 'subsample': 0.4, 'learning_rate': 0.22, 'max_depth': 3, 'random_state': 808, 'min_child_weight': 223}. Best is trial 0 with value: 53.205798922649016.[0m
[32m[I 2023-05-07 17:48:27,127][0m Trial 1 finished with value: 52.70192987296909 and parameters: {'lambda': 0.003318736365744125, 'alpha': 0.3281848627232579, 'colsample_bytree': 0.9, 'subsample': 1.0, 'learning_rate': 0.2, 'max_depth': 4, 'random_state': 2020, 'min_child_weight': 92}. Best is trial 1 with value: 52.70192987296909.[0m
[32m[I 2023-05-07 17:48:27,760][0m Trial 2 finished with value: 61.35588662392864 and parameters: {'lambda': 0.003823682721322329, 'alpha': 0.23661215273852007, 'colsample_bytree': 0.3, '

Number of finished trials: 5
Best trial: {'lambda': 1.4019300068519949, 'alpha': 0.012117047478469662, 'colsample_bytree': 0.7, 'subsample': 0.8, 'learning_rate': 0.22, 'max_depth': 4, 'random_state': 2020, 'min_child_weight': 154}


In [20]:
study.best_trial.params

{'lambda': 1.4019300068519949,
 'alpha': 0.012117047478469662,
 'colsample_bytree': 0.7,
 'subsample': 0.8,
 'learning_rate': 0.22,
 'max_depth': 4,
 'random_state': 2020,
 'min_child_weight': 154}

In [21]:
xgb_cool = xgb.XGBRegressor(**study.best_trial.params)

In [22]:
pipeline = Pipeline(
        steps=[
            ("ohe_and_scaling", column_transformer),
            ("xgb", xgb_cool)
        ]
    )

In [23]:
model = pipeline
print((-cross_val_score(model, X, y, cv=5, scoring=smape)).sum() / 5)

56.270399499552624


In [145]:
linreg = Ridge()

In [146]:
pipeline = Pipeline(
        steps=[
            ("ohe_and_scaling", column_transformer),
            ("linreg", linreg)
        ]
    )

In [147]:
model = pipeline
print((-cross_val_score(model, X, y, cv=5, scoring=smape)).sum() / 5)

54.24613065863425


### Создание модели для каждой пары товар-магазин

In [318]:
def objective2(trial, X, y):    

    pipeline = Pipeline(
        steps=[
            ("ohe_and_scaling", column_transformer),
        ]
    )

    X = pipeline.fit_transform(X, y)
    

    train_x, test_x, train_y, test_y = train_test_split(
        X, y, test_size=0.2, random_state=808
    )

    

    param = {
        "lambda": trial.suggest_loguniform("lambda", 1e-3, 10.0),
        "alpha": trial.suggest_loguniform("alpha", 1e-3, 10.0),
        "colsample_bytree": trial.suggest_categorical(
            "colsample_bytree", [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        ),
        "subsample": trial.suggest_categorical(
            "subsample", [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
        ),
        "learning_rate": trial.suggest_categorical(
            "learning_rate", [0.17, 0.2, 0.22, 0.26, 0.28, 0.31]
        ),
        "n_estimators": 4000,
        "max_depth": trial.suggest_categorical("max_depth", [3, 4, 5, 6]),
        "random_state": trial.suggest_categorical(
            "random_state", [24, 808, 2020]
        ),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 300),
    }
    model = xgb.XGBRegressor(**param)
    
    model.fit(
        train_x,
        train_y,
        eval_set=[(test_x, test_y)],
        early_stopping_rounds=100,
        verbose=False,
    )
    preds = model.predict(test_x)
    smape_error = my_smape(preds, test_y)

    return smape_error

In [322]:
from tqdm.notebook import trange, tqdm

In [468]:
general_smape = []
n = 0


for product in tqdm(product_ids):
    for location in location_ids:
        tmp_df = df_train_demand[df_train_demand.product_id.isin([product]) & df_train_demand.location_id.isin([location])]
        
        
        y1 = tmp_df["real demand"]
        X1 = tmp_df.drop(columns=["real demand", "sales_qty", "stock_qty", "date"])
        
        study = optuna.create_study(direction='minimize')
        study.optimize(lambda trial : objective2(trial,X1, y1),n_trials=2)
        
        
        xgb_cool = xgb.XGBRegressor(**study.best_trial.params)
        
        pipeline = Pipeline(
        steps=[
                ("ohe_and_scaling", column_transformer),
                ("xgb", xgb_cool)
            ]
        )
        
        model = pipeline
        
        general_smape.append((-cross_val_score(model, X, y, cv=5, scoring=smape)).sum() / 5)
        n +=1

  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-05-03 09:19:04,880][0m A new study created in memory with name: no-name-42aacdcf-1afe-4d7d-a742-174a7195fef0[0m
[32m[I 2023-05-03 09:19:05,104][0m Trial 0 finished with value: 13.258442014797842 and parameters: {'lambda': 0.0028728504027275767, 'alpha': 6.360899799918878, 'colsample_bytree': 0.7, 'subsample': 0.6, 'learning_rate': 0.31, 'max_depth': 6, 'random_state': 24, 'min_child_weight': 15}. Best is trial 0 with value: 13.258442014797842.[0m
[32m[I 2023-05-03 09:19:05,217][0m Trial 1 finished with value: 199.87519443377192 and parameters: {'lambda': 0.003908969280922672, 'alpha': 0.0026298011968480333, 'colsample_bytree': 0.8, 'subsample': 0.6, 'learning_rate': 0.26, 'max_depth': 5, 'random_state': 808, 'min_child_weight': 87}. Best is trial 0 with value: 13.258442014797842.[0m
[32m[I 2023-05-03 09:19:09,660][0m A new study created in memory with name: no-name-4d51f36f-9915-4d8f-9c01-830eda9caca4[0m
[32m[I 2023-05-03 09:19:09,960][0m Trial 0 finished with

[32m[I 2023-05-03 09:19:45,142][0m Trial 0 finished with value: 199.7845424260018 and parameters: {'lambda': 1.2203907362991464, 'alpha': 0.007847420177508706, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.22, 'max_depth': 6, 'random_state': 808, 'min_child_weight': 104}. Best is trial 0 with value: 199.7845424260018.[0m
[32m[I 2023-05-03 09:19:45,262][0m Trial 1 finished with value: 199.7845424260018 and parameters: {'lambda': 0.5200747229662542, 'alpha': 0.490691419425485, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.28, 'max_depth': 5, 'random_state': 24, 'min_child_weight': 259}. Best is trial 0 with value: 199.7845424260018.[0m
[32m[I 2023-05-03 09:19:49,617][0m A new study created in memory with name: no-name-8fb2183f-27cc-4685-b017-0b3489e5d3d8[0m
[32m[I 2023-05-03 09:19:49,788][0m Trial 0 finished with value: 103.15331678634465 and parameters: {'lambda': 0.2128466773190968, 'alpha': 0.0013943537787926173, 'colsample_bytree': 0.3, 'sub

[32m[I 2023-05-03 09:20:22,320][0m Trial 1 finished with value: 199.87080160717295 and parameters: {'lambda': 9.729730789302582, 'alpha': 0.008408953186723432, 'colsample_bytree': 0.9, 'subsample': 0.4, 'learning_rate': 0.28, 'max_depth': 4, 'random_state': 2020, 'min_child_weight': 220}. Best is trial 0 with value: 199.87080160717295.[0m
[32m[I 2023-05-03 09:20:26,532][0m A new study created in memory with name: no-name-68c94bc7-6b69-4e2c-8e43-327c99296f24[0m
[32m[I 2023-05-03 09:20:26,658][0m Trial 0 finished with value: 199.8370681120717 and parameters: {'lambda': 0.055591409156915, 'alpha': 0.02945263459646592, 'colsample_bytree': 0.3, 'subsample': 0.5, 'learning_rate': 0.17, 'max_depth': 4, 'random_state': 808, 'min_child_weight': 96}. Best is trial 0 with value: 199.8370681120717.[0m
[32m[I 2023-05-03 09:20:26,795][0m Trial 1 finished with value: 15.162069958510616 and parameters: {'lambda': 0.004799195904475214, 'alpha': 1.6050591906722351, 'colsample_bytree': 0.5, 's

[32m[I 2023-05-03 09:21:01,126][0m A new study created in memory with name: no-name-4e119a0b-645b-449e-83cd-beb64733e25d[0m
[32m[I 2023-05-03 09:21:01,245][0m Trial 0 finished with value: 199.75794658748524 and parameters: {'lambda': 0.0063436989206182995, 'alpha': 0.006570238760505856, 'colsample_bytree': 0.6, 'subsample': 0.4, 'learning_rate': 0.17, 'max_depth': 4, 'random_state': 808, 'min_child_weight': 186}. Best is trial 0 with value: 199.75794658748524.[0m
[32m[I 2023-05-03 09:21:01,535][0m Trial 1 finished with value: 23.81112656274955 and parameters: {'lambda': 0.028422951786628618, 'alpha': 0.24913702818510922, 'colsample_bytree': 0.5, 'subsample': 0.6, 'learning_rate': 0.22, 'max_depth': 5, 'random_state': 24, 'min_child_weight': 93}. Best is trial 1 with value: 23.81112656274955.[0m
[32m[I 2023-05-03 09:21:05,181][0m A new study created in memory with name: no-name-1541fd7f-9a1f-4de9-a710-5edee0b5eae0[0m
[32m[I 2023-05-03 09:21:05,299][0m Trial 0 finished with

[32m[I 2023-05-03 09:21:37,786][0m Trial 0 finished with value: 199.878166584131 and parameters: {'lambda': 0.57524598434777, 'alpha': 1.2613791175421332, 'colsample_bytree': 0.6, 'subsample': 0.5, 'learning_rate': 0.22, 'max_depth': 3, 'random_state': 24, 'min_child_weight': 216}. Best is trial 0 with value: 199.878166584131.[0m
[32m[I 2023-05-03 09:21:37,896][0m Trial 1 finished with value: 199.878166584131 and parameters: {'lambda': 0.11074430541949663, 'alpha': 1.0176283759317688, 'colsample_bytree': 0.8, 'subsample': 0.8, 'learning_rate': 0.17, 'max_depth': 4, 'random_state': 24, 'min_child_weight': 264}. Best is trial 0 with value: 199.878166584131.[0m
[32m[I 2023-05-03 09:21:40,240][0m A new study created in memory with name: no-name-22381345-f492-438f-8fa0-ec50b636bc68[0m
[32m[I 2023-05-03 09:21:40,363][0m Trial 0 finished with value: 199.71066187776486 and parameters: {'lambda': 1.2698205585143667, 'alpha': 1.1964996555937595, 'colsample_bytree': 0.4, 'subsample': 0

[32m[I 2023-05-03 09:22:12,611][0m Trial 1 finished with value: 22.30047065312891 and parameters: {'lambda': 0.07792955028497656, 'alpha': 3.809235524276456, 'colsample_bytree': 1.0, 'subsample': 1.0, 'learning_rate': 0.28, 'max_depth': 3, 'random_state': 2020, 'min_child_weight': 59}. Best is trial 1 with value: 22.30047065312891.[0m
[32m[I 2023-05-03 09:22:15,142][0m A new study created in memory with name: no-name-abc287b9-c731-4a2f-9d42-ac42e88e6d17[0m
[32m[I 2023-05-03 09:22:15,372][0m Trial 0 finished with value: 15.024345318293953 and parameters: {'lambda': 0.0021480063304841926, 'alpha': 0.0011743728975044032, 'colsample_bytree': 0.8, 'subsample': 0.6, 'learning_rate': 0.28, 'max_depth': 3, 'random_state': 24, 'min_child_weight': 70}. Best is trial 0 with value: 15.024345318293953.[0m
[32m[I 2023-05-03 09:22:15,489][0m Trial 1 finished with value: 199.8119284666392 and parameters: {'lambda': 0.003479447684533347, 'alpha': 0.3171691777330954, 'colsample_bytree': 0.3, 

[32m[I 2023-05-03 09:22:52,175][0m A new study created in memory with name: no-name-530e8c5e-a812-4fd9-99bd-cddb9b233b29[0m
[32m[I 2023-05-03 09:22:52,344][0m Trial 0 finished with value: 15.810597716049037 and parameters: {'lambda': 0.3734234917180161, 'alpha': 0.004183871559341745, 'colsample_bytree': 1.0, 'subsample': 0.7, 'learning_rate': 0.31, 'max_depth': 3, 'random_state': 808, 'min_child_weight': 58}. Best is trial 0 with value: 15.810597716049037.[0m
[32m[I 2023-05-03 09:22:52,461][0m Trial 1 finished with value: 199.7777142433938 and parameters: {'lambda': 0.007292740412801766, 'alpha': 0.02010457177009365, 'colsample_bytree': 0.4, 'subsample': 0.8, 'learning_rate': 0.22, 'max_depth': 4, 'random_state': 24, 'min_child_weight': 159}. Best is trial 0 with value: 15.810597716049037.[0m
[32m[I 2023-05-03 09:22:55,335][0m A new study created in memory with name: no-name-1d53c611-0928-42ec-b232-4216b959b0a3[0m
[32m[I 2023-05-03 09:22:55,453][0m Trial 0 finished with v

[32m[I 2023-05-03 09:23:27,508][0m Trial 0 finished with value: 199.8754974933554 and parameters: {'lambda': 0.09732897365649057, 'alpha': 0.017451606207822566, 'colsample_bytree': 1.0, 'subsample': 0.4, 'learning_rate': 0.17, 'max_depth': 6, 'random_state': 808, 'min_child_weight': 268}. Best is trial 0 with value: 199.8754974933554.[0m
[32m[I 2023-05-03 09:23:27,867][0m Trial 1 finished with value: 12.56900802238298 and parameters: {'lambda': 0.6287884323296923, 'alpha': 0.004586071349848979, 'colsample_bytree': 0.9, 'subsample': 0.8, 'learning_rate': 0.31, 'max_depth': 6, 'random_state': 24, 'min_child_weight': 3}. Best is trial 1 with value: 12.56900802238298.[0m
[32m[I 2023-05-03 09:23:32,863][0m A new study created in memory with name: no-name-2112be3d-4fdf-4bf3-b504-83064372f2fc[0m
[32m[I 2023-05-03 09:23:33,026][0m Trial 0 finished with value: 26.589227011225407 and parameters: {'lambda': 0.4135894580868154, 'alpha': 0.0032352464623745534, 'colsample_bytree': 0.8, 's

[32m[I 2023-05-03 09:24:06,728][0m Trial 1 finished with value: 199.8808052051542 and parameters: {'lambda': 0.0822670727279614, 'alpha': 2.6466596445992163, 'colsample_bytree': 0.7, 'subsample': 1.0, 'learning_rate': 0.22, 'max_depth': 5, 'random_state': 2020, 'min_child_weight': 210}. Best is trial 0 with value: 11.584984918713259.[0m
[32m[I 2023-05-03 09:24:09,330][0m A new study created in memory with name: no-name-bfefaa97-c6b4-46b4-b32c-ef76e4613cbf[0m
[32m[I 2023-05-03 09:24:09,429][0m Trial 0 finished with value: 199.7757522193904 and parameters: {'lambda': 0.3487315272433153, 'alpha': 0.23523904576039192, 'colsample_bytree': 0.5, 'subsample': 0.4, 'learning_rate': 0.17, 'max_depth': 5, 'random_state': 2020, 'min_child_weight': 188}. Best is trial 0 with value: 199.7757522193904.[0m
[32m[I 2023-05-03 09:24:09,536][0m Trial 1 finished with value: 199.7757522193904 and parameters: {'lambda': 0.018718931668949983, 'alpha': 0.006540083828813192, 'colsample_bytree': 0.9, 

In [333]:
general_smape/n

21.649472842722986

# Demand Resoration

In [366]:
df_model = df.copy()
df_model["flg_spromo"] = [0] * len(df)

In [367]:
df_model.head()

Unnamed: 0,date,product_id,location_id,sales_qty,stock_qty,flg_spromo
0,2022-01-01,1,10,1745,1975,0
1,2022-01-02,1,10,1889,1746,0
2,2022-01-03,1,10,1581,2075,0
3,2022-01-04,1,10,1852,1411,0
4,2022-01-05,1,10,1761,1798,0


# промо


In [457]:
df_new = pd.DataFrame()

for product in product_ids:
    for shop in location_ids:
        df_tmp = df_model.loc[(df_model["product_id"] == product) & (df_model["location_id"] == location)]
        df_tmp = dem.restore_demand(df_tmp, product, location_id=location, method="promo")
        df_new = df_new.append(df_tmp)

In [458]:
df_new.head()

Unnamed: 0,product_id,location_id,flg_spromo,sales_qty,stock_qty,lambda,demand
2022-01-01,1,90,0,1800,950,1585.84507,1800.0
2022-01-02,1,90,0,1996,1653,1585.84507,1996.0
2022-01-03,1,90,0,1749,1807,1585.84507,1749.0
2022-01-04,1,90,0,1426,2411,1585.84507,1520.0
2022-01-05,1,90,0,1914,1972,1585.84507,1914.0


In [461]:
df_new["real demand"] = np.where(df_new["sales_qty"] < df_new["stock_qty"], df_new["sales_qty"], np.nan)
df_new = df_new[df_new["real demand"].notna()]

In [462]:
df_new

Unnamed: 0,product_id,location_id,flg_spromo,sales_qty,stock_qty,lambda,demand,real demand
2022-01-03,1,90,0,1749,1807,1585.845070,1749.0,1749.0
2022-01-04,1,90,0,1426,2411,1585.845070,1520.0,1426.0
2022-01-05,1,90,0,1914,1972,1585.845070,1914.0,1914.0
2022-01-08,1,90,0,1507,1591,1585.845070,1537.0,1507.0
2022-01-09,1,90,0,1817,2053,1585.845070,1817.0,1817.0
...,...,...,...,...,...,...,...,...
2022-12-20,10,90,0,1222,1960,1350.642384,1378.0,1222.0
2022-12-21,10,90,0,1202,1281,1350.642384,1301.0,1202.0
2022-12-25,10,90,0,1699,1929,1350.642384,1699.0,1699.0
2022-12-27,10,90,0,1483,1719,1350.642384,1483.0,1483.0


In [463]:
my_smape(df_new["lambda"], df_new["real demand"])

17.919114801039527

# window

In [464]:
df_new = pd.DataFrame()

for product in product_ids:
    for shop in location_ids:
        df_tmp = df_model.loc[(df_model["product_id"] == product) & (df_model["location_id"] == location)]
        df_tmp = dem.restore_demand(df_tmp, product, location_id=location, method="window")
        df_new = df_new.append(df_tmp)

In [465]:
df_new.head()

Unnamed: 0,product_id,location_id,flg_spromo,sales_qty,stock_qty,lambda,demand
2022-01-01,1,90,0,1800,950,1848.333333,1833.0
2022-01-02,1,90,0,1996,1653,1742.75,1996.0
2022-01-03,1,90,0,1749,1807,1777.0,1826.0
2022-01-04,1,90,0,1426,2411,1783.8,1832.0
2022-01-05,1,90,0,1914,1972,1850.4,1914.0


In [466]:
df_new["real demand"] = np.where(df_new["sales_qty"] < df_new["stock_qty"], df_new["sales_qty"], np.nan)
df_new = df_new[df_new["real demand"].notna()]

In [467]:
my_smape(df_new["lambda"], df_new["real demand"])

19.9142014123869

In [None]:
- добавить фильтры лаговых фич
- добавить помимо медианы - персентиль/ и тд
- получить данные
- сделать сегментацию на моделя машинного обучения