Imports

In [1]:
from model import Forecast, FitPredict, Evaluation, Multiseries
from process_data import PreProcessData, PostProcessData, ProcessMultiseries

import pandas as pd
import random

Load datasets

In [2]:
# Raw train/test frames are read straight off the PreProcessData name
# (attribute access only; no instance is created in this cell).
df_train, df_test = PreProcessData.train, PreProcessData.test

In [3]:
# Instantiate the project helpers used throughout the notebook.
# Only `preprocessor` takes arguments (the raw train/test frames); the
# others are built with their defaults.
forecast = Forecast()             # source of the single-series forecaster factories
fit_predict = FitPredict()        # provides fit_forecaster / get_predictions
evaluator = Evaluation()          # not used in the cells visible here
preprocessor = PreProcessData(df_train, df_test)  # exposes `clean_dic`
postprocessor = PostProcessData() # not used in the cells visible here
multiforecast = Multiseries()     # provides create_fit_multi_forecaster / predict_multi
multiprocessor = ProcessMultiseries()  # provides group_df

Load models

In [4]:
# Short aliases for the forecaster factories exposed by `forecast`.
# They are only referenced here, not called; a factory is invoked later
# with the forecast horizon, e.g. `forest(steps)`.
lgbm, svr, forest, sarimax = (
    forecast.create_lgbm_regressor_forecaster,
    forecast.create_svr_regresor_forecaster,
    forecast.create_random_forest_regresor_forecaster,
    forecast.create_sarimax_forecaster,
)

In [5]:
# All candidate forecaster factories, in the order they were aliased.
model_list = [
    lgbm,
    svr,
    forest,
    sarimax,
]

In [6]:
# Human-readable label for each forecaster factory (keyed by the factory itself).
name_models = {
    lgbm: 'lgbm',
    forest: 'forest',
    svr: 'svr',
    sarimax: 'sarimax',
}

In [8]:
# (country, store, product) series that are routed to the second
# (multi-series) model instead of the per-series forecaster.
# Kept as an ordered list because later cells index into it.
df_to_2ndmodel = [
    # Singapore
    ('Singapore', 'Premium Sticker Mart', 'Kaggle'),
    # Norway
    ('Norway', 'Premium Sticker Mart', 'Kerneler'),
    ('Norway', 'Premium Sticker Mart', 'Kaggle Tiers'),
    ('Norway', 'Premium Sticker Mart', 'Kaggle'),
    ('Norway', 'Premium Sticker Mart', 'Holographic Goose'),
    ('Norway', 'Stickers for Less', 'Kerneler Dark Mode'),
    ('Norway', 'Stickers for Less', 'Kerneler'),
    ('Norway', 'Stickers for Less', 'Kaggle Tiers'),
    ('Norway', 'Stickers for Less', 'Kaggle'),
    ('Norway', 'Stickers for Less', 'Holographic Goose'),
    ('Norway', 'Discount Stickers', 'Kerneler'),
    ('Norway', 'Discount Stickers', 'Kaggle Tiers'),
    ('Norway', 'Discount Stickers', 'Kaggle'),
    ('Norway', 'Discount Stickers', 'Holographic Goose'),
    # Italy
    ('Italy', 'Premium Sticker Mart', 'Kaggle Tiers'),
    ('Italy', 'Premium Sticker Mart', 'Kaggle'),
    ('Italy', 'Stickers for Less', 'Kaggle Tiers'),
    ('Italy', 'Stickers for Less', 'Kaggle'),
    ('Italy', 'Discount Stickers', 'Kaggle Tiers'),
    ('Italy', 'Discount Stickers', 'Kaggle'),
    # Finland
    ('Finland', 'Premium Sticker Mart', 'Kaggle Tiers'),
    ('Finland', 'Premium Sticker Mart', 'Kaggle'),
    ('Finland', 'Stickers for Less', 'Kaggle Tiers'),
    ('Finland', 'Stickers for Less', 'Kaggle'),
    ('Finland', 'Discount Stickers', 'Kaggle Tiers'),
    ('Finland', 'Discount Stickers', 'Kaggle'),
    # Canada
    ('Canada', 'Premium Sticker Mart', 'Kaggle Tiers'),
    ('Canada', 'Premium Sticker Mart', 'Kaggle'),
    ('Canada', 'Stickers for Less', 'Kaggle Tiers'),
    ('Canada', 'Stickers for Less', 'Kaggle'),
    ('Canada', 'Stickers for Less', 'Holographic Goose'),
    ('Canada', 'Discount Stickers', 'Kaggle Tiers'),
    ('Canada', 'Discount Stickers', 'Kaggle'),
]

Fit models

In [7]:
# `clean_dic` maps each series key (tuples such as
# ('Canada', 'Discount Stickers', 'Holographic Goose')) to an indexable pair:
# item 0 is the training data, item 1 the test data.
df_dic = preprocessor.clean_dic

# Use the first series as the representative sample. The previous unseeded
# `random.choice` picked a different series on every kernel run, which made
# `steps` (and the sample frames) non-reproducible under Restart & Run All.
dic_sample = next(iter(df_dic.values()))
train_sample, test_sample = dic_sample[0], dic_sample[1]

# Forecast horizon = number of rows in the sample's test split.
# NOTE(review): assumes every series has a test split of the same length — confirm.
steps = len(test_sample)

# Name of the target column to forecast.
y_column = 'num_sold'

In [9]:
# Subset of `df_dic` restricted to the series handled by the second model
# (iteration order of `df_dic` is preserved).
multi_dic = {
    series_key: df_dic[series_key]
    for series_key in df_dic
    if series_key in df_to_2ndmodel
}


In [10]:
# Keep only item 0 (the training data) of each selected series.
multi_dic_train = {
    series_key: frames[0]
    for series_key, frames in multi_dic.items()
}

In [11]:
# Combine the per-series training data into a single frame for the
# multi-series forecaster. Judging by the displayed result, this yields one
# column per series named "<country>_<store>_<product>", indexed by date.
multi_train_df = multiprocessor.group_df(multi_dic_train)

In [27]:
# Show the combined multi-series training frame as the cell output.
multi_train_df

Unnamed: 0_level_0,Canada_Discount Stickers_Kaggle,Canada_Discount Stickers_Kaggle Tiers,Canada_Premium Sticker Mart_Kaggle,Canada_Premium Sticker Mart_Kaggle Tiers,Canada_Stickers for Less_Holographic Goose,Canada_Stickers for Less_Kaggle,Canada_Stickers for Less_Kaggle Tiers,Finland_Discount Stickers_Kaggle,Finland_Discount Stickers_Kaggle Tiers,Finland_Premium Sticker Mart_Kaggle,...,Norway_Premium Sticker Mart_Holographic Goose,Norway_Premium Sticker Mart_Kaggle,Norway_Premium Sticker Mart_Kaggle Tiers,Norway_Premium Sticker Mart_Kerneler,Norway_Stickers for Less_Holographic Goose,Norway_Stickers for Less_Kaggle,Norway_Stickers for Less_Kaggle Tiers,Norway_Stickers for Less_Kerneler,Norway_Stickers for Less_Kerneler Dark Mode,Singapore_Premium Sticker Mart_Kaggle
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01,973.0,906.0,2212.0,2013.0,300.0,1837.0,1659.0,926.0,774.0,2304.0,...,625.0,3940.0,3430.0,1951.0,579.0,3369.0,3195.0,1606.0,1672.0,2112.0
2010-01-02,881.0,854.0,2183.0,1953.0,281.0,1972.0,1541.0,869.0,764.0,1903.0,...,643.0,3915.0,3482.0,1741.0,475.0,3545.0,2784.0,1537.0,1907.0,2322.0
2010-01-03,1003.0,839.0,2459.0,1938.0,297.0,1936.0,1770.0,1036.0,815.0,2349.0,...,654.0,4541.0,3867.0,1971.0,568.0,3526.0,2866.0,1571.0,1879.0,2253.0
2010-01-04,744.0,609.0,1714.0,1567.0,235.0,1382.0,1243.0,677.0,664.0,1705.0,...,496.0,3303.0,2913.0,1460.0,445.0,2851.0,2627.0,1246.0,1532.0,1760.0
2010-01-05,707.0,640.0,1593.0,1590.0,240.0,1416.0,1319.0,782.0,608.0,1611.0,...,478.0,3008.0,2824.0,1471.0,400.0,2720.0,2323.0,1246.0,1488.0,1536.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-27,694.0,577.0,1611.0,1316.0,233.0,1518.0,1104.0,859.0,585.0,1852.0,...,435.0,2819.0,2125.0,1217.0,404.0,2355.0,1854.0,1099.0,1328.0,2169.0
2016-12-28,810.0,601.0,1812.0,1418.0,261.0,1702.0,1249.0,878.0,657.0,1961.0,...,509.0,3283.0,2455.0,1285.0,409.0,2610.0,1967.0,1149.0,1316.0,2558.0
2016-12-29,856.0,687.0,2157.0,1604.0,266.0,1607.0,1137.0,915.0,722.0,2366.0,...,498.0,3341.0,2756.0,1509.0,388.0,2764.0,2111.0,1272.0,1375.0,2833.0
2016-12-30,939.0,704.0,2218.0,1597.0,303.0,1851.0,1465.0,886.0,702.0,2303.0,...,541.0,3465.0,2512.0,1598.0,468.0,3127.0,2359.0,1221.0,1622.0,2822.0


In [18]:
# Take the first series key routed to the second model as a worked example.
sample = df_to_2ndmodel[0]
sample

('Singapore', 'Premium Sticker Mart', 'Kaggle')

In [20]:
# Join the key parts with underscores; this matches the column naming of
# `multi_train_df` (e.g. 'Singapore_Premium Sticker Mart_Kaggle').
sample_str = '_'.join(sample)
sample_str

'Singapore_Premium Sticker Mart_Kaggle'

In [26]:
# Build the submission file: one forecast per series in `df_dic`.
#   * Series listed in `df_to_2ndmodel` are predicted by the shared
#     multi-series forecaster (its output column is named after the level).
#   * Every other series gets its own random-forest forecaster (output
#     column 'pred').
# Each value in `df_dic` is indexed as value[0] = train data, value[1] = test data.

# Set lookup instead of scanning the list for every key.
second_model_keys = set(df_to_2ndmodel)

# The multi-series forecaster is always fitted on the same `multi_train_df`,
# so it is fitted once (lazily, on first use) rather than once per series.
# NOTE(review): assumes `predict_multi` does not mutate the fitted forecaster — confirm.
multi_forecaster = None

# Collect per-series results and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop is quadratic.
submission_parts = []

for counter, (key, value) in enumerate(df_dic.items(), start=1):
    print(counter)
    print(key)
    if key in second_model_keys:
        if multi_forecaster is None:
            multi_forecaster = multiforecast.create_fit_multi_forecaster(steps, multi_train_df)
        forecaster = multi_forecaster
        level_str = '_'.join(key)
        predictions = multiforecast.predict_multi(steps, forecaster, level_str)
        print('PREDICTIONS')
        display(predictions)
        pred_column = level_str
        log_suffix = ' multi'
    else:
        forecaster = forest(steps)
        fit_predict.fit_forecaster(forecaster, value[0], y_column)
        predictions = fit_predict.get_predictions(forecaster, steps)
        pred_column = 'pred'
        log_suffix = ''

    # Attach the predictions to the test rows and keep only the submission columns.
    test_w_preds = pd.concat([value[1], predictions], axis=1).rename(columns={pred_column: 'num_sold'})
    submission_parts.append(test_w_preds[['id', 'num_sold']])
    print(f'submission_updated for {key}{log_suffix}')

if submission_parts:
    submission = pd.concat(submission_parts, ignore_index=True).sort_values('id')
else:
    # No series at all: keep the original behavior of writing an empty frame.
    submission = pd.DataFrame()

submission.to_csv('submission_20250129_au_multi.csv', index=False)
print('submission_saved')

1
('Canada', 'Discount Stickers', 'Holographic Goose')
submission_updated for ('Canada', 'Discount Stickers', 'Holographic Goose')
2
('Canada', 'Discount Stickers', 'Kaggle')




PREDICTIONS


Unnamed: 0,Canada_Discount Stickers_Kaggle
2017-01-01,845.120949
2017-01-02,685.564803
2017-01-03,652.511043
2017-01-04,637.771322
2017-01-05,653.975282
...,...
2019-12-27,812.560348
2019-12-28,860.846381
2019-12-29,913.478080
2019-12-30,788.617702


submission_updated for ('Canada', 'Discount Stickers', 'Kaggle') multi
3
('Canada', 'Discount Stickers', 'Kaggle Tiers')




PREDICTIONS


Unnamed: 0,Canada_Discount Stickers_Kaggle Tiers
2017-01-01,631.925599
2017-01-02,499.041832
2017-01-03,479.278378
2017-01-04,481.653506
2017-01-05,481.384547
...,...
2019-12-27,692.153212
2019-12-28,716.615645
2019-12-29,768.807051
2019-12-30,667.365167


submission_updated for ('Canada', 'Discount Stickers', 'Kaggle Tiers') multi
4
('Canada', 'Discount Stickers', 'Kerneler')
submission_updated for ('Canada', 'Discount Stickers', 'Kerneler')
5
('Canada', 'Discount Stickers', 'Kerneler Dark Mode')
submission_updated for ('Canada', 'Discount Stickers', 'Kerneler Dark Mode')
6
('Canada', 'Premium Sticker Mart', 'Holographic Goose')


KeyboardInterrupt: 