## 参考  
### kaggle notebook lightGBM
https://www.kaggle.com/girmdshinsei/for-japanese-beginner-with-wrmsse-in-lgbm?scriptVersionId=31044557

### lightGBM 時系列
https://qiita.com/ground0state/items/657861de619a4e4a30de

### Quick start catboost
https://catboost.ai/docs/concepts/python-quickstart.html

### Pythonでcatboostを使ってみる
#### (cat_featuresの使い方を調べた)
https://qiita.com/shin_mura/items/3d9ce25a60bdd25a3333

### XGBoostパラメータのまとめとランダムサーチ実装
https://qiita.com/FJyusk56/items/0649f4362587261bd57a

In [1]:
import pandas as pd
import os
import gc
import numpy as np
import matplotlib.pyplot as plt

# ラベルエンコーダー
from sklearn import preprocessing, metrics

# lightGBM
import lightgbm as lgb

# CatBoost
from catboost import Pool, CatBoostRegressor

# XGBoost
import xgboost as xgb

# 1セルでまとめて.head()、.tail()等を入力しても大丈夫になる
from IPython.display import display

# 誤差算定
from sklearn.metrics import mean_absolute_error

# 二乗平均平方根誤差 (RMSE)
from sklearn.metrics import mean_squared_error

# 決定係数
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings('ignore')

# DataFrameの表示数を変更
pd.set_option('max_columns', 500)
pd.set_option('max_rows', 500)

### データ作成

In [2]:
# ローカル用
path = os.getcwd() + "/"

# kaggle Notebook用
INPUT_DIR = '../input/m5-forecasting-accuracy'

calendar.csv -製品の販売日に関する情報が含まれています。
sales_train_validation.csv -製品および店舗ごとの過去の毎日の販売台数データが​​含まれています [d_1 - d_1913]
sample_submission.csv-提出の正しい形式。詳細については、「評価」タブを参照してください。
sell_prices.csv -店舗および日付ごとに販売された製品の価格に関する情報が含まれています。

sales_train_evaluation.csv-締め切りの1か月前に1回ご利用いただけます。売上高が含まれます[d_1 - d_1941]

各行は含むidの連結であるitem_idとstore_idのいずれかである、validation（公共のランキングに対応する）、またはevaluation（プライベートランキングに対応します）。　　 F1-F28各行で販売されるアイテムの28予測日（）を予測しています。　　 以下のためのvalidation行、これに相当するd_1914 - d_1941、とのためevaluationの行、これに相当しますd_1942 - d_1969。　　

validation d_1914 - d_1941の単価と量を予測する。１か月前に１回公開される。 evaluation d_1942-d_1969の単価と量を予測する

### 作成データの読込

In [3]:
try:
    stv_melt = pd.read_pickle(path + "melt_stv.pkl")
except FileNotFoundError:
    1

### 通常通りcsvを読み込んでデータを作る

In [4]:
# sales_train_validation.csv
try:
    stv = pd.read_csv(path + "sales_train_validation.csv") # ローカル用
except FileNotFoundError:
    stv = pd.read_csv(f"{INPUT_DIR}/sales_train_validation.csv") # kaggle用

    
# calendar.csv
try:
    cal = pd.read_csv(path + "calendar.csv") # ローカル用
except FileNotFoundError:
    cal = pd.read_csv(f"{INPUT_DIR}/calendar.csv") # kaggle用

    
# sell_prices.csv
try:
    price = pd.read_csv(path + "sell_prices.csv") # ローカル用
except FileNotFoundError:
    price = pd.read_csv(f"{INPUT_DIR}/sell_prices.csv") # kaggle用

    
# sample_submission.csv
try:
    ss = pd.read_csv(path + "sample_submission.csv") # ローカル用
except FileNotFoundError:
    ss = pd.read_csv(f"{INPUT_DIR}/sample_submission.csv") # kaggle用

stv.shape

(30490, 1919)

### ロースペックマシン限定

stv = pd.read_csv(path + "sales_train_validation.csv",
                               skiprows=lambda x: x not in range(0,1001))

cal = pd.read_csv(path + "calendar.csv",
                               skiprows=lambda x: x not in range(0,3001))

price = pd.read_csv(path + "sell_prices.csv",
                               skiprows=lambda x: x not in range(0,3001))

ss = pd.read_csv(path + "sample_submission.csv",
                               skiprows=lambda x: x not in range(0,1001))

In [5]:
display(stv.head())
display(stv.tail())
display(stv.dtypes)
display(cal.head())
display(cal.tail())
display(cal.dtypes)
display(cal.max())
display(price.head())
display(price.tail())
display(price.dtypes)
display(price.max())
display(price.shape)
display(ss.head())
display(ss.tail())
display(ss.shape)

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,d_11,d_12,d_13,d_14,d_15,d_16,d_17,d_18,d_19,d_20,d_21,d_22,d_23,d_24,d_25,d_26,d_27,d_28,d_29,d_30,d_31,d_32,d_33,d_34,d_35,d_36,d_37,d_38,d_39,d_40,d_41,d_42,d_43,d_44,d_45,d_46,d_47,d_48,d_49,d_50,d_51,d_52,d_53,d_54,d_55,d_56,d_57,d_58,d_59,d_60,d_61,d_62,d_63,d_64,d_65,d_66,d_67,d_68,d_69,d_70,d_71,d_72,d_73,d_74,d_75,d_76,d_77,d_78,d_79,d_80,d_81,d_82,d_83,d_84,d_85,d_86,d_87,d_88,d_89,d_90,d_91,d_92,d_93,d_94,d_95,d_96,d_97,d_98,d_99,d_100,d_101,d_102,d_103,d_104,d_105,d_106,d_107,d_108,d_109,d_110,d_111,d_112,d_113,d_114,d_115,d_116,d_117,d_118,d_119,d_120,d_121,d_122,d_123,d_124,d_125,d_126,d_127,d_128,d_129,d_130,d_131,d_132,d_133,d_134,d_135,d_136,d_137,d_138,d_139,d_140,d_141,d_142,d_143,d_144,d_145,d_146,d_147,d_148,d_149,d_150,d_151,d_152,d_153,d_154,d_155,d_156,d_157,d_158,d_159,d_160,d_161,d_162,d_163,d_164,d_165,d_166,d_167,d_168,d_169,d_170,d_171,d_172,d_173,d_174,d_175,d_176,d_177,d_178,d_179,d_180,d_181,d_182,d_183,d_184,d_185,d_186,d_187,d_188,d_189,d_190,d_191,d_192,d_193,d_194,d_195,d_196,d_197,d_198,d_199,d_200,d_201,d_202,d_203,d_204,d_205,d_206,d_207,d_208,d_209,d_210,d_211,d_212,d_213,d_214,d_215,d_216,d_217,d_218,d_219,d_220,d_221,d_222,d_223,d_224,d_225,d_226,d_227,d_228,d_229,d_230,d_231,d_232,d_233,d_234,d_235,d_236,d_237,d_238,d_239,d_240,d_241,d_242,d_243,d_244,...,d_1664,d_1665,d_1666,d_1667,d_1668,d_1669,d_1670,d_1671,d_1672,d_1673,d_1674,d_1675,d_1676,d_1677,d_1678,d_1679,d_1680,d_1681,d_1682,d_1683,d_1684,d_1685,d_1686,d_1687,d_1688,d_1689,d_1690,d_1691,d_1692,d_1693,d_1694,d_1695,d_1696,d_1697,d_1698,d_1699,d_1700,d_1701,d_1702,d_1703,d_1704,d_1705,d_1706,d_1707,d_1708,d_1709,d_1710,d_1711,d_1712,d_1713,d_1714,d_1715,d_1716,d_1717,d_1718,d_1719,d_1720,d_1721,d_1722,d_1723,d_1724,d_1725,d_1726,d_1727,d_1728,d_1729,d_1730,d_1731,d_1732,d_1733,d_1734,d_1735,d_1736,d_1737,d_1738,d_1739,d_1740,d_1741,d_1742,d_1743,d_1744,d_1745,d_1746,d_1747,d_1748,d_1749,d_1750,d_1751,d_1752,d_1753,d_1754,d_1755,d_1756,d_1757,d_1758,d_1759,d_1760,d_1761,d_1762,d_1763,d_1764,d_1765,d_1766,d_1767,d_1768,d_1769,d_1770,d_1771,d_1772,d_1773,d_1774,d_1775,d_1776,d_1777,d_1778,d_1779,d_1780,d_1781,d_1782,d_1783,d_1784,d_1785,d_1786,d_1787,d_1788,d_1789,d_1790,d_1791,d_1792,d_1793,d_1794,d_1795,d_1796,d_1797,d_1798,d_1799,d_1800,d_1801,d_1802,d_1803,d_1804,d_1805,d_1806,d_1807,d_1808,d_1809,d_1810,d_1811,d_1812,d_1813,d_1814,d_1815,d_1816,d_1817,d_1818,d_1819,d_1820,d_1821,d_1822,d_1823,d_1824,d_1825,d_1826,d_1827,d_1828,d_1829,d_1830,d_1831,d_1832,d_1833,d_1834,d_1835,d_1836,d_1837,d_1838,d_1839,d_1840,d_1841,d_1842,d_1843,d_1844,d_1845,d_1846,d_1847,d_1848,d_1849,d_1850,d_1851,d_1852,d_1853,d_1854,d_1855,d_1856,d_1857,d_1858,d_1859,d_1860,d_1861,d_1862,d_1863,d_1864,d_1865,d_1866,d_1867,d_1868,d_1869,d_1870,d_1871,d_1872,d_1873,d_1874,d_1875,d_1876,d_1877,d_1878,d_1879,d_1880,d_1881,d_1882,d_1883,d_1884,d_1885,d_1886,d_1887,d_1888,d_1889,d_1890,d_1891,d_1892,d_1893,d_1894,d_1895,d_1896,d_1897,d_1898,d_1899,d_1900,d_1901,d_1902,d_1903,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,2,0,2,2,0,0,0,1,1,0,2,0,1,1,2,0,1,0,0,0,2,1,0,1,1,2,0,0,0,0,0,0,0,1,0,1,0,1,0,3,1,1,0,1,1,2,0,0,0,0,1,1,0,0,0,0,3,0,1,0,0,0,0,1,1,1,0,1,0,2,0,0,0,0,2,0,0,0,0,1,1,2,0,0,0,0,2,0,0,1,1,1,1,0,0,0,0,0,1,2,2,0,1,0,0,0,0,1,2,1,0,0,0,0,0,1,0,3,0,1,2,1,0,3,0,0,0,1,0,2,2,1,0,0,1,2,0,1,0,1,4,0,0,5,0,0,0,0,0,0,2,1,2,1,0,0,0,1,1,1,0,0,1,1,1,1,1,0,0,0,2,2,0,0,1,4,0,0,0,0,1,1,2,0,4,0,1,0,1,4,2,0,2,0,1,1,0,1,0,0,1,1,3,0,0,0,1,1,1,3,1,3,1,2,2,0,1,1,1,1,0,0,0,0,0,1,0,4,2,3,0,1,2,0,0,0,1,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,...,1,0,0,1,1,0,0,0,3,4,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,2,1,0,0,0,1,1,0,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,2,1,0,0,1,1,0,2,0,1,0,2,1,1,5,0,1,0,3,5,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2,1,0,3,0,1,2,0,3,1,0,0,1,0,1,0,0,0,0,2,0,1,0,1,0,1,1,0,1,0,1,0,0,0,1,2,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0,1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,2,0,2,3,0,1,3,1,2,2,3,0,1,1,0,0,0,0,2,3,1,1,4,3,2,1,2,2,0,1,5,2,0,1,2,3,0,1,2,1,3,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,6,1,1,2,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,2,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,1,0,0,0,0,0,0,2,0,1,0,0,1,1,1,0,2,3,1,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,2,3,0,2,0,0,2,2,0,0,2,1,2,1,1,1,2,0,0,1,0,1,3,1,0,0,0,0,3,5,2,2,1,1,1,1,1,1,0,0,2,1,1,1,2,0,0,0,2,5,6,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,2,0,0,5,1,0,0,1,3,1,3,5,1,3,0,3,4,4,0,0,1,3,1,4,0,0,2,0,2,0,1,4,2,1,0,2,1,3,6,1,1,2,1,2,3,1,2,0,0,0,3,4,5,1,0,0,1,0,1,4,6,3,1,1,0,1,4,5,1,1,4,0,0,0,1,2,2,1,1,6,2,4,4,0,0,0,2,2,0,1,1,3,1,2,4,2,1,3,2,...,0,0,0,11,2,2,1,1,2,1,1,2,1,1,0,3,0,2,14,0,0,0,3,3,1,1,1,1,0,3,3,1,7,3,1,0,0,1,0,1,1,0,0,2,1,4,4,3,0,2,0,0,1,3,3,0,2,1,2,4,7,0,2,1,0,5,5,2,2,4,1,0,0,3,1,0,0,0,3,1,3,3,0,0,4,1,1,1,1,3,3,1,0,3,0,1,3,3,3,2,2,2,4,3,0,5,1,3,3,2,0,0,1,1,0,2,2,2,3,2,1,2,0,5,0,1,0,0,0,3,4,0,0,1,5,3,2,2,0,1,1,0,2,1,0,2,4,0,0,0,3,2,4,3,1,2,3,0,8,2,1,2,2,5,2,6,1,0,3,5,1,1,6,4,3,2,2,3,2,1,0,0,0,2,0,5,4,2,1,1,2,3,0,6,0,0,0,1,0,1,5,3,1,0,0,0,1,2,3,0,1,3,4,2,1,4,1,3,5,0,6,6,0,0,0,0,3,1,2,1,3,1,0,2,5,4,2,0,3,0,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,3,2,3,5,3,1,0,0,1,0,2,2,4,0,0,3,1,1,1,2,2,0,0,0,0,0,0,3,7,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,0,0,0,0,0,0,0,2,0,6,2,4,2,0,0,3,2,4,1,0,0,0,0,1,1,1,2,0,0,0,0,0,0,0,0,2,6,7,9,4,7,6,3,4,2,0,1,5,2,2,0,0,0,0,0,0,1,3,...,2,0,5,2,0,0,1,0,3,1,0,1,0,2,1,0,0,1,3,1,1,0,4,0,2,1,2,4,0,1,0,0,3,1,2,2,0,1,1,0,4,0,0,4,0,2,2,2,1,2,1,1,1,4,0,2,1,2,0,0,1,0,1,1,2,2,3,1,0,2,3,0,1,1,4,0,3,2,1,2,1,2,2,1,2,0,1,1,2,0,2,0,0,0,4,2,1,2,0,0,0,0,0,2,1,0,0,1,2,0,1,2,1,2,1,2,3,3,0,3,1,5,3,2,1,2,3,4,0,0,1,0,0,1,0,0,1,0,0,0,0,2,0,0,3,0,0,1,2,2,0,1,0,0,0,1,0,0,3,0,0,1,1,0,3,1,0,4,1,2,0,0,0,1,1,2,0,0,5,2,2,2,1,0,0,0,3,0,0,0,3,1,1,1,1,2,1,0,0,1,0,2,1,1,0,3,1,1,2,1,1,0,3,2,2,2,3,1,0,0,0,0,1,0,4,4,0,1,4,0,1,0,1,0,1,1,2,0,1,1,2,1,1,0,1,1,2,2,2,4


Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,d_11,d_12,d_13,d_14,d_15,d_16,d_17,d_18,d_19,d_20,d_21,d_22,d_23,d_24,d_25,d_26,d_27,d_28,d_29,d_30,d_31,d_32,d_33,d_34,d_35,d_36,d_37,d_38,d_39,d_40,d_41,d_42,d_43,d_44,d_45,d_46,d_47,d_48,d_49,d_50,d_51,d_52,d_53,d_54,d_55,d_56,d_57,d_58,d_59,d_60,d_61,d_62,d_63,d_64,d_65,d_66,d_67,d_68,d_69,d_70,d_71,d_72,d_73,d_74,d_75,d_76,d_77,d_78,d_79,d_80,d_81,d_82,d_83,d_84,d_85,d_86,d_87,d_88,d_89,d_90,d_91,d_92,d_93,d_94,d_95,d_96,d_97,d_98,d_99,d_100,d_101,d_102,d_103,d_104,d_105,d_106,d_107,d_108,d_109,d_110,d_111,d_112,d_113,d_114,d_115,d_116,d_117,d_118,d_119,d_120,d_121,d_122,d_123,d_124,d_125,d_126,d_127,d_128,d_129,d_130,d_131,d_132,d_133,d_134,d_135,d_136,d_137,d_138,d_139,d_140,d_141,d_142,d_143,d_144,d_145,d_146,d_147,d_148,d_149,d_150,d_151,d_152,d_153,d_154,d_155,d_156,d_157,d_158,d_159,d_160,d_161,d_162,d_163,d_164,d_165,d_166,d_167,d_168,d_169,d_170,d_171,d_172,d_173,d_174,d_175,d_176,d_177,d_178,d_179,d_180,d_181,d_182,d_183,d_184,d_185,d_186,d_187,d_188,d_189,d_190,d_191,d_192,d_193,d_194,d_195,d_196,d_197,d_198,d_199,d_200,d_201,d_202,d_203,d_204,d_205,d_206,d_207,d_208,d_209,d_210,d_211,d_212,d_213,d_214,d_215,d_216,d_217,d_218,d_219,d_220,d_221,d_222,d_223,d_224,d_225,d_226,d_227,d_228,d_229,d_230,d_231,d_232,d_233,d_234,d_235,d_236,d_237,d_238,d_239,d_240,d_241,d_242,d_243,d_244,...,d_1664,d_1665,d_1666,d_1667,d_1668,d_1669,d_1670,d_1671,d_1672,d_1673,d_1674,d_1675,d_1676,d_1677,d_1678,d_1679,d_1680,d_1681,d_1682,d_1683,d_1684,d_1685,d_1686,d_1687,d_1688,d_1689,d_1690,d_1691,d_1692,d_1693,d_1694,d_1695,d_1696,d_1697,d_1698,d_1699,d_1700,d_1701,d_1702,d_1703,d_1704,d_1705,d_1706,d_1707,d_1708,d_1709,d_1710,d_1711,d_1712,d_1713,d_1714,d_1715,d_1716,d_1717,d_1718,d_1719,d_1720,d_1721,d_1722,d_1723,d_1724,d_1725,d_1726,d_1727,d_1728,d_1729,d_1730,d_1731,d_1732,d_1733,d_1734,d_1735,d_1736,d_1737,d_1738,d_1739,d_1740,d_1741,d_1742,d_1743,d_1744,d_1745,d_1746,d_1747,d_1748,d_1749,d_1750,d_1751,d_1752,d_1753,d_1754,d_1755,d_1756,d_1757,d_1758,d_1759,d_1760,d_1761,d_1762,d_1763,d_1764,d_1765,d_1766,d_1767,d_1768,d_1769,d_1770,d_1771,d_1772,d_1773,d_1774,d_1775,d_1776,d_1777,d_1778,d_1779,d_1780,d_1781,d_1782,d_1783,d_1784,d_1785,d_1786,d_1787,d_1788,d_1789,d_1790,d_1791,d_1792,d_1793,d_1794,d_1795,d_1796,d_1797,d_1798,d_1799,d_1800,d_1801,d_1802,d_1803,d_1804,d_1805,d_1806,d_1807,d_1808,d_1809,d_1810,d_1811,d_1812,d_1813,d_1814,d_1815,d_1816,d_1817,d_1818,d_1819,d_1820,d_1821,d_1822,d_1823,d_1824,d_1825,d_1826,d_1827,d_1828,d_1829,d_1830,d_1831,d_1832,d_1833,d_1834,d_1835,d_1836,d_1837,d_1838,d_1839,d_1840,d_1841,d_1842,d_1843,d_1844,d_1845,d_1846,d_1847,d_1848,d_1849,d_1850,d_1851,d_1852,d_1853,d_1854,d_1855,d_1856,d_1857,d_1858,d_1859,d_1860,d_1861,d_1862,d_1863,d_1864,d_1865,d_1866,d_1867,d_1868,d_1869,d_1870,d_1871,d_1872,d_1873,d_1874,d_1875,d_1876,d_1877,d_1878,d_1879,d_1880,d_1881,d_1882,d_1883,d_1884,d_1885,d_1886,d_1887,d_1888,d_1889,d_1890,d_1891,d_1892,d_1893,d_1894,d_1895,d_1896,d_1897,d_1898,d_1899,d_1900,d_1901,d_1902,d_1903,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
30485,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,0,0,2,2,0,3,1,4,1,0,0,3,4,4,0,0,1,0,1,1,7,7,3,6,3,3,7,12,4,2,7,5,12,5,3,3,7,6,4,6,3,6,2,3,4,1,1,3,2,1,2,3,4,7,2,4,4,4,7,7,2,4,7,4,4,7,3,4,2,3,10,6,8,5,1,0,2,1,0,3,0,2,3,1,1,1,7,1,4,2,2,0,0,0,5,1,1,3,1,6,6,3,4,3,2,2,1,2,2,1,1,0,0,0,2,3,1,0,1,1,1,1,1,3,0,0,1,2,1,0,1,3,1,2,2,0,3,2,1,0,0,1,2,0,0,0,0,0,2,0,2,0,1,2,1,1,0,0,2,1,1,0,1,1,2,1,3,1,0,2,0,0,0,1,0,1,0,1,1,0,2,0,0,2,1,0,1,0,1,1,1,0,3,0,1,0,1,2,3,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,5,1,0,1,0,0,1,0,1,0,1,1,1,2,1,2,0,1,0,0,1,2,2,2,0,0,1,...,1,0,2,1,0,1,0,2,0,0,1,2,0,2,0,0,0,1,1,1,0,0,1,1,0,2,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,2,1,0,0,1,0,0,1,0,0,4,1,5,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,3,1,1,1,0,0,1,0,1,2,1,0,0,2,0,2,1,0,1,1,0,1,2,0,1,0,1,0,0,2,0,1,1,0,3,0,0,1,1,3,3,1,0,0,0,1,2,3,0,0,0,1,0,2,1,1,0,0,4,0,6,1,0,1,0,0,0,1,1,0,0,0,0,0,1,0,2,0,1,0,0,3,1,1,0,0,0,2,3,1,0,1,3,0,3,0,0,2,0,0,0,1,0,0,3,0,0,0,0,0,0,1,0,1,0,0,2,0,0,2,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,1,0,0,1
30486,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,0,0,0,0,0,5,0,1,1,3,1,1,0,4,2,0,1,2,1,1,0,0,0,0,3,1,1,1,2,0,1,0,1,1,1,3,3,4,4,3,6,1,0,1,2,3,0,1,1,1,0,0,0,1,0,0,1,0,0,0,1,2,0,1,2,1,2,0,1,1,0,1,1,2,0,3,2,1,0,0,0,0,1,1,3,1,0,1,1,0,1,2,0,3,1,2,1,0,0,0,1,0,1,1,0,1,2,2,0,0,0,1,0,0,2,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,2,1,0,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,1,2,2,2,0,0,0,3,1,4,1,2,1,2,3,3,0,0,2,2,0,3,1,0,0,0,0,0,0,2,2,5,0,0,1,0,1,1,2,0,1,0,1,0,3,0,0,2,1,1,0,1,0,1,0,0,0,0,1,0,0,0,2,1,1,1,0,2,0,1,0,1,4,1,0,1,1,0,1,0,0,1,1,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0
30487,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,0,6,0,2,2,4,1,8,5,2,7,5,3,5,20,8,10,3,3,4,7,2,3,5,6,3,4,1,2,5,1,2,2,2,1,9,8,1,6,3,3,4,10,4,6,3,2,1,1,3,2,2,1,1,1,1,1,0,1,4,0,0,1,3,1,1,1,3,0,1,1,1,2,3,1,1,1,2,1,3,1,0,0,3,1,1,2,0,1,2,1,1,2,1,0,0,1,0,1,0,2,2,3,2,0,2,2,2,0,1,0,1,1,2,1,1,0,0,0,0,0,2,0,2,0,2,2,0,1,1,1,2,1,3,0,2,1,1,1,1,2,1,0,1,0,2,1,0,1,2,0,1,0,0,2,1,1,1,1,2,2,1,1,4,7,0,1,4,2,2,2,0,1,0,0,0,1,0,0,0,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,...,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,1,0,1,0,1,1,0,1,0,2,0,1,1,1,1,1,1,4,0,1,3,1,0,0,0,1,0,0,0,0,0,1,2,1,0,2,0,0,2,2,0,3,0,0,0,0,0,1,3,3,1,0,2,0,0,0,1,1,0,0,0,1,0,0,2,0,0,2,1,0,0,1,2,0,1,2,1,0,1,1,1,1,0,0,1,0,0,1,0,0,0,2,0,1,2,2,2,2,2,2,1,1,2,1,3,1,1,0,3,1,1,2,0,0,2,0,0,0,2,0,1,1,0,0,0,0,1,1,2,1,4,0,0,2,2,4,0,0,0,2,1,0,2,0,1,0,0,1,0
30488,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,4,1,3,5,3,0,2,3,2,3,5,0,2,0,3,3,0,1,3,4,0,0,0,0,3,3,0,4,1,0,3,3,0,5,0,1,0,3,2,2,1,4,0,0,1,0,0,0,0,4,1,0,2,1,2,7,0,0,1,4,2,0,2,1,3,2,2,1,0,2,0,3,0,4,3,12,1,0,0,0,4,1,1,2,1,3,2,4,3,2,2,2,1,3,2,1,1,0,0,0,1,5,1,1,0,4,1,2,1,2,3,1,1,1,0,1,3,5,1,0,0,1,2,0,0,0,0,0,2,1,0,0,0,3,3,3,2,8,1,1,0,0,0,4,4,2,2,1,1,2,0,1,0,0,2,0,1,3,1,2,0,0,2,4,1,0,1,0,0,2,1,2,3,3,0,2,1,0,0,0,0,0,0,0,0,0,2,2,2,2,0,2,2,0,0,0,0,0,1,3,0,3,1,1,1,1,1,1,0,2,1,1,2,4,3,0,0,0,0,0,1,1,2,1,1,1,1,2,0,1,0,3,0,0,1,0,0,1,0,3,1,3
30489,FOODS_3_827_WI_3_validation,FOODS_3_827,FOODS_3,FOODS,WI_3,WI,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,3,0,0,3,7,1,0,5,0,0,0,2,1,3,1,3,3,1,2,1,2,4,0,0,0,0,0,3,1,0,1,4,4,12,3,4,0,0,2,1,0,1,0,1,4,2,0,1,1,2,2,7,3,2,10,0,1,4,0,2,6,0,0,6,2,4,0,0,0,1,3,2,2,4,2,2,3,2,0,1,0,0,4,2,4,1,1,3,1,0,0,1,2,0,1,0,1,0,2,0,1,1,6,1,2,0,1,0,3,0,1,3,0,1,0,5,1,2,2,2,0,2,3,1,2,0,6,0,0,3,1,0,0,0,0,0,0,0,1,1,0,2,1,4,1,8,3,2,0,0,2,2,0,1,3,5,1,0,0,0,0,0,6,3,2,1,0,2,1,1,2,1,2,0,1,10,0,3,2,1,2,1,1,3,0,0,2,1,3,0,0,1,4,0,2,0,5,4,1,3,0,0,0,0,0,2,2,4,1,3,3,3,0,1,4,2,0,5,3,2,0,5,7,3,1,4,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


id          object
item_id     object
dept_id     object
cat_id      object
store_id    object
             ...  
d_1909       int64
d_1910       int64
d_1911       int64
d_1912       int64
d_1913       int64
Length: 1919, dtype: object

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0,0,0
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0,0,0
2,2011-01-31,11101,Monday,3,1,2011,d_3,,,,,0,0,0
3,2011-02-01,11101,Tuesday,4,2,2011,d_4,,,,,1,1,0
4,2011-02-02,11101,Wednesday,5,2,2011,d_5,,,,,1,0,1


Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
1964,2016-06-15,11620,Wednesday,5,6,2016,d_1965,,,,,0,1,1
1965,2016-06-16,11620,Thursday,6,6,2016,d_1966,,,,,0,0,0
1966,2016-06-17,11620,Friday,7,6,2016,d_1967,,,,,0,0,0
1967,2016-06-18,11621,Saturday,1,6,2016,d_1968,,,,,0,0,0
1968,2016-06-19,11621,Sunday,2,6,2016,d_1969,NBAFinalsEnd,Sporting,Father's day,Cultural,0,0,0


date            object
wm_yr_wk         int64
weekday         object
wday             int64
month            int64
year             int64
d               object
event_name_1    object
event_type_1    object
event_name_2    object
event_type_2    object
snap_CA          int64
snap_TX          int64
snap_WI          int64
dtype: object

date        2016-06-19
wm_yr_wk         11621
weekday      Wednesday
wday                 7
month               12
year              2016
d                d_999
snap_CA              1
snap_TX              1
snap_WI              1
dtype: object

Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
0,CA_1,HOBBIES_1_001,11325,9.58
1,CA_1,HOBBIES_1_001,11326,9.58
2,CA_1,HOBBIES_1_001,11327,8.26
3,CA_1,HOBBIES_1_001,11328,8.26
4,CA_1,HOBBIES_1_001,11329,8.26


Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
6841116,WI_3,FOODS_3_827,11617,1.0
6841117,WI_3,FOODS_3_827,11618,1.0
6841118,WI_3,FOODS_3_827,11619,1.0
6841119,WI_3,FOODS_3_827,11620,1.0
6841120,WI_3,FOODS_3_827,11621,1.0


store_id       object
item_id        object
wm_yr_wk        int64
sell_price    float64
dtype: object

store_id                 WI_3
item_id       HOUSEHOLD_2_516
wm_yr_wk                11621
sell_price             107.32
dtype: object

(6841121, 4)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,HOBBIES_1_002_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,HOBBIES_1_004_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,HOBBIES_1_005_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
60975,FOODS_3_823_WI_3_evaluation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60976,FOODS_3_824_WI_3_evaluation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60977,FOODS_3_825_WI_3_evaluation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60978,FOODS_3_826_WI_3_evaluation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60979,FOODS_3_827_WI_3_evaluation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(60980, 29)

In [6]:
day1_1913 = [f"d_{i}" for i in range(1, 1914)]

In [7]:
stv_melt =  pd.melt(stv, id_vars=['id','store_id','item_id'],
           value_vars=day1_1913,
           var_name = "d", value_name = "vol")

In [8]:
del day1_1913
gc.collect()

66

In [9]:
product = stv[["id","item_id","store_id"]]

In [10]:
ss_val = ss[0:30490]
ss_val.columns = ["id"] + [f"d_{d}" for d in range(1914, 1942)]

ss_eva = ss[30490:60980]
ss_eva.columns = ["id"] + [f"d_{d}" for d in range(1942, 1970)]

In [11]:
ss_eva['id'] = ss_eva['id'].str.replace('_evaluation','_validation')

In [12]:
ss_val = pd.merge(ss_val, product, how = 'left', left_on = ['id'], right_on = ['id'])
ss_eva = pd.merge(ss_eva, product, how = 'left', left_on = ['id'], right_on = ['id'])

In [13]:
display(ss_val.head(3))
display(ss_val.tail(3))
display(ss_val.shape)
display(ss_eva.head(3))
display(ss_eva.tail(3))
display(ss_eva.shape)

Unnamed: 0,id,d_1914,d_1915,d_1916,d_1917,d_1918,d_1919,d_1920,d_1921,d_1922,d_1923,d_1924,d_1925,d_1926,d_1927,d_1928,d_1929,d_1930,d_1931,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941,item_id,store_id
0,HOBBIES_1_001_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,HOBBIES_1_001,CA_1
1,HOBBIES_1_002_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,HOBBIES_1_002,CA_1
2,HOBBIES_1_003_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,HOBBIES_1_003,CA_1


Unnamed: 0,id,d_1914,d_1915,d_1916,d_1917,d_1918,d_1919,d_1920,d_1921,d_1922,d_1923,d_1924,d_1925,d_1926,d_1927,d_1928,d_1929,d_1930,d_1931,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941,item_id,store_id
30487,FOODS_3_825_WI_3_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,FOODS_3_825,WI_3
30488,FOODS_3_826_WI_3_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,FOODS_3_826,WI_3
30489,FOODS_3_827_WI_3_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,FOODS_3_827,WI_3


(30490, 31)

Unnamed: 0,id,d_1942,d_1943,d_1944,d_1945,d_1946,d_1947,d_1948,d_1949,d_1950,d_1951,d_1952,d_1953,d_1954,d_1955,d_1956,d_1957,d_1958,d_1959,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969,item_id,store_id
0,HOBBIES_1_001_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,HOBBIES_1_001,CA_1
1,HOBBIES_1_002_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,HOBBIES_1_002,CA_1
2,HOBBIES_1_003_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,HOBBIES_1_003,CA_1


Unnamed: 0,id,d_1942,d_1943,d_1944,d_1945,d_1946,d_1947,d_1948,d_1949,d_1950,d_1951,d_1952,d_1953,d_1954,d_1955,d_1956,d_1957,d_1958,d_1959,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969,item_id,store_id
30487,FOODS_3_825_WI_3_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,FOODS_3_825,WI_3
30488,FOODS_3_826_WI_3_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,FOODS_3_826,WI_3
30489,FOODS_3_827_WI_3_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,FOODS_3_827,WI_3


(30490, 31)

In [14]:
val_1914_1941 = [f"d_{i}" for i in range(1914, 1942)]
eva_1942_1969 = [f"d_{i}" for i in range(1942, 1970)]

In [15]:
val_melt =  pd.melt(ss_val, id_vars=['id','store_id', "item_id"],
           value_vars=val_1914_1941,
           var_name = "d", value_name = "vol")
eva_melt =  pd.melt(ss_eva, id_vars=['id','store_id', "item_id"],
           value_vars=eva_1942_1969,
           var_name = "d", value_name = "vol")

In [16]:
stv_melt = pd.concat([stv_melt, val_melt, eva_melt])

In [17]:
display(stv_melt.head(3))
display(stv_melt.tail(3))
display(stv_melt.shape)

Unnamed: 0,id,store_id,item_id,d,vol
0,HOBBIES_1_001_CA_1_validation,CA_1,HOBBIES_1_001,d_1,0
1,HOBBIES_1_002_CA_1_validation,CA_1,HOBBIES_1_002,d_1,0
2,HOBBIES_1_003_CA_1_validation,CA_1,HOBBIES_1_003,d_1,0


Unnamed: 0,id,store_id,item_id,d,vol
853717,FOODS_3_825_WI_3_validation,WI_3,FOODS_3_825,d_1969,0
853718,FOODS_3_826_WI_3_validation,WI_3,FOODS_3_826,d_1969,0
853719,FOODS_3_827_WI_3_validation,WI_3,FOODS_3_827,d_1969,0


(60034810, 5)

In [18]:
del ss, ss_val, ss_eva, val_1914_1941, eva_1942_1969, val_melt, eva_melt, product
gc.collect()

44

In [19]:
cal = cal[["date","wm_yr_wk","d","event_name_1","event_type_1","event_name_2","event_type_2"]]

In [20]:
stv_melt = pd.merge(stv_melt, cal, how = 'left', left_on = ['d'], right_on = ['d'])

In [21]:
del cal
gc.collect()

66

In [22]:
display(stv_melt.head())
display(stv_melt.tail())
display(stv_melt.dtypes)
display(stv_melt.shape)

Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2
0,HOBBIES_1_001_CA_1_validation,CA_1,HOBBIES_1_001,d_1,0,2011-01-29,11101,,,,
1,HOBBIES_1_002_CA_1_validation,CA_1,HOBBIES_1_002,d_1,0,2011-01-29,11101,,,,
2,HOBBIES_1_003_CA_1_validation,CA_1,HOBBIES_1_003,d_1,0,2011-01-29,11101,,,,
3,HOBBIES_1_004_CA_1_validation,CA_1,HOBBIES_1_004,d_1,0,2011-01-29,11101,,,,
4,HOBBIES_1_005_CA_1_validation,CA_1,HOBBIES_1_005,d_1,0,2011-01-29,11101,,,,


Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2
60034805,FOODS_3_823_WI_3_validation,WI_3,FOODS_3_823,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural
60034806,FOODS_3_824_WI_3_validation,WI_3,FOODS_3_824,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural
60034807,FOODS_3_825_WI_3_validation,WI_3,FOODS_3_825,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural
60034808,FOODS_3_826_WI_3_validation,WI_3,FOODS_3_826,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural
60034809,FOODS_3_827_WI_3_validation,WI_3,FOODS_3_827,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural


id              object
store_id        object
item_id         object
d               object
vol              int64
date            object
wm_yr_wk         int64
event_name_1    object
event_type_1    object
event_name_2    object
event_type_2    object
dtype: object

(60034810, 11)

In [23]:
stv_melt = stv_melt.merge(price, on = ['store_id', 'item_id', 'wm_yr_wk'], how = 'left')

In [24]:
del price
gc.collect()

66

In [25]:
display(stv_melt.head())
display(stv_melt.tail())
display(stv_melt.dtypes)
display(stv_melt.shape)

Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price
0,HOBBIES_1_001_CA_1_validation,CA_1,HOBBIES_1_001,d_1,0,2011-01-29,11101,,,,,
1,HOBBIES_1_002_CA_1_validation,CA_1,HOBBIES_1_002,d_1,0,2011-01-29,11101,,,,,
2,HOBBIES_1_003_CA_1_validation,CA_1,HOBBIES_1_003,d_1,0,2011-01-29,11101,,,,,
3,HOBBIES_1_004_CA_1_validation,CA_1,HOBBIES_1_004,d_1,0,2011-01-29,11101,,,,,
4,HOBBIES_1_005_CA_1_validation,CA_1,HOBBIES_1_005,d_1,0,2011-01-29,11101,,,,,


Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price
60034805,FOODS_3_823_WI_3_validation,WI_3,FOODS_3_823,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,2.98
60034806,FOODS_3_824_WI_3_validation,WI_3,FOODS_3_824,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,2.48
60034807,FOODS_3_825_WI_3_validation,WI_3,FOODS_3_825,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,3.98
60034808,FOODS_3_826_WI_3_validation,WI_3,FOODS_3_826,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,1.28
60034809,FOODS_3_827_WI_3_validation,WI_3,FOODS_3_827,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,1.0


id               object
store_id         object
item_id          object
d                object
vol               int64
date             object
wm_yr_wk          int64
event_name_1     object
event_type_1     object
event_name_2     object
event_type_2     object
sell_price      float64
dtype: object

(60034810, 12)

### 特徴量作成

In [26]:
stv_melt["date2"] = pd.to_datetime(stv_melt["date"])

In [27]:
stv_melt["year"] = stv_melt["date2"].dt.year
stv_melt["month"] = stv_melt["date2"].dt.month
stv_melt["week"] = stv_melt["date2"].dt.week
stv_melt["day"] = stv_melt["date2"].dt.day
stv_melt["dayofweek"] = stv_melt["date2"].dt.dayofweek

stv_melt["year"] = stv_melt["year"].astype('int8')
stv_melt["month"] = stv_melt["month"].astype('int8')
stv_melt["week"] = stv_melt["week"].astype('int8')
stv_melt["day"] = stv_melt["day"].astype('int8')
stv_melt["dayofweek"] = stv_melt["dayofweek"].astype('int8')

In [28]:
stv_melt = stv_melt.drop("date2", axis=1)

In [29]:
#　ラグの作成
for i in [7,30,90]:
    stv_melt['shift%s'%i] = stv_melt["vol"].shift(i)

In [30]:
# 平均値
'''
for i in [7,30,90]:
    stv_melt['mean%s'%i] = stv_melt["vol"].rolling(i).mean()
'''

'\nfor i in [7,30,90]:\n    stv_melt[\'mean%s\'%i] = stv_melt["vol"].rolling(i).mean()\n'

In [31]:
# 中央値
'''
for i in [7,30,90]:
    stv_melt['median%s'%i] = stv_melt["vol"].rolling(i).median()
'''

'\nfor i in [7,30,90]:\n    stv_melt[\'median%s\'%i] = stv_melt["vol"].rolling(i).median()\n'

In [32]:
# 最小値
'''
for i in [7,30,90]:
    stv_melt['min%s'%i] = stv_melt["vol"].rolling(i).min()
'''

'\nfor i in [7,30,90]:\n    stv_melt[\'min%s\'%i] = stv_melt["vol"].rolling(i).min()\n'

In [33]:
display(stv_melt.head(3))
display(stv_melt.tail(3))
display(stv_melt.dtypes)

Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
0,HOBBIES_1_001_CA_1_validation,CA_1,HOBBIES_1_001,d_1,0,2011-01-29,11101,,,,,,-37,1,4,29,5,,,
1,HOBBIES_1_002_CA_1_validation,CA_1,HOBBIES_1_002,d_1,0,2011-01-29,11101,,,,,,-37,1,4,29,5,,,
2,HOBBIES_1_003_CA_1_validation,CA_1,HOBBIES_1_003,d_1,0,2011-01-29,11101,,,,,,-37,1,4,29,5,,,


Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
60034807,FOODS_3_825_WI_3_validation,WI_3,FOODS_3_825,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,3.98,-32,6,24,19,6,0.0,0.0,0.0
60034808,FOODS_3_826_WI_3_validation,WI_3,FOODS_3_826,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,1.28,-32,6,24,19,6,0.0,0.0,0.0
60034809,FOODS_3_827_WI_3_validation,WI_3,FOODS_3_827,d_1969,0,2016-06-19,11621,NBAFinalsEnd,Sporting,Father's day,Cultural,1.0,-32,6,24,19,6,0.0,0.0,0.0


id               object
store_id         object
item_id          object
d                object
vol               int64
date             object
wm_yr_wk          int64
event_name_1     object
event_type_1     object
event_name_2     object
event_type_2     object
sell_price      float64
year               int8
month              int8
week               int8
day                int8
dayofweek          int8
shift7          float64
shift30         float64
shift90         float64
dtype: object

In [34]:
stv_melt["vol"] = stv_melt[["vol"]].astype('int16')
stv_melt["wm_yr_wk"] = stv_melt[ "wm_yr_wk"].astype('int16')

In [35]:
stv_melt["sell_price"] = stv_melt["sell_price"].astype('float16')
stv_melt["shift7"] = stv_melt["shift7"].astype('float16')

In [36]:
stv_melt["shift30"] = stv_melt["shift30"].astype('float16')
stv_melt["shift90"] = stv_melt["shift90"].astype('float16')

In [37]:
# ラベルエンコーダー
lbl = preprocessing.LabelEncoder()
stv_melt["store_id"] = lbl.fit_transform(stv_melt["store_id"])
stv_melt["item_id"] = lbl.fit_transform(stv_melt["item_id"])

stv_melt["store_id"] = stv_melt["store_id"].astype('int8')
stv_melt["item_id"] = stv_melt["item_id"].astype('int8')

In [38]:
# event は欠損値があるので前処理
stv_melt["event_name_1"] = stv_melt["event_name_1"].fillna("missing", inplace=True)
stv_melt["event_type_1"] = stv_melt["event_type_1"].fillna("missing", inplace=True)
stv_melt["event_name_2"] = stv_melt["event_name_2"].fillna("missing", inplace=True)
stv_melt["event_type_2"] = stv_melt["event_type_2"].fillna("missing", inplace=True)

stv_melt["event_name_1"] = lbl.fit_transform(stv_melt["event_name_1"])
stv_melt["event_type_1"] = lbl.fit_transform(stv_melt["event_type_1"])
stv_melt["event_name_2"] = lbl.fit_transform(stv_melt["event_name_2"])
stv_melt["event_type_2"] = lbl.fit_transform(stv_melt["event_type_2"])

stv_melt["event_name_1"] = stv_melt["event_name_1"].astype('int8')
stv_melt["event_name_2"] = stv_melt["event_name_2"].astype('int8')
stv_melt["event_type_1"] = stv_melt["event_type_1"].astype('int8')
stv_melt["event_type_2"] = stv_melt["event_type_2"].astype('int8')

In [39]:
display(stv_melt.head(3))
display(stv_melt.tail(3))
display(stv_melt.dtypes)

Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
0,HOBBIES_1_001_CA_1_validation,0,-99,d_1,0,2011-01-29,11101,0,0,0,0,,-37,1,4,29,5,,,
1,HOBBIES_1_002_CA_1_validation,0,-98,d_1,0,2011-01-29,11101,0,0,0,0,,-37,1,4,29,5,,,
2,HOBBIES_1_003_CA_1_validation,0,-97,d_1,0,2011-01-29,11101,0,0,0,0,,-37,1,4,29,5,,,


Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
60034807,FOODS_3_825_WI_3_validation,9,-102,d_1969,0,2016-06-19,11621,0,0,0,0,3.980469,-32,6,24,19,6,0.0,0.0,0.0
60034808,FOODS_3_826_WI_3_validation,9,-101,d_1969,0,2016-06-19,11621,0,0,0,0,1.280273,-32,6,24,19,6,0.0,0.0,0.0
60034809,FOODS_3_827_WI_3_validation,9,-100,d_1969,0,2016-06-19,11621,0,0,0,0,1.0,-32,6,24,19,6,0.0,0.0,0.0


id               object
store_id           int8
item_id            int8
d                object
vol               int16
date             object
wm_yr_wk          int16
event_name_1       int8
event_type_1       int8
event_name_2       int8
event_type_2       int8
sell_price      float16
year               int8
month              int8
week               int8
day                int8
dayofweek          int8
shift7          float16
shift30         float16
shift90         float16
dtype: object

### 学習用データセットの作成

In [40]:
x_train = stv_melt[stv_melt['date'] <= '2016-03-27']
y_train = x_train['vol']
x_val   = stv_melt[(stv_melt['date'] > '2016-03-27') & (stv_melt['date'] <= '2016-04-24')]
y_val   = x_val['vol']
test    = stv_melt[(stv_melt['date'] > '2016-04-24')]

In [41]:
display(test.head())
display(test.tail())
display(test.dtypes)

Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
58327370,HOBBIES_1_001_CA_1_validation,0,-99,d_1914,0,2016-04-25,11613,0,0,0,0,8.382812,-32,4,17,25,0,0.0,2.0,0.0
58327371,HOBBIES_1_002_CA_1_validation,0,-98,d_1914,0,2016-04-25,11613,0,0,0,0,3.970703,-32,4,17,25,0,1.0,0.0,2.0
58327372,HOBBIES_1_003_CA_1_validation,0,-97,d_1914,0,2016-04-25,11613,0,0,0,0,2.970703,-32,4,17,25,0,1.0,10.0,12.0
58327373,HOBBIES_1_004_CA_1_validation,0,-96,d_1914,0,2016-04-25,11613,0,0,0,0,4.640625,-32,4,17,25,0,0.0,2.0,0.0
58327374,HOBBIES_1_005_CA_1_validation,0,-95,d_1914,0,2016-04-25,11613,0,0,0,0,2.880859,-32,4,17,25,0,0.0,0.0,4.0


Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
60034805,FOODS_3_823_WI_3_validation,9,-104,d_1969,0,2016-06-19,11621,0,0,0,0,2.980469,-32,6,24,19,6,0.0,0.0,0.0
60034806,FOODS_3_824_WI_3_validation,9,-103,d_1969,0,2016-06-19,11621,0,0,0,0,2.480469,-32,6,24,19,6,0.0,0.0,0.0
60034807,FOODS_3_825_WI_3_validation,9,-102,d_1969,0,2016-06-19,11621,0,0,0,0,3.980469,-32,6,24,19,6,0.0,0.0,0.0
60034808,FOODS_3_826_WI_3_validation,9,-101,d_1969,0,2016-06-19,11621,0,0,0,0,1.280273,-32,6,24,19,6,0.0,0.0,0.0
60034809,FOODS_3_827_WI_3_validation,9,-100,d_1969,0,2016-06-19,11621,0,0,0,0,1.0,-32,6,24,19,6,0.0,0.0,0.0


id               object
store_id           int8
item_id            int8
d                object
vol               int16
date             object
wm_yr_wk          int16
event_name_1       int8
event_type_1       int8
event_name_2       int8
event_type_2       int8
sell_price      float16
year               int8
month              int8
week               int8
day                int8
dayofweek          int8
shift7          float16
shift30         float16
shift90         float16
dtype: object

In [42]:
del stv_melt
gc.collect()

101

### lightGBM モデルの実行

In [43]:
features = [
    "store_id",
    "item_id",
    "sell_price",
    "shift7",
    "shift30",
    "shift90",
    "event_name_1",
    "event_type_1",
    "event_name_2",
    "event_type_2",
    "year",
    "month",
    "week",
    "day",
    "dayofweek"
]

In [44]:
params = {
    'boosting_type': 'gbdt',
    'metric': 'rmse',
    'objective': 'regression',
    'n_jobs': -1,
    'seed': 236,
    'learning_rate': 0.1,
    'bagging_fraction': 0.75,
    'bagging_freq': 10, 
    'colsample_bytree': 0.75
}

In [45]:
train_set = lgb.Dataset(x_train[features], y_train)
val_set = lgb.Dataset(x_val[features], y_val)

In [46]:
model_lgb = lgb.train(params, train_set, num_boost_round = 2500, early_stopping_rounds = 50, valid_sets = [train_set, val_set], verbose_eval = 100)

Training until validation scores don't improve for 50 rounds
[100]	training's rmse: 3.10182	valid_1's rmse: 3.02791
[200]	training's rmse: 2.9776	valid_1's rmse: 2.9268
[300]	training's rmse: 2.88547	valid_1's rmse: 2.8448
[400]	training's rmse: 2.82248	valid_1's rmse: 2.77876
[500]	training's rmse: 2.77281	valid_1's rmse: 2.74045
[600]	training's rmse: 2.74273	valid_1's rmse: 2.71213
[700]	training's rmse: 2.7119	valid_1's rmse: 2.68153
[800]	training's rmse: 2.69123	valid_1's rmse: 2.66074
[900]	training's rmse: 2.67093	valid_1's rmse: 2.64424
[1000]	training's rmse: 2.65125	valid_1's rmse: 2.62263
[1100]	training's rmse: 2.63364	valid_1's rmse: 2.61122
[1200]	training's rmse: 2.61514	valid_1's rmse: 2.59834
[1300]	training's rmse: 2.60028	valid_1's rmse: 2.58223
[1400]	training's rmse: 2.58809	valid_1's rmse: 2.57084
[1500]	training's rmse: 2.57382	valid_1's rmse: 2.5625
[1600]	training's rmse: 2.5605	valid_1's rmse: 2.55551
[1700]	training's rmse: 2.54954	valid_1's rmse: 2.54769
[1

In [47]:
val_pred = model_lgb.predict(x_val[features])
val_score = np.sqrt(metrics.mean_squared_error(val_pred, y_val))
print(f'Our val rmse score は {val_score}')

Our val rmse score は 2.4935427705008792


In [48]:
y_pred = model_lgb.predict(test[features])
test['vol'] = y_pred

In [49]:
predictions = test[['id', 'date', 'vol']]
predictions = pd.pivot(predictions, index = 'id', columns = 'date', values = 'vol').reset_index()

In [50]:
display(predictions.head())
display(predictions.tail())
display(predictions.shape)

date,id,2016-04-25,2016-04-26,2016-04-27,2016-04-28,2016-04-29,2016-04-30,2016-05-01,2016-05-02,2016-05-03,2016-05-04,2016-05-05,2016-05-06,2016-05-07,2016-05-08,2016-05-09,2016-05-10,2016-05-11,2016-05-12,2016-05-13,2016-05-14,2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-21,2016-05-22,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27,2016-05-28,2016-05-29,2016-05-30,2016-05-31,2016-06-01,2016-06-02,2016-06-03,2016-06-04,2016-06-05,2016-06-06,2016-06-07,2016-06-08,2016-06-09,2016-06-10,2016-06-11,2016-06-12,2016-06-13,2016-06-14,2016-06-15,2016-06-16,2016-06-17,2016-06-18,2016-06-19
0,FOODS_1_001_CA_1_validation,1.055092,0.91884,0.915976,0.918109,1.022137,1.346264,1.38256,1.126903,1.03032,0.969757,0.985647,1.077283,1.351484,1.345051,1.132013,0.991203,0.964964,0.955666,1.036369,1.317565,1.31368,1.105283,0.962133,0.966562,0.949689,1.049868,1.339131,1.328977,1.092411,0.926487,0.917608,0.916443,1.020471,1.316241,1.303711,1.062018,0.933398,1.005812,1.020867,1.176794,1.362684,1.366928,1.142754,1.003151,1.000819,1.010791,1.103468,1.355394,1.344137,1.111088,0.964196,0.964413,0.976597,1.078136,1.399529,1.344616
1,FOODS_1_001_CA_2_validation,0.927557,0.863747,0.860883,0.871776,0.966559,1.308972,1.2654,0.919464,0.89527,0.834707,0.850597,0.920396,1.239164,1.218511,0.924574,0.856153,0.848092,0.851333,0.910199,1.235963,1.214663,0.935494,0.864786,0.866569,0.890931,0.881433,1.289415,1.255652,0.952452,0.871394,0.862515,0.87011,0.964893,1.27895,1.24281,0.932525,0.876348,0.868805,0.88386,1.01795,1.248407,1.247811,0.933358,0.866143,0.863812,0.873784,0.944623,1.268054,1.243162,0.923596,0.849147,0.849363,0.886053,0.965755,1.335263,1.168902
2,FOODS_1_001_CA_3_validation,1.447849,1.267406,1.251709,1.258847,1.311925,1.617129,1.685371,1.58845,1.432341,1.340354,1.356244,1.396929,1.653853,1.6832,1.59356,1.393224,1.337883,1.341441,1.371194,1.618478,1.657993,1.521605,1.332705,1.308907,1.277385,1.326613,1.599762,1.634522,1.500155,1.274875,1.253341,1.254958,1.308036,1.589915,1.601829,1.452367,1.279557,1.37681,1.389642,1.494619,1.633794,1.672035,1.604701,1.405572,1.376105,1.383855,1.42558,1.643388,1.688176,1.58663,1.371292,1.345927,1.311049,1.37536,1.655435,1.650231
3,FOODS_1_001_CA_4_validation,0.616045,0.52292,0.520056,0.527195,0.507693,0.617257,0.589579,0.654126,0.598564,0.5352,0.557743,0.525849,0.581845,0.590442,0.663087,0.55485,0.565123,0.565552,0.522725,0.568738,0.579882,0.679211,0.570086,0.560594,0.563723,0.540372,0.625203,0.631592,0.650862,0.532041,0.521688,0.523306,0.503804,0.597365,0.601378,0.626661,0.541169,0.573105,0.585937,0.618335,0.553782,0.582213,0.672877,0.574295,0.571964,0.581353,0.540411,0.59577,0.609058,0.667167,0.568125,0.568341,0.595485,0.564392,0.617923,0.64595
4,FOODS_1_001_TX_1_validation,0.644069,0.550944,0.54808,0.555219,0.544591,0.748093,0.835312,0.710746,0.666776,0.597947,0.625955,0.602935,0.796549,0.821561,0.731299,0.616173,0.639845,0.640274,0.613003,0.796634,0.863128,0.733559,0.599386,0.589895,0.593023,0.578546,0.759687,0.787457,0.680163,0.561341,0.549712,0.55133,0.540702,0.730572,0.755966,0.652314,0.566821,0.650965,0.642557,0.69542,0.763021,0.818298,0.741089,0.642507,0.63521,0.649565,0.610607,0.816984,0.851653,0.748571,0.649529,0.683714,0.649832,0.602566,0.793715,0.801815


date,id,2016-04-25,2016-04-26,2016-04-27,2016-04-28,2016-04-29,2016-04-30,2016-05-01,2016-05-02,2016-05-03,2016-05-04,2016-05-05,2016-05-06,2016-05-07,2016-05-08,2016-05-09,2016-05-10,2016-05-11,2016-05-12,2016-05-13,2016-05-14,2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-21,2016-05-22,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27,2016-05-28,2016-05-29,2016-05-30,2016-05-31,2016-06-01,2016-06-02,2016-06-03,2016-06-04,2016-06-05,2016-06-06,2016-06-07,2016-06-08,2016-06-09,2016-06-10,2016-06-11,2016-06-12,2016-06-13,2016-06-14,2016-06-15,2016-06-16,2016-06-17,2016-06-18,2016-06-19
30485,HOUSEHOLD_2_516_TX_2_validation,0.342746,0.283122,0.282798,0.289936,0.372108,0.450301,0.463294,0.339317,0.314413,0.277052,0.301484,0.377586,0.444253,0.449205,0.37052,0.289595,0.319608,0.320037,0.387248,0.444255,0.502149,0.372163,0.307966,0.310756,0.30779,0.388861,0.441476,0.459461,0.359448,0.285164,0.28443,0.286047,0.368219,0.434002,0.449052,0.346156,0.294164,0.317069,0.303148,0.434083,0.399162,0.44052,0.374048,0.31651,0.302038,0.320806,0.375056,0.453784,0.478109,0.381124,0.32345,0.361918,0.320457,0.402171,0.449318,0.466198
30486,HOUSEHOLD_2_516_TX_3_validation,0.348352,0.280531,0.280207,0.287345,0.355187,0.411906,0.371452,0.354456,0.326972,0.284247,0.308679,0.367996,2.357318,2.242823,1.904376,1.68755,1.715023,1.718188,2.050267,0.415378,0.433803,0.385101,0.305628,0.308418,0.305452,0.372193,0.40552,0.384037,0.365307,0.282573,0.281839,0.283456,0.351298,0.397793,0.373375,0.351762,0.291573,0.318499,0.31009,0.432312,0.373232,0.375121,0.386986,0.321251,0.306778,0.325546,0.365466,0.424907,0.409763,0.394062,0.32819,0.366658,0.325197,0.385503,0.413363,0.390774
30487,HOUSEHOLD_2_516_WI_1_validation,0.204864,0.209411,0.209087,0.216225,0.308837,0.46783,0.314791,0.210252,0.242267,0.199541,0.22804,0.290419,0.366002,0.301286,0.235917,0.213697,0.24371,0.244138,0.300081,0.385177,0.368038,0.241613,0.234508,0.237298,0.234332,0.325843,0.392427,0.329298,0.221818,0.211453,0.210719,0.212337,0.304948,0.384701,0.318636,0.223988,0.237698,0.225405,0.232788,0.350669,0.343342,0.325315,0.239445,0.226696,0.22666,0.244908,0.287889,0.377658,0.335231,0.246521,0.264599,0.294787,0.254077,0.339153,0.398499,0.334263
30488,HOUSEHOLD_2_516_WI_2_validation,0.286546,0.27446,0.274136,0.281274,0.3493,0.405441,0.260227,0.383831,0.422451,0.326802,0.3778,0.431977,0.417085,0.384239,0.402704,0.323828,0.386824,0.403755,0.410651,0.420698,0.388083,0.353078,0.302932,0.305722,0.302756,0.370313,0.3703,0.29252,0.307507,0.282886,0.275768,0.277385,0.345411,0.358566,0.278345,0.310168,0.318045,0.324241,0.412523,0.539226,0.417471,0.406468,0.424049,0.363122,0.397391,0.4258,0.406394,0.467641,0.42624,0.400138,0.391636,0.421823,0.347919,0.384698,0.36888,0.300529
30489,HOUSEHOLD_2_516_WI_3_validation,0.122546,0.098747,0.098423,0.105561,0.178917,0.257659,0.125901,0.221824,0.23396,0.133595,0.184593,0.25204,0.238305,0.24067,0.23598,0.130622,0.193617,0.210548,0.230715,0.250196,0.252792,0.186355,0.133436,0.12055,0.117584,0.19047,0.204716,0.161402,0.134048,0.107173,0.100055,0.101672,0.175028,0.202441,0.156686,0.138978,0.131409,0.108595,0.202456,0.342429,0.225392,0.2496,0.235749,0.148339,0.182608,0.211017,0.204881,0.270881,0.264691,0.211838,0.176852,0.20704,0.150621,0.21644,0.203089,0.169204


(30490, 57)

In [51]:
del params, model_lgb, y_pred
gc.collect()

48

In [52]:
pre_val = predictions.iloc[:,:29]

In [53]:
pre_eva = pd.concat([predictions.iloc[:,0],predictions.iloc[:,29:57]], axis=1)
pre_eva['id'] = pre_eva['id'].str.replace('_validation', '_evaluation')

In [54]:
del predictions
gc.collect()

78

In [55]:
pre_val.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
pre_eva.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]

In [56]:
display(pre_val.head())
display(pre_val.tail())
display(pre_val.shape)

display(pre_eva.head())
display(pre_eva.tail())
display(pre_eva.shape)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_validation,1.055092,0.91884,0.915976,0.918109,1.022137,1.346264,1.38256,1.126903,1.03032,0.969757,0.985647,1.077283,1.351484,1.345051,1.132013,0.991203,0.964964,0.955666,1.036369,1.317565,1.31368,1.105283,0.962133,0.966562,0.949689,1.049868,1.339131,1.328977
1,FOODS_1_001_CA_2_validation,0.927557,0.863747,0.860883,0.871776,0.966559,1.308972,1.2654,0.919464,0.89527,0.834707,0.850597,0.920396,1.239164,1.218511,0.924574,0.856153,0.848092,0.851333,0.910199,1.235963,1.214663,0.935494,0.864786,0.866569,0.890931,0.881433,1.289415,1.255652
2,FOODS_1_001_CA_3_validation,1.447849,1.267406,1.251709,1.258847,1.311925,1.617129,1.685371,1.58845,1.432341,1.340354,1.356244,1.396929,1.653853,1.6832,1.59356,1.393224,1.337883,1.341441,1.371194,1.618478,1.657993,1.521605,1.332705,1.308907,1.277385,1.326613,1.599762,1.634522
3,FOODS_1_001_CA_4_validation,0.616045,0.52292,0.520056,0.527195,0.507693,0.617257,0.589579,0.654126,0.598564,0.5352,0.557743,0.525849,0.581845,0.590442,0.663087,0.55485,0.565123,0.565552,0.522725,0.568738,0.579882,0.679211,0.570086,0.560594,0.563723,0.540372,0.625203,0.631592
4,FOODS_1_001_TX_1_validation,0.644069,0.550944,0.54808,0.555219,0.544591,0.748093,0.835312,0.710746,0.666776,0.597947,0.625955,0.602935,0.796549,0.821561,0.731299,0.616173,0.639845,0.640274,0.613003,0.796634,0.863128,0.733559,0.599386,0.589895,0.593023,0.578546,0.759687,0.787457


Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
30485,HOUSEHOLD_2_516_TX_2_validation,0.342746,0.283122,0.282798,0.289936,0.372108,0.450301,0.463294,0.339317,0.314413,0.277052,0.301484,0.377586,0.444253,0.449205,0.37052,0.289595,0.319608,0.320037,0.387248,0.444255,0.502149,0.372163,0.307966,0.310756,0.30779,0.388861,0.441476,0.459461
30486,HOUSEHOLD_2_516_TX_3_validation,0.348352,0.280531,0.280207,0.287345,0.355187,0.411906,0.371452,0.354456,0.326972,0.284247,0.308679,0.367996,2.357318,2.242823,1.904376,1.68755,1.715023,1.718188,2.050267,0.415378,0.433803,0.385101,0.305628,0.308418,0.305452,0.372193,0.40552,0.384037
30487,HOUSEHOLD_2_516_WI_1_validation,0.204864,0.209411,0.209087,0.216225,0.308837,0.46783,0.314791,0.210252,0.242267,0.199541,0.22804,0.290419,0.366002,0.301286,0.235917,0.213697,0.24371,0.244138,0.300081,0.385177,0.368038,0.241613,0.234508,0.237298,0.234332,0.325843,0.392427,0.329298
30488,HOUSEHOLD_2_516_WI_2_validation,0.286546,0.27446,0.274136,0.281274,0.3493,0.405441,0.260227,0.383831,0.422451,0.326802,0.3778,0.431977,0.417085,0.384239,0.402704,0.323828,0.386824,0.403755,0.410651,0.420698,0.388083,0.353078,0.302932,0.305722,0.302756,0.370313,0.3703,0.29252
30489,HOUSEHOLD_2_516_WI_3_validation,0.122546,0.098747,0.098423,0.105561,0.178917,0.257659,0.125901,0.221824,0.23396,0.133595,0.184593,0.25204,0.238305,0.24067,0.23598,0.130622,0.193617,0.210548,0.230715,0.250196,0.252792,0.186355,0.133436,0.12055,0.117584,0.19047,0.204716,0.161402


(30490, 29)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_evaluation,1.092411,0.926487,0.917608,0.916443,1.020471,1.316241,1.303711,1.062018,0.933398,1.005812,1.020867,1.176794,1.362684,1.366928,1.142754,1.003151,1.000819,1.010791,1.103468,1.355394,1.344137,1.111088,0.964196,0.964413,0.976597,1.078136,1.399529,1.344616
1,FOODS_1_001_CA_2_evaluation,0.952452,0.871394,0.862515,0.87011,0.964893,1.27895,1.24281,0.932525,0.876348,0.868805,0.88386,1.01795,1.248407,1.247811,0.933358,0.866143,0.863812,0.873784,0.944623,1.268054,1.243162,0.923596,0.849147,0.849363,0.886053,0.965755,1.335263,1.168902
2,FOODS_1_001_CA_3_evaluation,1.500155,1.274875,1.253341,1.254958,1.308036,1.589915,1.601829,1.452367,1.279557,1.37681,1.389642,1.494619,1.633794,1.672035,1.604701,1.405572,1.376105,1.383855,1.42558,1.643388,1.688176,1.58663,1.371292,1.345927,1.311049,1.37536,1.655435,1.650231
3,FOODS_1_001_CA_4_evaluation,0.650862,0.532041,0.521688,0.523306,0.503804,0.597365,0.601378,0.626661,0.541169,0.573105,0.585937,0.618335,0.553782,0.582213,0.672877,0.574295,0.571964,0.581353,0.540411,0.59577,0.609058,0.667167,0.568125,0.568341,0.595485,0.564392,0.617923,0.64595
4,FOODS_1_001_TX_1_evaluation,0.680163,0.561341,0.549712,0.55133,0.540702,0.730572,0.755966,0.652314,0.566821,0.650965,0.642557,0.69542,0.763021,0.818298,0.741089,0.642507,0.63521,0.649565,0.610607,0.816984,0.851653,0.748571,0.649529,0.683714,0.649832,0.602566,0.793715,0.801815


Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
30485,HOUSEHOLD_2_516_TX_2_evaluation,0.359448,0.285164,0.28443,0.286047,0.368219,0.434002,0.449052,0.346156,0.294164,0.317069,0.303148,0.434083,0.399162,0.44052,0.374048,0.31651,0.302038,0.320806,0.375056,0.453784,0.478109,0.381124,0.32345,0.361918,0.320457,0.402171,0.449318,0.466198
30486,HOUSEHOLD_2_516_TX_3_evaluation,0.365307,0.282573,0.281839,0.283456,0.351298,0.397793,0.373375,0.351762,0.291573,0.318499,0.31009,0.432312,0.373232,0.375121,0.386986,0.321251,0.306778,0.325546,0.365466,0.424907,0.409763,0.394062,0.32819,0.366658,0.325197,0.385503,0.413363,0.390774
30487,HOUSEHOLD_2_516_WI_1_evaluation,0.221818,0.211453,0.210719,0.212337,0.304948,0.384701,0.318636,0.223988,0.237698,0.225405,0.232788,0.350669,0.343342,0.325315,0.239445,0.226696,0.22666,0.244908,0.287889,0.377658,0.335231,0.246521,0.264599,0.294787,0.254077,0.339153,0.398499,0.334263
30488,HOUSEHOLD_2_516_WI_2_evaluation,0.307507,0.282886,0.275768,0.277385,0.345411,0.358566,0.278345,0.310168,0.318045,0.324241,0.412523,0.539226,0.417471,0.406468,0.424049,0.363122,0.397391,0.4258,0.406394,0.467641,0.42624,0.400138,0.391636,0.421823,0.347919,0.384698,0.36888,0.300529
30489,HOUSEHOLD_2_516_WI_3_evaluation,0.134048,0.107173,0.100055,0.101672,0.175028,0.202441,0.156686,0.138978,0.131409,0.108595,0.202456,0.342429,0.225392,0.2496,0.235749,0.148339,0.182608,0.211017,0.204881,0.270881,0.264691,0.211838,0.176852,0.20704,0.150621,0.21644,0.203089,0.169204


(30490, 29)

### 誤差率

In [57]:
sta = pd.read_csv(path + "sales_train_evaluation.csv")
sta = sta[["d_1914", "d_1915","d_1916","d_1917","d_1918","d_1919","d_1920","d_1921","d_1922","d_1923","d_1924","d_1925","d_1926","d_1927","d_1928","d_1929","d_1930","d_1931","d_1932","d_1933","d_1934","d_1935","d_1936","d_1937","d_1938","d_1939","d_1940","d_1941"]]
sta.columns = ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]

for i in ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]:
    sta[i] = sta[i].astype('float64')

In [58]:
pre_val_temp = pre_val.drop("id",axis=1)

In [59]:
display(np.sqrt(mean_squared_error(sta, pre_val_temp)))
display(mean_absolute_error(sta, pre_val_temp))
display(r2_score(sta, pre_val_temp))

4.11101839883865

1.8042598650924553

-0.28452802600126803

In [60]:
del pre_val_temp, pre_val
gc.collect()

66

In [61]:
pre_uni = pd.concat([sta, pre_eva], axis=0)

In [62]:
pre_uni.to_csv('submission_lgb.csv', index = False)

In [64]:
del sta, pre_uni
gc.collect()

NameError: name 'sta' is not defined

### catboostの実行

In [65]:
# initialize Pool
train_pool = Pool(x_train[features], 
                  y_train)

test_pool = Pool(test[features]) 

In [66]:
# specify the training parameters
model2 = CatBoostRegressor(iterations=2000,
                          depth=5,
                          learning_rate=0.05,
                          loss_function='RMSE')

In [None]:
#train the model
model2.fit(train_pool)

0:	learn: 3.8584708	total: 25s	remaining: 13h 51m 44s
1:	learn: 3.8421271	total: 47.4s	remaining: 13h 9m 48s
2:	learn: 3.8270683	total: 1m 4s	remaining: 11h 50m 31s
3:	learn: 3.8133748	total: 1m 17s	remaining: 10h 44m 14s
4:	learn: 3.8007214	total: 1m 34s	remaining: 10h 28m 25s
5:	learn: 3.7881243	total: 1m 54s	remaining: 10h 36m 47s
6:	learn: 3.7771756	total: 2m 14s	remaining: 10h 40m 5s
7:	learn: 3.7674753	total: 2m 31s	remaining: 10h 27m 53s
8:	learn: 3.7582250	total: 2m 46s	remaining: 10h 12m 51s
9:	learn: 3.7498953	total: 3m 2s	remaining: 10h 6m 32s
10:	learn: 3.7423507	total: 3m 20s	remaining: 10h 3m 56s
11:	learn: 3.7348517	total: 3m 37s	remaining: 10h 1m
12:	learn: 3.7284040	total: 3m 56s	remaining: 10h 1m 56s
13:	learn: 3.7220883	total: 4m 14s	remaining: 10h 46s
14:	learn: 3.7163472	total: 4m 31s	remaining: 9h 59m 50s
15:	learn: 3.7112327	total: 4m 47s	remaining: 9h 54m 22s
16:	learn: 3.7053627	total: 5m 3s	remaining: 9h 49m 37s
17:	learn: 3.7009518	total: 5m 20s	remaining: 9h

In [None]:
# make the prediction using the resulting model
preds2 = model2.predict(test_pool)

In [None]:
del model2
gc.collect()

In [None]:
test2 = test
test2['vol'] = preds2

In [None]:
predictions2 = test2[['id', 'date', 'vol']]
predictions2 = pd.pivot(predictions2, index = 'id', columns = 'date', values = 'vol').reset_index()

In [None]:
del train_pool, test_pool, test2, preds2
gc.collect()

In [None]:
pre_val2 = predictions2.iloc[:,:29]

In [None]:
pre_eva2 = pd.concat([predictions2.iloc[:,0],predictions2.iloc[:,29:57]], axis=1)
pre_eva2['id'] = pre_eva2['id'].str.replace('_validation', '_evaluation')

In [None]:
del predictions2
gc.collect()

In [None]:
pre_val2.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
pre_eva2.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]

In [None]:
display(pre_val2.head())
display(pre_val2.tail())
display(pre_val2.shape)

display(pre_eva2.head())
display(pre_eva2.tail())
display(pre_eva2.shape)

### 誤差率

In [None]:
sta = pd.read_csv(path + "sales_train_evaluation.csv")
sta = sta[["d_1914", "d_1915","d_1916","d_1917","d_1918","d_1919","d_1920","d_1921","d_1922","d_1923","d_1924","d_1925","d_1926","d_1927","d_1928","d_1929","d_1930","d_1931","d_1932","d_1933","d_1934","d_1935","d_1936","d_1937","d_1938","d_1939","d_1940","d_1941"]]
sta.columns = ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]

for i in ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]:
    sta[i] = sta[i].astype('float64')

In [None]:
pre_val2_temp = pre_val2.drop("id",axis=1)

In [None]:
display(np.sqrt(mean_squared_error(sta, pre_val2_temp)))
display(mean_absolute_error(sta, pre_val2_temp))
display(r2_score(sta, pre_val2_temp))

In [None]:
del pre_val2_temp, pre_val2
gc.collect()

In [None]:
pre_uni2 = pd.concat([sta, pre_eva2], axis=0)

In [None]:
pre_uni2.to_csv('submission_ctb.csv', index = False)

In [None]:
del sta, pre_uni2, pre_eval2
gc.collect()

### XGboost モデルの実行

In [None]:
xgb_features = [
    "store_id",
    "sell_price",
    "event_name_1",
    "event_type_1",
    "event_name_2",
    "event_type_2",
    "year",
    "month",
    "week",
    "day",
    "dayofweek"
]

In [None]:
# 学習用のパラメータ
xgb_params = {
        # 回帰問題
        'objective': 'reg:linear',
        # 学習用の指標 (RMSE)
        'eval_metric': 'rmse',
    }
# param['nthread'] = 4
# param['eval_metric'] = 'auc'

In [None]:
dtrain = xgb.DMatrix(x_train[xgb_features], label=y_train)
dtest = xgb.DMatrix(x_val[xgb_features], label=y_val)

In [None]:
evals = [(dtest, 'eval'), (dtrain, 'train')]

In [None]:
evals_result = {}

In [None]:
model3 = xgb.train(xgb_params,
                    dtrain,
                    num_boost_round=1000,
                    early_stopping_rounds=10,
                    evals=evals,
                    evals_result=evals_result,
                    )

In [None]:
# 検証用データが各クラスに分類される確率を計算する
y_pred3 = model3.predict(dtest)

In [None]:
del model3, xgb_params, xgb_features, dtrain, dtest, evals, x_val, y_val, 
gc.collect()

In [None]:
test3 = test
test3['vol'] = y_pred3

In [None]:
predictions3 = test3[['id', 'date', 'vol']]
predictions3 = pd.pivot(predictions3, index = 'id', columns = 'date', values = 'vol').reset_index()

In [None]:
pre_val3 = predictions3.iloc[:,:29]

In [None]:
pre_eva3 = pd.concat([predictions3.iloc[:,0],predictions3.iloc[:,29:57]], axis=1)
pre_eva3['id'] = pre_eva3['id'].str.replace('_validation', '_evaluation')

In [None]:
del test3, prediction3, 
gc.collect()

In [None]:
pre_val3.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
pre_eva3.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]

In [None]:
display(pre_val3.head())
display(pre_val3.tail())
display(pre_val3.shape)

display(pre_eva3.head())
display(pre_eva3.tail())
display(pre_eva3.shape)

In [None]:
sta = pd.read_csv(path + "sales_train_evaluation.csv")
sta = sta[["d_1914", "d_1915","d_1916","d_1917","d_1918","d_1919","d_1920","d_1921","d_1922","d_1923","d_1924","d_1925","d_1926","d_1927","d_1928","d_1929","d_1930","d_1931","d_1932","d_1933","d_1934","d_1935","d_1936","d_1937","d_1938","d_1939","d_1940","d_1941"]]
sta.columns = ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]

for i in ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]:
    sta[i] = sta[i].astype('float64')

In [None]:
pre_val3_temp = pre_val3.drop("id",axis=1)

In [None]:
display(np.sqrt(mean_squared_error(sta, pre_val3_temp)))
display(mean_absolute_error(sta, pre_val3_temp))
display(r2_score(sta, pre_val3_temp))

In [None]:
del features, x_train, y_train, x_val, y_val, test, pre_val3_temp, pre_val3
gc.collect()

In [None]:
pre_uni3 = pd.concat([sta, pre_eva3], axis=0)

In [None]:
pre_uni3.to_csv('submission_xgb.csv', index = False)

In [None]:
del sta, pre_uni3, pre_eval3
gc.collect()

### データ統合 csv保存

In [None]:
pre_uni  = pd.read_csv(path + "submission_lgb.csv") 
pre_uni2 = pd.read_csv(path + "submission_ctb.csv") 
pre_uni3 = pd.read_csv(path + "submission_xgb.csv") 

In [None]:
pre_uni_am = pre_uni.set_index('id') * 0.2 + pre_uni2.set_index('id') * 0.7 + pre_uni2.set_index('id') * 0.1

In [None]:
# pre_uni_am = pre_uni_am.reset_index()

In [None]:
del pre_uni, pre_uni2, pre_uni3
gc.collect()

In [None]:
display(pre_uni_am.head())
display(pre_uni_am.tail())
display(pre_uni_am.shape)

In [None]:
pre_uni_am.to_csv('submission_uni.csv', index = False)

In [None]:
del pre_uni_am
gc.collect()