## 参考  
### kaggle notebook lightGBM
https://www.kaggle.com/girmdshinsei/for-japanese-beginner-with-wrmsse-in-lgbm?scriptVersionId=31044557

### lightGBM 時系列
https://qiita.com/ground0state/items/657861de619a4e4a30de

### Quick start catboost
https://catboost.ai/docs/concepts/python-quickstart.html

### Pythonでcatboostを使ってみる
#### (cat_featuresの使い方を調べた)
https://qiita.com/shin_mura/items/3d9ce25a60bdd25a3333

### XGBoostパラメータのまとめとランダムサーチ実装
https://qiita.com/FJyusk56/items/0649f4362587261bd57a

In [1]:
import pandas as pd
import os
import gc
import numpy as np
import matplotlib.pyplot as plt

# ラベルエンコーダー
from sklearn import preprocessing, metrics

# lightGBM
import lightgbm as lgb

# CatBoost
from catboost import Pool, CatBoostRegressor

# XGBoost
import xgboost as xgb

# 1セルでまとめて.head()、.tail()等を入力しても大丈夫になる
from IPython.display import display

# 誤差算定
from sklearn.metrics import mean_absolute_error

# 二乗平均平方根誤差 (RMSE)
from sklearn.metrics import mean_squared_error

# 決定係数
from sklearn.metrics import r2_score

import optuna.integration.lightgbm as lgb_o

import warnings
warnings.filterwarnings('ignore')

# DataFrameの表示数を変更
pd.set_option('max_columns', 500)
pd.set_option('max_rows', 500)

### データ作成

In [2]:
# ローカル用
path = os.getcwd() + "/"

# kaggle Notebook用
INPUT_DIR = '../input/m5-forecasting-accuracy'

calendar.csv -製品の販売日に関する情報が含まれています。
sales_train_validation.csv -製品および店舗ごとの過去の毎日の販売台数データが​​含まれています [d_1 - d_1913]
sample_submission.csv-提出の正しい形式。詳細については、「評価」タブを参照してください。
sell_prices.csv -店舗および日付ごとに販売された製品の価格に関する情報が含まれています。

sales_train_evaluation.csv-締め切りの1か月前に1回ご利用いただけます。売上高が含まれます[d_1 - d_1941]

各行は含むidの連結であるitem_idとstore_idのいずれかである、validation（公共のランキングに対応する）、またはevaluation（プライベートランキングに対応します）。　　 F1-F28各行で販売されるアイテムの28予測日（）を予測しています。　　 以下のためのvalidation行、これに相当するd_1914 - d_1941、とのためevaluationの行、これに相当しますd_1942 - d_1969。　　

validation d_1914 - d_1941の単価と量を予測する。１か月前に１回公開される。 evaluation d_1942-d_1969の単価と量を予測する

### 作成データの読込

In [3]:
try:
    stv_melt = pd.read_pickle(path + "melt_stv.pkl")
except FileNotFoundError:
    1

### 通常通りcsvを読み込んでデータを作る

In [None]:
# sales_train_validation.csv
try:
    stv = pd.read_csv(path + "sales_train_validation.csv") # ローカル用
except FileNotFoundError:
    stv = pd.read_csv(f"{INPUT_DIR}/sales_train_validation.csv") # kaggle用

    
# calendar.csv
try:
    cal = pd.read_csv(path + "calendar.csv") # ローカル用
except FileNotFoundError:
    cal = pd.read_csv(f"{INPUT_DIR}/calendar.csv") # kaggle用

    
# sell_prices.csv
try:
    price = pd.read_csv(path + "sell_prices.csv") # ローカル用
except FileNotFoundError:
    price = pd.read_csv(f"{INPUT_DIR}/sell_prices.csv") # kaggle用

    
# sample_submission.csv
try:
    ss = pd.read_csv(path + "sample_submission.csv") # ローカル用
except FileNotFoundError:
    ss = pd.read_csv(f"{INPUT_DIR}/sample_submission.csv") # kaggle用

stv.shape

### ロースペックマシン限定

stv = pd.read_csv(path + "sales_train_validation.csv",
                               skiprows=lambda x: x not in range(0,1001))

cal = pd.read_csv(path + "calendar.csv",
                               skiprows=lambda x: x not in range(0,3001))

price = pd.read_csv(path + "sell_prices.csv",
                               skiprows=lambda x: x not in range(0,3001))

ss = pd.read_csv(path + "sample_submission.csv",
                               skiprows=lambda x: x not in range(0,1001))

In [None]:
display(stv.head())
display(stv.tail())
display(stv.dtypes)
display(cal.head())
display(cal.tail())
display(cal.dtypes)
display(cal.max())
display(price.head())
display(price.tail())
display(price.dtypes)
display(price.max())
display(price.shape)
display(ss.head())
display(ss.tail())
display(ss.shape)

In [None]:
day1_1913 = [f"d_{i}" for i in range(1, 1914)]

In [None]:
stv_melt =  pd.melt(stv, id_vars=['id','store_id','item_id'],
           value_vars=day1_1913,
           var_name = "d", value_name = "vol")

In [None]:
del day1_1913
gc.collect()

In [None]:
product = stv[["id","item_id","store_id"]]

In [None]:
ss_val = ss[0:30490]
ss_val.columns = ["id"] + [f"d_{d}" for d in range(1914, 1942)]

ss_eva = ss[30490:60980]
ss_eva.columns = ["id"] + [f"d_{d}" for d in range(1942, 1970)]

In [None]:
ss_eva['id'] = ss_eva['id'].str.replace('_evaluation','_validation')

In [None]:
ss_val = pd.merge(ss_val, product, how = 'left', left_on = ['id'], right_on = ['id'])
ss_eva = pd.merge(ss_eva, product, how = 'left', left_on = ['id'], right_on = ['id'])

In [None]:
display(ss_val.head(3))
display(ss_val.tail(3))
display(ss_val.shape)
display(ss_eva.head(3))
display(ss_eva.tail(3))
display(ss_eva.shape)

In [None]:
val_1914_1941 = [f"d_{i}" for i in range(1914, 1942)]
eva_1942_1969 = [f"d_{i}" for i in range(1942, 1970)]

In [None]:
val_melt =  pd.melt(ss_val, id_vars=['id','store_id', "item_id"],
           value_vars=val_1914_1941,
           var_name = "d", value_name = "vol")
eva_melt =  pd.melt(ss_eva, id_vars=['id','store_id', "item_id"],
           value_vars=eva_1942_1969,
           var_name = "d", value_name = "vol")

In [None]:
stv_melt = pd.concat([stv_melt, val_melt, eva_melt])

In [None]:
display(stv_melt.head(3))
display(stv_melt.tail(3))
display(stv_melt.shape)

In [None]:
del ss, ss_val, ss_eva, val_1914_1941, eva_1942_1969, val_melt, eva_melt, product
gc.collect()

In [None]:
cal = cal[["date","wm_yr_wk","d","event_name_1","event_type_1","event_name_2","event_type_2"]]

In [None]:
stv_melt = pd.merge(stv_melt, cal, how = 'left', left_on = ['d'], right_on = ['d'])

In [None]:
del cal
gc.collect()

In [None]:
display(stv_melt.head())
display(stv_melt.tail())
display(stv_melt.dtypes)
display(stv_melt.shape)

In [None]:
stv_melt = stv_melt.merge(price, on = ['store_id', 'item_id', 'wm_yr_wk'], how = 'left')

In [None]:
del price
gc.collect()

In [None]:
display(stv_melt.head())
display(stv_melt.tail())
display(stv_melt.dtypes)
display(stv_melt.shape)

### 特徴量作成

In [None]:
stv_melt["date2"] = pd.to_datetime(stv_melt["date"])

In [None]:
stv_melt["year"] = stv_melt["date2"].dt.year
stv_melt["month"] = stv_melt["date2"].dt.month
stv_melt["week"] = stv_melt["date2"].dt.week
stv_melt["day"] = stv_melt["date2"].dt.day
stv_melt["dayofweek"] = stv_melt["date2"].dt.dayofweek

stv_melt["year"] = stv_melt["year"].astype('int8')
stv_melt["month"] = stv_melt["month"].astype('int8')
stv_melt["week"] = stv_melt["week"].astype('int8')
stv_melt["day"] = stv_melt["day"].astype('int8')
stv_melt["dayofweek"] = stv_melt["dayofweek"].astype('int8')

In [None]:
stv_melt = stv_melt.drop("date2", axis=1)

In [None]:
#　ラグの作成
for i in [7,30,90]:
    stv_melt['shift%s'%i] = stv_melt["vol"].shift(i)

In [None]:
# 平均値
'''
for i in [7,30,90]:
    stv_melt['mean%s'%i] = stv_melt["vol"].rolling(i).mean()
'''

In [None]:
# 中央値
'''
for i in [7,30,90]:
    stv_melt['median%s'%i] = stv_melt["vol"].rolling(i).median()
'''

In [None]:
# 最小値
'''
for i in [7,30,90]:
    stv_melt['min%s'%i] = stv_melt["vol"].rolling(i).min()
'''

In [None]:
display(stv_melt.head(3))
display(stv_melt.tail(3))
display(stv_melt.dtypes)

In [None]:
stv_melt["vol"] = stv_melt[["vol"]].astype('int16')
stv_melt["wm_yr_wk"] = stv_melt[ "wm_yr_wk"].astype('int16')

In [None]:
stv_melt["sell_price"] = stv_melt["sell_price"].astype('float16')
stv_melt["shift7"] = stv_melt["shift7"].astype('float16')

In [None]:
stv_melt["shift30"] = stv_melt["shift30"].astype('float16')
stv_melt["shift90"] = stv_melt["shift90"].astype('float16')

In [None]:
# ラベルエンコーダー
lbl = preprocessing.LabelEncoder()
stv_melt["store_id"] = lbl.fit_transform(stv_melt["store_id"])
stv_melt["item_id"] = lbl.fit_transform(stv_melt["item_id"])

stv_melt["store_id"] = stv_melt["store_id"].astype('int8')
stv_melt["item_id"] = stv_melt["item_id"].astype('int8')

In [None]:
# event は欠損値があるので前処理
stv_melt["event_name_1"] = stv_melt["event_name_1"].fillna("missing", inplace=True)
stv_melt["event_type_1"] = stv_melt["event_type_1"].fillna("missing", inplace=True)
stv_melt["event_name_2"] = stv_melt["event_name_2"].fillna("missing", inplace=True)
stv_melt["event_type_2"] = stv_melt["event_type_2"].fillna("missing", inplace=True)

stv_melt["event_name_1"] = lbl.fit_transform(stv_melt["event_name_1"])
stv_melt["event_type_1"] = lbl.fit_transform(stv_melt["event_type_1"])
stv_melt["event_name_2"] = lbl.fit_transform(stv_melt["event_name_2"])
stv_melt["event_type_2"] = lbl.fit_transform(stv_melt["event_type_2"])

stv_melt["event_name_1"] = stv_melt["event_name_1"].astype('int8')
stv_melt["event_name_2"] = stv_melt["event_name_2"].astype('int8')
stv_melt["event_type_1"] = stv_melt["event_type_1"].astype('int8')
stv_melt["event_type_2"] = stv_melt["event_type_2"].astype('int8')

In [None]:
display(stv_melt.head(3))
display(stv_melt.tail(3))
display(stv_melt.dtypes)

### 学習用データセットの作成

In [8]:
x_train = stv_melt[stv_melt['date'] <= '2016-03-27']
y_train = x_train['vol']
x_val   = stv_melt[(stv_melt['date'] > '2016-03-27') & (stv_melt['date'] <= '2016-04-24')]
y_val   = x_val['vol']
test    = stv_melt[(stv_melt['date'] > '2016-04-24')]

In [9]:
display(test.head())
display(test.tail())
display(test.dtypes)

Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
58327370,HOBBIES_1_001_CA_1_validation,0,-99,d_1914,0,2016-04-25,11613,0,0,0,0,8.382812,-32,4,17,25,0,0.0,2.0,0.0
58327371,HOBBIES_1_002_CA_1_validation,0,-98,d_1914,0,2016-04-25,11613,0,0,0,0,3.970703,-32,4,17,25,0,1.0,0.0,2.0
58327372,HOBBIES_1_003_CA_1_validation,0,-97,d_1914,0,2016-04-25,11613,0,0,0,0,2.970703,-32,4,17,25,0,1.0,10.0,12.0
58327373,HOBBIES_1_004_CA_1_validation,0,-96,d_1914,0,2016-04-25,11613,0,0,0,0,4.640625,-32,4,17,25,0,0.0,2.0,0.0
58327374,HOBBIES_1_005_CA_1_validation,0,-95,d_1914,0,2016-04-25,11613,0,0,0,0,2.880859,-32,4,17,25,0,0.0,0.0,4.0


Unnamed: 0,id,store_id,item_id,d,vol,date,wm_yr_wk,event_name_1,event_type_1,event_name_2,event_type_2,sell_price,year,month,week,day,dayofweek,shift7,shift30,shift90
60034805,FOODS_3_823_WI_3_validation,9,-104,d_1969,0,2016-06-19,11621,0,0,0,0,2.980469,-32,6,24,19,6,0.0,0.0,0.0
60034806,FOODS_3_824_WI_3_validation,9,-103,d_1969,0,2016-06-19,11621,0,0,0,0,2.480469,-32,6,24,19,6,0.0,0.0,0.0
60034807,FOODS_3_825_WI_3_validation,9,-102,d_1969,0,2016-06-19,11621,0,0,0,0,3.980469,-32,6,24,19,6,0.0,0.0,0.0
60034808,FOODS_3_826_WI_3_validation,9,-101,d_1969,0,2016-06-19,11621,0,0,0,0,1.280273,-32,6,24,19,6,0.0,0.0,0.0
60034809,FOODS_3_827_WI_3_validation,9,-100,d_1969,0,2016-06-19,11621,0,0,0,0,1.0,-32,6,24,19,6,0.0,0.0,0.0


id               object
store_id           int8
item_id            int8
d                object
vol               int16
date             object
wm_yr_wk          int16
event_name_1       int8
event_type_1       int8
event_name_2       int8
event_type_2       int8
sell_price      float16
year               int8
month              int8
week               int8
day                int8
dayofweek          int8
shift7          float16
shift30         float16
shift90         float16
dtype: object

In [10]:
del stv_melt
gc.collect()

22

### lightGBM モデルの実行

In [21]:
features = [
    "store_id",
    "item_id",
    "sell_price",
    "shift7",
    "shift30",
    "shift90",
    "event_name_1",
    "event_type_1",
    "event_name_2",
    "event_type_2",
    "year",
    "month",
    "week",
    "day",
    "dayofweek"
]

In [22]:
params = {
    'boosting_type': 'gbdt',
    'metric': 'rmse',
    'objective': 'regression',
    'n_jobs': -1,
    'seed': 236,
    'learning_rate': 0.1,
    'bagging_fraction': 0.75,
    'bagging_freq': 10, 
    'colsample_bytree': 0.75
}

In [23]:
train_set = lgb.Dataset(x_train[features], y_train)
val_set = lgb.Dataset(x_val[features], y_val)

In [24]:
model_lgb = lgb.train(params, train_set, num_boost_round = 10, early_stopping_rounds = 2, valid_sets = [train_set, val_set], verbose_eval = 1)

# model_lgb = lgb.train(params, train_set, num_boost_round = 2500, early_stopping_rounds = 50, valid_sets = [train_set, val_set], verbose_eval = 100)

[1]	training's rmse: 3.80639	valid_1's rmse: 3.53132
Training until validation scores don't improve for 2 rounds
[2]	training's rmse: 3.74756	valid_1's rmse: 3.47417
[3]	training's rmse: 3.74209	valid_1's rmse: 3.46698
[4]	training's rmse: 3.73636	valid_1's rmse: 3.45793
[5]	training's rmse: 3.66877	valid_1's rmse: 3.40395
[6]	training's rmse: 3.63185	valid_1's rmse: 3.38377
[7]	training's rmse: 3.59949	valid_1's rmse: 3.35783
[8]	training's rmse: 3.56849	valid_1's rmse: 3.33782
[9]	training's rmse: 3.55128	valid_1's rmse: 3.32423
[10]	training's rmse: 3.54708	valid_1's rmse: 3.31669
Did not meet early stopping. Best iteration is:
[10]	training's rmse: 3.54708	valid_1's rmse: 3.31669


In [25]:
val_pred = model_lgb.predict(x_val[features])
val_score = np.sqrt(metrics.mean_squared_error(val_pred, y_val))
print(f'Our val rmse score は {val_score}')

Our val rmse score は 3.316693496298533


In [26]:
y_pred = model_lgb.predict(test[features])
test['vol'] = y_pred

In [27]:
predictions = test[['id', 'date', 'vol']]
predictions = pd.pivot(predictions, index = 'id', columns = 'date', values = 'vol').reset_index()

In [28]:
display(predictions.head())
display(predictions.tail())
display(predictions.shape)

date,id,2016-04-25,2016-04-26,2016-04-27,2016-04-28,2016-04-29,2016-04-30,2016-05-01,2016-05-02,2016-05-03,2016-05-04,2016-05-05,2016-05-06,2016-05-07,2016-05-08,2016-05-09,2016-05-10,2016-05-11,2016-05-12,2016-05-13,2016-05-14,2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-21,2016-05-22,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27,2016-05-28,2016-05-29,2016-05-30,2016-05-31,2016-06-01,2016-06-02,2016-06-03,2016-06-04,2016-06-05,2016-06-06,2016-06-07,2016-06-08,2016-06-09,2016-06-10,2016-06-11,2016-06-12,2016-06-13,2016-06-14,2016-06-15,2016-06-16,2016-06-17,2016-06-18,2016-06-19
0,FOODS_1_001_CA_1_validation,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472
1,FOODS_1_001_CA_2_validation,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472
2,FOODS_1_001_CA_3_validation,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859
3,FOODS_1_001_CA_4_validation,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627
4,FOODS_1_001_TX_1_validation,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982


date,id,2016-04-25,2016-04-26,2016-04-27,2016-04-28,2016-04-29,2016-04-30,2016-05-01,2016-05-02,2016-05-03,2016-05-04,2016-05-05,2016-05-06,2016-05-07,2016-05-08,2016-05-09,2016-05-10,2016-05-11,2016-05-12,2016-05-13,2016-05-14,2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-21,2016-05-22,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27,2016-05-28,2016-05-29,2016-05-30,2016-05-31,2016-06-01,2016-06-02,2016-06-03,2016-06-04,2016-06-05,2016-06-06,2016-06-07,2016-06-08,2016-06-09,2016-06-10,2016-06-11,2016-06-12,2016-06-13,2016-06-14,2016-06-15,2016-06-16,2016-06-17,2016-06-18,2016-06-19
30485,HOUSEHOLD_2_516_TX_2_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30486,HOUSEHOLD_2_516_TX_3_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,2.135707,2.135707,2.036149,2.036149,2.036149,2.036149,2.036149,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30487,HOUSEHOLD_2_516_WI_1_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30488,HOUSEHOLD_2_516_WI_2_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30489,HOUSEHOLD_2_516_WI_3_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621


(30490, 57)

In [29]:
del features, params, x_train, y_train, x_val, y_val, test, model_lgb
gc.collect()

112

In [30]:
pre_val = predictions.iloc[:,:29]

In [31]:
pre_eva = pd.concat([predictions.iloc[:,0],predictions.iloc[:,29:57]], axis=1)
pre_eva['id'] = pre_eva['id'].str.replace('_validation', '_evaluation')

In [32]:
del predictions
gc.collect()

78

In [33]:
display(pre_val.head())
display(pre_val.tail())
display(pre_val.shape)

display(pre_eva.head())
display(pre_eva.tail())
display(pre_eva.shape)

date,id,2016-04-25,2016-04-26,2016-04-27,2016-04-28,2016-04-29,2016-04-30,2016-05-01,2016-05-02,2016-05-03,2016-05-04,2016-05-05,2016-05-06,2016-05-07,2016-05-08,2016-05-09,2016-05-10,2016-05-11,2016-05-12,2016-05-13,2016-05-14,2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-21,2016-05-22
0,FOODS_1_001_CA_1_validation,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472
1,FOODS_1_001_CA_2_validation,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472
2,FOODS_1_001_CA_3_validation,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859
3,FOODS_1_001_CA_4_validation,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627
4,FOODS_1_001_TX_1_validation,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982


date,id,2016-04-25,2016-04-26,2016-04-27,2016-04-28,2016-04-29,2016-04-30,2016-05-01,2016-05-02,2016-05-03,2016-05-04,2016-05-05,2016-05-06,2016-05-07,2016-05-08,2016-05-09,2016-05-10,2016-05-11,2016-05-12,2016-05-13,2016-05-14,2016-05-15,2016-05-16,2016-05-17,2016-05-18,2016-05-19,2016-05-20,2016-05-21,2016-05-22
30485,HOUSEHOLD_2_516_TX_2_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30486,HOUSEHOLD_2_516_TX_3_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,2.135707,2.135707,2.036149,2.036149,2.036149,2.036149,2.036149,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30487,HOUSEHOLD_2_516_WI_1_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30488,HOUSEHOLD_2_516_WI_2_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30489,HOUSEHOLD_2_516_WI_3_validation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621


(30490, 29)

Unnamed: 0,id,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27,2016-05-28,2016-05-29,2016-05-30,2016-05-31,2016-06-01,2016-06-02,2016-06-03,2016-06-04,2016-06-05,2016-06-06,2016-06-07,2016-06-08,2016-06-09,2016-06-10,2016-06-11,2016-06-12,2016-06-13,2016-06-14,2016-06-15,2016-06-16,2016-06-17,2016-06-18,2016-06-19
0,FOODS_1_001_CA_1_evaluation,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472
1,FOODS_1_001_CA_2_evaluation,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472,1.244532,1.244532,1.244532,1.244532,1.244532,1.349472,1.349472
2,FOODS_1_001_CA_3_evaluation,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859,1.468242,1.468242,1.468242,1.468242,1.468242,1.544859,1.544859
3,FOODS_1_001_CA_4_evaluation,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627,1.069423,1.069423,1.069423,1.069423,1.069423,1.074627,1.074627
4,FOODS_1_001_TX_1_evaluation,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982,1.104163,1.104163,1.104163,1.104163,1.104163,1.17982,1.17982


Unnamed: 0,id,2016-05-23,2016-05-24,2016-05-25,2016-05-26,2016-05-27,2016-05-28,2016-05-29,2016-05-30,2016-05-31,2016-06-01,2016-06-02,2016-06-03,2016-06-04,2016-06-05,2016-06-06,2016-06-07,2016-06-08,2016-06-09,2016-06-10,2016-06-11,2016-06-12,2016-06-13,2016-06-14,2016-06-15,2016-06-16,2016-06-17,2016-06-18,2016-06-19
30485,HOUSEHOLD_2_516_TX_2_evaluation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30486,HOUSEHOLD_2_516_TX_3_evaluation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30487,HOUSEHOLD_2_516_WI_1_evaluation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30488,HOUSEHOLD_2_516_WI_2_evaluation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621
30489,HOUSEHOLD_2_516_WI_3_evaluation,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621,0.847096,0.847096,0.847096,0.847096,0.847096,0.905621,0.905621


(30490, 29)

In [34]:
pre_val.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
pre_eva.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]

### catboostの実行

In [35]:
# initialize Pool
train_pool = Pool(x_train[features], 
                  y_train)

test_pool = Pool(test[features]) 

# specify the training parameters
# model2 = CatBoostRegressor(iterations=2000,
#                          depth=5,
#                          learning_rate=0.05,
#                          loss_function='RMSE')

model2 = CatBoostRegressor(iterations=20,
                          depth=1,
                          learning_rate=0.5,
                          loss_function='RMSE')
#train the model
model2.fit(train_pool)
# make the prediction using the resulting model
preds2 = model2.predict(test_pool)

NameError: name 'x_train' is not defined

In [None]:
test2 = test
test2['vol'] = preds2

In [None]:
predictions2 = test2[['id', 'date', 'vol']]
predictions2 = pd.pivot(predictions2, index = 'id', columns = 'date', values = 'vol').reset_index()

In [None]:
pre_val2 = predictions2.iloc[:,:29]

In [None]:
pre_eva2 = pd.concat([predictions2.iloc[:,0],predictions2.iloc[:,29:57]], axis=1)
pre_eva2['id'] = pre_eva2['id'].str.replace('_validation', '_evaluation')

In [None]:
pre_val2.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
pre_eva2.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]

In [None]:
display(pre_val2.head())
display(pre_val2.tail())
display(pre_val2.shape)

display(pre_eva2.head())
display(pre_eva2.tail())
display(pre_eva2.shape)

### XGboost モデルの実行

In [4]:
xgb_features = [
    "store_id",
    "sell_price",
    "event_name_1",
    "event_type_1",
    "event_name_2",
    "event_type_2",
    "year",
    "month",
    "week",
    "day",
    "dayofweek"
]

In [5]:
# 学習用のパラメータ
xgb_params = {
    'objective': 'reg:linear',
}

In [6]:
model3 = xgb.XGBRegressor(**xgb_params)

In [11]:
model3.fit(x_train[xgb_features], y_train)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

In [13]:
# 検証用データが各クラスに分類される確率を計算する
y_pred3 = model3.predict(test[xgb_features])

In [15]:
test3 = test
test3['vol'] = y_pred3

In [16]:
predictions3 = test3[['id', 'date', 'vol']]
predictions3 = pd.pivot(predictions3, index = 'id', columns = 'date', values = 'vol').reset_index()

In [17]:
pre_val3 = predictions3.iloc[:,:29]

In [18]:
pre_eva3 = pd.concat([predictions3.iloc[:,0],predictions3.iloc[:,29:57]], axis=1)
pre_eva3['id'] = pre_eva3['id'].str.replace('_validation', '_evaluation')

In [19]:
pre_val3.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
pre_eva3.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]

In [20]:
display(pre_val3.head())
display(pre_val3.tail())
display(pre_val3.shape)

display(pre_eva3.head())
display(pre_eva3.tail())
display(pre_eva3.shape)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_validation,1.308948,1.252811,1.252811,1.252811,1.413603,1.819387,1.918191,1.471986,1.415849,1.403023,1.403023,1.521025,1.905519,1.905519,1.45916,1.403023,1.403023,1.403023,1.521025,1.905519,1.905519,1.391581,1.297289,1.297289,1.289643,1.445847,1.85163,1.846467
1,FOODS_1_001_CA_2_validation,1.126383,1.070246,1.070246,1.070246,1.231038,1.636822,1.735626,1.289422,1.233284,1.220458,1.220458,1.33846,1.722955,1.722955,1.276596,1.220458,1.220458,1.220458,1.33846,1.722955,1.722955,1.209016,1.114724,1.114724,1.107078,1.263282,1.669065,1.663902
2,FOODS_1_001_CA_3_validation,1.77477,1.72631,1.72631,1.72631,1.841045,2.17022,2.269024,1.937808,1.889348,1.876522,1.876522,1.948467,2.256353,2.256353,1.924982,1.876522,1.876522,1.876522,1.948467,2.256353,2.256353,1.857402,1.770788,1.770788,1.763142,1.873289,2.202464,2.197301
3,FOODS_1_001_CA_4_validation,0.839916,0.791457,0.791457,0.791457,0.895967,1.098913,1.197717,1.002955,0.954495,0.941669,0.941669,1.003389,1.185046,1.185046,0.990129,0.941669,0.941669,0.941669,1.003389,1.185046,1.185046,0.922549,0.835935,0.835935,0.828289,0.928211,1.131157,1.125993
4,FOODS_1_001_TX_1_validation,0.887841,0.839382,0.839382,0.839382,0.943892,1.224599,1.356287,1.083763,1.035303,1.022477,1.022477,1.084197,1.343615,1.343615,1.070936,1.022477,1.022477,1.022477,1.084197,1.343615,1.343615,1.003357,0.893559,0.88386,0.876214,0.976135,1.256843,1.251679


Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
30485,HOUSEHOLD_2_516_TX_2_validation,0.570029,0.54277,0.54277,0.54277,0.601231,0.729446,0.799521,0.663754,0.636495,0.630617,0.630617,0.675019,0.793799,0.793799,0.657877,0.630617,0.630617,0.630617,0.675019,0.793799,0.793799,0.641596,0.574208,0.564508,0.558679,0.616974,0.74519,0.740026
30486,HOUSEHOLD_2_516_TX_3_validation,0.524109,0.496849,0.496849,0.496849,0.55531,0.683525,0.753601,0.617833,0.590574,0.584697,0.584697,0.629098,4.135762,4.135762,3.407546,3.32441,3.32441,3.32441,3.647145,0.747878,0.747878,0.595675,0.528287,0.518588,0.512758,0.571054,0.699269,0.694106
30487,HOUSEHOLD_2_516_WI_1_validation,0.492689,0.484044,0.484044,0.484044,0.542504,0.683525,0.753601,0.586414,0.577768,0.571891,0.571891,0.616293,0.747878,0.747878,0.580537,0.571891,0.571891,0.571891,0.616293,0.747878,0.747878,0.564256,0.515482,0.505782,0.499953,0.558248,0.699269,0.694106
30488,HOUSEHOLD_2_516_WI_2_validation,0.597186,0.58854,0.58854,0.58854,0.647001,0.77869,0.848766,0.705304,0.696658,0.690781,0.690781,0.735183,0.843043,0.843043,0.699427,0.690781,0.690781,0.690781,0.735183,0.843043,0.843043,0.683146,0.619978,0.610279,0.604449,0.662744,0.794434,0.789271
30489,HOUSEHOLD_2_516_WI_3_validation,0.4581,0.449454,0.449454,0.449454,0.507915,0.639604,0.70968,0.566218,0.557572,0.551695,0.551695,0.596097,0.703957,0.703957,0.560341,0.551695,0.551695,0.551695,0.596097,0.703957,0.703957,0.54406,0.480892,0.471193,0.465363,0.523658,0.655348,0.650185


(30490, 29)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_evaluation,1.336028,1.252811,1.252811,1.252811,1.413603,1.819387,1.819387,1.308948,1.252811,1.415849,1.415849,1.533851,1.905519,1.905519,1.45916,1.403023,1.403023,1.403023,1.521025,1.905519,1.905519,1.45916,1.403023,1.403023,1.335443,1.458081,1.863864,1.856219
1,FOODS_1_001_CA_2_evaluation,1.153464,1.070246,1.070246,1.070246,1.231038,1.636822,1.636822,1.126383,1.070246,1.233284,1.233284,1.351286,1.722955,1.722955,1.276596,1.220458,1.220458,1.220458,1.33846,1.722955,1.722955,1.276596,1.220458,1.220458,1.152879,1.275516,1.6813,1.673654
2,FOODS_1_001_CA_3_evaluation,1.80185,1.72631,1.72631,1.72631,1.841045,2.17022,2.17022,1.77477,1.72631,1.889348,1.889348,1.961293,2.256353,2.256353,1.924982,1.876522,1.876522,1.876522,1.948467,2.256353,2.256353,1.924982,1.876522,1.876522,1.808943,1.885523,2.214698,2.207052
3,FOODS_1_001_CA_4_evaluation,0.866997,0.791457,0.791457,0.791457,0.895967,1.098913,1.098913,0.839916,0.791457,0.954495,0.954495,1.016215,1.185046,1.185046,0.990129,0.941669,0.941669,0.941669,1.003389,1.185046,1.185046,0.990129,0.941669,0.941669,0.87409,0.940445,1.143391,1.135745
4,FOODS_1_001_TX_1_evaluation,0.914921,0.839382,0.839382,0.839382,0.943892,1.224599,1.224599,0.887841,0.839382,1.035303,1.035303,1.097023,1.343615,1.343615,1.070936,1.022477,1.022477,1.022477,1.084197,1.343615,1.343615,1.070936,1.022477,1.022477,0.954898,0.998069,1.269077,1.261431


Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
30485,HOUSEHOLD_2_516_TX_2_evaluation,0.58061,0.54277,0.54277,0.54277,0.601231,0.729446,0.729446,0.570029,0.54277,0.636495,0.636495,0.680896,0.793799,0.793799,0.657877,0.630617,0.630617,0.630617,0.675019,0.793799,0.793799,0.657877,0.630617,0.630617,0.614336,0.632668,0.751185,0.745355
30486,HOUSEHOLD_2_516_TX_3_evaluation,0.534689,0.496849,0.496849,0.496849,0.55531,0.683525,0.683525,0.524109,0.496849,0.590574,0.590574,0.634976,0.747878,0.747878,0.611956,0.584697,0.584697,0.584697,0.629098,0.747878,0.747878,0.611956,0.584697,0.584697,0.568416,0.586748,0.705264,0.699434
30487,HOUSEHOLD_2_516_WI_1_evaluation,0.50327,0.484044,0.484044,0.484044,0.542504,0.683525,0.683525,0.492689,0.484044,0.577768,0.577768,0.62217,0.747878,0.747878,0.580537,0.571891,0.571891,0.571891,0.616293,0.747878,0.747878,0.580537,0.571891,0.571891,0.55561,0.573942,0.705264,0.699434
30488,HOUSEHOLD_2_516_WI_2_evaluation,0.607766,0.58854,0.58854,0.58854,0.647001,0.77869,0.77869,0.597186,0.58854,0.696658,0.696658,0.74106,0.843043,0.843043,0.699427,0.690781,0.690781,0.690781,0.735183,0.843043,0.843043,0.699427,0.690781,0.690781,0.6745,0.678439,0.800429,0.794599
30489,HOUSEHOLD_2_516_WI_3_evaluation,0.46868,0.449454,0.449454,0.449454,0.507915,0.639604,0.639604,0.4581,0.449454,0.557572,0.557572,0.601974,0.703957,0.703957,0.560341,0.551695,0.551695,0.551695,0.596097,0.703957,0.703957,0.560341,0.551695,0.551695,0.535414,0.539353,0.661343,0.655514


(30490, 29)

### 誤差率の検証

In [None]:
sta = pd.read_csv(path + "sales_train_evaluation.csv")
sta = sta[["d_1914", "d_1915","d_1916","d_1917","d_1918","d_1919","d_1920","d_1921","d_1922","d_1923","d_1924","d_1925","d_1926","d_1927","d_1928","d_1929","d_1930","d_1931","d_1932","d_1933","d_1934","d_1935","d_1936","d_1937","d_1938","d_1939","d_1940","d_1941"]]
sta.columns = ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]

In [None]:
for i in ["F1", "F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","F25","F26","F27","F28"]:
    sta[i] = sta[i].astype('float64')

In [None]:
pre_val_temp = pre_val.drop("id",axis=1)
pre_val2_temp = pre_val2.drop("id",axis=1)
pre_val3_temp = pre_val3.drop("id",axis=1)

### d_1914_1941 の誤差率

In [None]:
# 二乗平均平方根誤差 (RMSE)
display(np.sqrt(mean_squared_error(sta, pre_val_temp)))
display(np.sqrt(mean_squared_error(sta, pre_val2_temp)))
display(np.sqrt(mean_squared_error(sta, pre_val3_temp)))

In [None]:
# 平均絶対誤差 (MAE)
display(mean_absolute_error(sta, pre_val_temp))
display(mean_absolute_error(sta, pre_val2_temp))
display(mean_absolute_error(sta, pre_val3_temp))

In [None]:
# 決定係数
# モデルの当てはまりの良さを示す指標で、最も当てはまりの良い場合、1.0 となります
display(r2_score(sta, pre_val_temp))
display(r2_score(sta, pre_val2_temp))
display(r2_score(sta, pre_val3_temp))

In [None]:
del pre_val_temp,pre_val2_temp ,pre_val3_temp
gc.collect()

# データ統合 csv保存

# lightGBM

In [None]:
pre_uni = pd.concat([sta, pre_eva], axis=0)

# catboost

In [None]:
pre_uni2 = pd.concat([sta, pre_eva2], axis=0)

### XGboost

In [None]:
pre_uni3 = pd.concat([sta, pre_eva3], axis=0)

In [None]:
del pre_val, pre_eva, pre_val2, pre_eva2, pre_val3, pre_eva3, sta
gc.collect()

In [None]:
pre_uni_am = pre_uni.set_index('id') * 0.2 + pre_uni2.set_index('id') * 0.7 + pre_uni2.set_index('id') * 0.1

In [None]:
pre_uni_am = pre_uni_am.reset_index()

In [None]:
del pre_uni, pre_uni2, pre_uni3
gc.collect()

In [None]:
display(pre_uni_am.head())
display(pre_uni_am.tail())
display(pre_uni_am.shape)

In [None]:
pre_uni_am.to_csv('submission_uni.csv', index = False)

In [None]:
del pre_uni_am
gc.collect()