In [1]:
# for "2. Data Loading"
import pandas as pd

# for "3-1. Feature Generation"
import numpy as np

# for "3-2. Feature Engineering"
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import RobustScaler, StandardScaler

# for "4. Modeling with Pycaret"
from pycaret.regression import *

# for "5. Modeling with CatBoostRegressor"
from catboost import CatBoostRegressor
import optuna
from optuna import Trial
from optuna.samplers import TPESampler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, StratifiedKFold

### 1121
추가 사항: 석식계 예측 시 자기계발의 날(석식계가 0인 날) 행을 제거하여 학습 진행

### 1122
추가 사항 독립변수 스케일링 진행

### 1124
- 타겟 변수와 상관관계가 낮은 "재택근무자수"열 제거
- 앙상블 개수 변화

### 1125
- 석식계 예측에 중식계 사용

In [2]:
# Load the preprocessed train / test tables (cp949-encoded CSV files) and
# index both of them by the date column "일자".
pre_tr = pd.read_csv(
    'data_preprocess/pre_tr_1118.csv', encoding="cp949"
).set_index("일자")
pre_te = pd.read_csv(
    'data_preprocess/pre_te_1118.csv', encoding="cp949"
).set_index("일자")

# Run tag; it is inserted into the submission file name when the CSV is saved.
today = "1125"

In [3]:
# Inspect the training frame: index range, column names, non-null counts and dtypes.
pre_tr.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1205 entries, 2016-02-01 to 2021-01-26
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   요일      1205 non-null   int64  
 1   정원수     1205 non-null   int64  
 2   휴가자수    1205 non-null   int64  
 3   출장자수    1205 non-null   int64  
 4   야근자수    1205 non-null   int64  
 5   재택근무자수  1205 non-null   float64
 6   조식메뉴    1205 non-null   object 
 7   중식메뉴    1205 non-null   object 
 8   석식메뉴    1205 non-null   object 
 9   중식계     1205 non-null   float64
 10  석식계     1205 non-null   float64
 11  출근자수    1205 non-null   float64
 12  월       1205 non-null   int64  
 13  년도      1205 non-null   int64  
dtypes: float64(4), int64(7), object(3)
memory usage: 141.2+ KB


조식메뉴, 중식메뉴, 석식메뉴 열은 모두 없어도 될 것 같다.
일자도 일단 제외하고 진행한다.

In [4]:
# Target series taken from the training frame:
#   y_lun -> lunch target  ("중식계")
#   y_din -> dinner target ("석식계")
y_lun, y_din = pre_tr["중식계"], pre_tr["석식계"]

In [5]:
# Correlation of every numeric column with the lunch target "중식계".
# The three menu columns hold text (dtype object), so the numeric columns are
# selected explicitly: recent pandas versions no longer drop non-numeric
# columns silently inside corr() and raise instead.
pre_tr.select_dtypes(include="number").corr()["중식계"]

요일       -0.731563
정원수      -0.115529
휴가자수     -0.391975
출장자수     -0.512680
야근자수      0.535611
재택근무자수    0.076509
중식계       1.000000
석식계       0.508287
출근자수      0.286810
월        -0.154664
년도       -0.078804
Name: 중식계, dtype: float64

In [6]:
# Correlation of every numeric column with the dinner target "석식계".
# The three menu columns hold text (dtype object), so the numeric columns are
# selected explicitly: recent pandas versions no longer drop non-numeric
# columns silently inside corr() and raise instead.
pre_tr.select_dtypes(include="number").corr()["석식계"]

요일       -0.312112
정원수      -0.173852
휴가자수     -0.316894
출장자수     -0.188164
야근자수      0.571168
재택근무자수   -0.057534
중식계       0.508287
석식계       1.000000
출근자수      0.172373
월        -0.127142
년도       -0.194792
Name: 석식계, dtype: float64

## 중식계 예측

In [7]:
# Columns from "조식메뉴" through "석식계" (the menu texts and both targets) are
# not used as features; "재택근무자수" is dropped as well.
train_dr = pre_tr.loc[:, "조식메뉴":"석식계"]
X_train = pre_tr.drop(columns=list(train_dr.columns) + ["재택근무자수"])

In [8]:
# Standardise the feature columns with StandardScaler (imported in the first
# cell of the notebook).
# NOTE: the scaler keeps the name `minmax` because later cells refer to it,
# but it is a StandardScaler, not a min-max scaler.
minmax = StandardScaler()

# Names of the columns that get scaled; reused when the test set is transformed.
col_na = X_train.loc[:, "요일":"년도"].columns

# Assign whole columns instead of writing through `.loc`, so the integer
# columns are replaced by the float result rather than upcast in place
# (silent upcasting on setitem is deprecated in recent pandas versions).
X_train[col_na] = minmax.fit_transform(X_train[col_na])

In [9]:
# Put the (unscaled) lunch target next to the scaled features; PyCaret's
# setup() takes features and target in a single frame.
X_train = X_train.assign(**{"중식계": pre_tr["중식계"]})
display(X_train)

Unnamed: 0_level_0,요일,정원수,휴가자수,출장자수,야근자수,출근자수,월,년도,중식계
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-02-01,-1.415977,-1.208083,-0.748721,-2.094550,-0.146735,0.202544,-1.306899,-1.431796,1039.0
2016-02-02,-0.709162,-1.208083,-0.748721,-1.565987,0.182349,0.072225,-1.306899,-1.431796,867.0
2016-02-03,-0.002346,-1.208083,-0.707092,-1.405120,-0.662707,-0.001434,-1.306899,-1.431796,1017.0
2016-02-04,0.704469,-1.208083,-0.374061,-0.485881,0.328609,-0.500047,-1.306899,-1.431796,978.0
2016-02-05,1.411285,-1.208083,0.833176,-1.382140,-0.975540,-1.264963,-1.306899,-1.431796,925.0
...,...,...,...,...,...,...,...,...,...
2021-01-20,-0.002346,1.023312,-0.575267,-0.991463,-1.097423,-0.262072,-1.596547,2.056506,1093.0
2021-01-21,0.704469,1.023312,-0.457319,-0.233090,0.763325,-0.318733,-1.596547,2.056506,832.0
2021-01-22,1.411285,1.023312,0.673599,0.157587,-1.109612,-1.066652,-1.596547,2.056506,579.0
2021-01-25,-1.415977,1.023312,-0.353247,-2.025607,1.388992,0.174213,-1.596547,2.056506,1145.0


In [10]:
# Every column except the target is declared as a numeric feature.
lunch_numeric_features = [col for col in X_train.columns if col != '중식계']

reg = setup(
    X_train,
    # Author's note: with True, PyCaret applies its own additional feature
    # engineering, which made predicting on the test frame impossible.
    preprocess=False,
    # The goal is to train on (almost) all data and predict the test set,
    # so nearly everything goes to the training split.
    train_size=0.999,
    # Target variable: lunch count.
    target='중식계',
    # Skip the interactive confirmation prompt.
    silent=True,
    # Set to True when a GPU is available (speeds up CatBoost).
    use_gpu=False,
    numeric_features=lunch_numeric_features,
    session_id=2021,
    fold_shuffle=True,
)

Unnamed: 0,Description,Value
0,session_id,2021
1,Target,중식계
2,Original Data,"(1205, 9)"
3,Missing Values,False
4,Numeric Features,8
5,Categorical Features,0
6,Transformed Train Set,"(1203, 8)"
7,Transformed Test Set,"(2, 8)"
8,Shuffle Train-Test,True
9,Stratify Train-Test,False


In [11]:
# Compare PyCaret's candidate regressors, ranked by MAE, and keep the five best.
top5 = compare_models(n_select = 5, sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
catboost,CatBoost Regressor,65.1518,7663.71,87.1848,0.8224,0.1072,0.0787,0.472
gbr,Gradient Boosting Regressor,66.7394,8123.0124,89.5863,0.8115,0.1094,0.0806,0.028
et,Extra Trees Regressor,70.1876,9193.0612,95.3028,0.7862,0.1182,0.0856,0.065
rf,Random Forest Regressor,70.6107,9036.151,94.5084,0.79,0.1165,0.0857,0.089
lightgbm,Light Gradient Boosting Machine,70.7275,8825.3579,93.3408,0.7943,0.1156,0.0859,0.124
xgboost,Extreme Gradient Boosting,72.8265,9353.952,96.3416,0.7824,0.1181,0.0876,0.312
knn,K Neighbors Regressor,75.7644,10081.3208,100.0612,0.7661,0.1231,0.0919,0.006
ada,AdaBoost Regressor,84.351,11532.7917,107.243,0.7322,0.1324,0.1042,0.023
huber,Huber Regressor,86.2217,12848.1062,112.8932,0.7037,0.1367,0.1035,0.006
lar,Least Angle Regression,86.7158,12719.2241,112.3469,0.7065,0.1354,0.1036,0.005


In [12]:
# Tune each selected model for MAE (30 search iterations per model);
# choose_better=True keeps the untuned model when tuning does not improve it.
models = [
    tune_model(candidate, optimize='MAE', choose_better=True, n_iter=30)
    for candidate in top5
]

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,75.1819,11191.2263,105.7886,0.7067,0.1349,0.0939
1,61.6339,6462.5389,80.3899,0.8689,0.1041,0.0776
2,67.5481,7682.7972,87.6516,0.8407,0.1217,0.0878
3,62.3034,6992.4155,83.6207,0.8398,0.0977,0.0726
4,75.2421,9441.4618,97.1672,0.7746,0.1233,0.0936
5,75.3019,10775.2858,103.8041,0.7731,0.134,0.0918
6,65.2906,6975.7743,83.5211,0.8182,0.1115,0.0827
7,65.4087,7089.3683,84.1984,0.8292,0.0955,0.0737
8,63.8466,7789.1428,88.2561,0.8017,0.1039,0.0751
9,76.2358,10466.3069,102.305,0.778,0.1308,0.095


In [13]:
# Build the test feature frame with the same columns as the training features:
# drop the menu text columns and "재택근무자수".
test_dr = pre_te.loc[:, "조식메뉴":"석식메뉴"]
X_test = pre_te.drop(columns=list(test_dr.columns) + ["재택근무자수"])

# Apply the scaler that was fitted on the training features. The same column
# list (`col_na`) is used on both sides, and whole columns are assigned so the
# integer columns are replaced by floats instead of being upcast in place.
X_test[col_na] = minmax.transform(X_test[col_na])
display(X_test)

Unnamed: 0_level_0,요일,정원수,휴가자수,출장자수,야근자수,출근자수,월,년도
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-27,-0.002346,1.023312,-0.485071,-1.359159,-1.093361,0.202544,-1.596547,2.056506
2021-01-28,0.704469,1.023312,-0.374061,-0.669729,0.547999,0.072225,-1.596547,2.056506
2021-01-29,1.411285,1.023312,0.777671,0.180568,-1.113675,-0.001434,-1.596547,2.056506
2021-02-01,-1.415977,0.678673,-0.346308,-2.002626,1.072096,-0.500047,-1.306899,2.056506
2021-02-02,-0.709162,0.678673,-0.665463,-1.267235,0.734886,-1.264963,-1.306899,2.056506
2021-02-03,-0.002346,0.678673,-0.686278,-0.968482,-1.093361,-1.644589,-1.306899,2.056506
2021-02-04,0.704469,0.678673,-0.672401,-0.69271,0.820204,-1.752244,-1.306899,2.056506
2021-02-05,1.411285,0.678673,0.076918,0.249511,-1.113675,-0.313067,-1.306899,2.056506
2021-02-08,-1.415977,0.678673,-0.485071,-1.543006,1.689637,-0.40939,-1.306899,2.056506
2021-02-09,-0.709162,0.678673,-0.443443,-1.336178,1.088347,-0.522711,-1.306899,2.056506


In [14]:
# Blend the tuned models into one ensemble, tune that ensemble for MAE, and
# finalize it before predicting.
blended_lunch = blend_models(models, optimize='MAE')
voting = tune_model(blended_lunch, optimize='MAE', choose_better=True, n_iter=30)
voting = finalize_model(voting)

# Predict lunch for the test period and store it in the submission template.
sample = pd.read_csv('sub/sample_submission.csv', encoding="cp949")
layer1_pred = voting.predict(X_test)
sample['중식계'] = layer1_pred

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,73.5936,10500.0868,102.4699,0.7248,0.1209,0.0886
1,59.9581,6157.6482,78.4707,0.8751,0.1035,0.0767
2,63.3062,6986.8914,83.5876,0.8552,0.1163,0.0825
3,64.3884,7962.9797,89.2355,0.8176,0.1048,0.0756
4,71.7956,9364.0877,96.7682,0.7764,0.1198,0.0878
5,67.9113,8527.9751,92.347,0.8204,0.111,0.0793
6,58.4013,5399.744,73.483,0.8593,0.0924,0.0721
7,61.5867,6492.4665,80.5758,0.8436,0.0927,0.0703
8,59.3399,6396.2432,79.9765,0.8372,0.0947,0.0707
9,68.3375,9015.8722,94.9519,0.8088,0.1204,0.0847


## 석식계 예측

In [15]:
# Rebuild the training features for the dinner model: drop the menu texts,
# both targets and "재택근무자수".
train_dr = pre_tr.loc[:, "조식메뉴":"석식계"]
X_train = pre_tr.drop(columns=list(train_dr.columns) + ["재택근무자수"])

# Scale the same columns as in the lunch section. Whole columns are assigned
# (instead of writing through `.loc`) so the integer columns are replaced by
# the float result rather than upcast in place.
X_train[col_na] = minmax.fit_transform(X_train[col_na])

# The lunch count is used, unscaled, as an extra feature for the dinner model;
# the dinner count is the target. Both are appended after the scaled columns,
# which keeps the column order ..., "년도", "중식계", "석식계".
X_train["중식계"] = pre_tr["중식계"]
X_train["석식계"] = pre_tr["석식계"]
display(X_train)

Unnamed: 0_level_0,요일,정원수,휴가자수,출장자수,야근자수,출근자수,월,년도,중식계,석식계
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-02-01,-1.415977,-1.208083,-0.748721,-2.094550,-0.146735,0.202544,-1.306899,-1.431796,1039.0,331.0
2016-02-02,-0.709162,-1.208083,-0.748721,-1.565987,0.182349,0.072225,-1.306899,-1.431796,867.0,560.0
2016-02-03,-0.002346,-1.208083,-0.707092,-1.405120,-0.662707,-0.001434,-1.306899,-1.431796,1017.0,573.0
2016-02-04,0.704469,-1.208083,-0.374061,-0.485881,0.328609,-0.500047,-1.306899,-1.431796,978.0,525.0
2016-02-05,1.411285,-1.208083,0.833176,-1.382140,-0.975540,-1.264963,-1.306899,-1.431796,925.0,330.0
...,...,...,...,...,...,...,...,...,...,...
2021-01-20,-0.002346,1.023312,-0.575267,-0.991463,-1.097423,-0.262072,-1.596547,2.056506,1093.0,421.0
2021-01-21,0.704469,1.023312,-0.457319,-0.233090,0.763325,-0.318733,-1.596547,2.056506,832.0,353.0
2021-01-22,1.411285,1.023312,0.673599,0.157587,-1.109612,-1.066652,-1.596547,2.056506,579.0,217.0
2021-01-25,-1.415977,1.023312,-0.353247,-2.025607,1.388992,0.174213,-1.596547,2.056506,1145.0,502.0


In [16]:
# "Self-development days": rows whose dinner count is 0 are treated as
# outliers for the dinner model (43 rows in total).
# This cell only selects those rows for inspection; they are dropped from the
# training data in the following cell.
is_dinner_zero = X_train["석식계"].eq(0)
day_of_dinner0 = X_train[is_dinner_zero]
display(day_of_dinner0)

Unnamed: 0_level_0,요일,정원수,휴가자수,출장자수,야근자수,출근자수,월,년도,중식계,석식계
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-11-30,-0.002346,-0.694044,-0.623834,-0.784634,-1.113675,0.276202,1.299928,-1.431796,1109.0,0.0
2016-12-28,-0.002346,-0.600583,0.056104,-0.370976,-1.113675,-0.290403,1.589576,-1.431796,767.0,0.0
2017-01-25,-0.002346,-0.647314,-0.547515,-0.876558,-1.113675,0.281868,-1.596547,-0.734135,720.0,0.0
2017-02-22,-0.002346,-1.027001,-0.575267,0.249511,-1.113675,-0.341397,-1.306899,-0.734135,1065.0,0.0
2017-03-22,-0.002346,-1.056208,-0.727906,-0.141166,-1.113675,-0.148751,-1.017252,-0.734135,953.0,0.0
2017-04-26,-0.002346,-1.062049,-0.783412,1.444522,-1.113675,-0.500047,-0.727604,-0.734135,835.0,0.0
2017-05-31,-0.002346,-0.997795,-0.797288,0.548263,-1.113675,-0.205412,-0.437957,-0.734135,910.0,0.0
2017-06-28,-0.002346,-0.93354,-0.693216,0.410377,-1.113675,-0.19408,-0.148309,-0.734135,745.0,0.0
2017-07-26,-0.002346,0.182158,0.666661,0.111625,-1.113675,-0.148751,0.141338,-0.734135,797.0,0.0
2017-09-01,1.411285,-0.968588,0.132424,1.421541,-0.93085,-1.151642,0.720633,-0.734135,663.0,0.0


In [17]:
# Remove the "self-development day" rows (dinner count 0, 43 rows in total)
# from the dinner training data, using the dates selected in `day_of_dinner0`.
X_train = X_train.drop(index=day_of_dinner0.index)
display(X_train)

Unnamed: 0_level_0,요일,정원수,휴가자수,출장자수,야근자수,출근자수,월,년도,중식계,석식계
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-02-01,-1.415977,-1.208083,-0.748721,-2.094550,-0.146735,0.202544,-1.306899,-1.431796,1039.0,331.0
2016-02-02,-0.709162,-1.208083,-0.748721,-1.565987,0.182349,0.072225,-1.306899,-1.431796,867.0,560.0
2016-02-03,-0.002346,-1.208083,-0.707092,-1.405120,-0.662707,-0.001434,-1.306899,-1.431796,1017.0,573.0
2016-02-04,0.704469,-1.208083,-0.374061,-0.485881,0.328609,-0.500047,-1.306899,-1.431796,978.0,525.0
2016-02-05,1.411285,-1.208083,0.833176,-1.382140,-0.975540,-1.264963,-1.306899,-1.431796,925.0,330.0
...,...,...,...,...,...,...,...,...,...,...
2021-01-20,-0.002346,1.023312,-0.575267,-0.991463,-1.097423,-0.262072,-1.596547,2.056506,1093.0,421.0
2021-01-21,0.704469,1.023312,-0.457319,-0.233090,0.763325,-0.318733,-1.596547,2.056506,832.0,353.0
2021-01-22,1.411285,1.023312,0.673599,0.157587,-1.109612,-1.066652,-1.596547,2.056506,579.0,217.0
2021-01-25,-1.415977,1.023312,-0.353247,-2.025607,1.388992,0.174213,-1.596547,2.056506,1145.0,502.0


In [18]:
# Every column except the target is declared as a numeric feature.
dinner_numeric_features = [col for col in X_train.columns if col != '석식계']

reg = setup(
    X_train,
    # Author's note: with True, PyCaret applies its own additional feature
    # engineering, which made predicting on the test frame impossible.
    preprocess=False,
    # The goal is to train on (almost) all data and predict the test set,
    # so nearly everything goes to the training split.
    train_size=0.999,
    # Target variable: dinner count.
    target='석식계',
    # Skip the interactive confirmation prompt.
    silent=True,
    # Set to True when a GPU is available (speeds up CatBoost).
    use_gpu=False,
    numeric_features=dinner_numeric_features,
    session_id=2021,
    fold_shuffle=True,
)

Unnamed: 0,Description,Value
0,session_id,2021
1,Target,석식계
2,Original Data,"(1162, 10)"
3,Missing Values,False
4,Numeric Features,9
5,Categorical Features,0
6,Transformed Train Set,"(1160, 9)"
7,Transformed Test Set,"(2, 9)"
8,Shuffle Train-Test,True
9,Stratify Train-Test,False


In [19]:
# Compare PyCaret's candidate regressors, ranked by MAE, and keep the five best.
top5 = compare_models(n_select = 5, sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
catboost,CatBoost Regressor,41.8762,3164.7131,56.0295,0.7274,0.1363,0.0992,0.586
lightgbm,Light Gradient Boosting Machine,43.4531,3372.7332,57.9168,0.7098,0.1393,0.1022,0.059
gbr,Gradient Boosting Regressor,43.9469,3394.2302,57.9534,0.7092,0.1397,0.1038,0.033
et,Extra Trees Regressor,44.8432,3704.618,60.5039,0.6824,0.146,0.1067,0.072
rf,Random Forest Regressor,45.1221,3737.4041,60.8586,0.6801,0.1464,0.1068,0.093
xgboost,Extreme Gradient Boosting,45.2105,3748.1735,60.8784,0.6789,0.1479,0.1063,0.219
br,Bayesian Ridge,49.4308,4256.5423,64.9318,0.6351,0.1554,0.117,0.005
ridge,Ridge Regression,49.432,4257.6744,64.938,0.635,0.1553,0.1169,0.005
lr,Linear Regression,49.4334,4258.0097,64.9401,0.635,0.1553,0.1169,0.005
lar,Least Angle Regression,49.4334,4258.0099,64.9401,0.635,0.1553,0.1169,0.005


In [20]:
# Tune each selected model for MAE (30 search iterations per model);
# choose_better=True keeps the untuned model when tuning does not improve it.
models = [
    tune_model(candidate, optimize='MAE', choose_better=True, n_iter=30)
    for candidate in top5
]

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,44.2379,3645.478,60.3778,0.6984,0.1674,0.1154
1,50.7155,4863.4819,69.7387,0.539,0.1379,0.1035
2,49.2981,3990.1163,63.1674,0.6346,0.1679,0.1257
3,45.2682,3427.5402,58.5452,0.7015,0.1393,0.1069
4,48.646,5016.8151,70.8295,0.6386,0.1854,0.1273
5,41.4172,3010.7199,54.87,0.7485,0.1247,0.0946
6,43.2477,3178.6695,56.3797,0.7294,0.1359,0.1025
7,42.1892,3164.1663,56.2509,0.7582,0.1448,0.104
8,41.2801,2731.164,52.2605,0.6723,0.1128,0.0916
9,49.1033,4639.9633,68.1173,0.6531,0.1742,0.1224


In [53]:
# Build the test feature frame for the dinner model: drop the menu text
# columns and "재택근무자수", then scale with the already fitted scaler.
test_dr = pre_te.loc[:, "조식메뉴":"석식메뉴"]
X_test = pre_te.drop(columns=list(test_dr.columns) + ["재택근무자수"])

# Whole-column assignment replaces the integer columns with the float result
# instead of upcasting them in place.
X_test[col_na] = minmax.transform(X_test[col_na])

# The dinner model was trained with the lunch count as a feature, so the
# predicted lunch values from the submission frame are used here.
# Bracket assignment is required: `X_test.중식계 = ...` only sets a Python
# attribute when the column does not exist yet, leaving the feature missing.
# The values are matched by position (submission rows are assumed to be in the
# same date order as the test frame — TODO confirm); a length mismatch raises.
X_test["중식계"] = sample["중식계"].values
display(X_test)

# Run tag used in the submission file name.
today = "1125"

Unnamed: 0_level_0,요일,정원수,휴가자수,출장자수,야근자수,출근자수,월,년도,중식계
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-01-27,-0.002346,1.023312,-0.485071,-1.359159,-1.093361,0.202544,-1.596547,2.056506,1016.516032
2021-01-28,0.704469,1.023312,-0.374061,-0.669729,0.547999,0.072225,-1.596547,2.056506,917.042004
2021-01-29,1.411285,1.023312,0.777671,0.180568,-1.113675,-0.001434,-1.596547,2.056506,611.244667
2021-02-01,-1.415977,0.678673,-0.346308,-2.002626,1.072096,-0.500047,-1.306899,2.056506,1203.020576
2021-02-02,-0.709162,0.678673,-0.665463,-1.267235,0.734886,-1.264963,-1.306899,2.056506,968.102977
2021-02-03,-0.002346,0.678673,-0.686278,-0.968482,-1.093361,-1.644589,-1.306899,2.056506,946.674423
2021-02-04,0.704469,0.678673,-0.672401,-0.69271,0.820204,-1.752244,-1.306899,2.056506,894.213281
2021-02-05,1.411285,0.678673,0.076918,0.249511,-1.113675,-0.313067,-1.306899,2.056506,657.203636
2021-02-08,-1.415977,0.678673,-0.485071,-1.543006,1.689637,-0.40939,-1.306899,2.056506,1220.794044
2021-02-09,-0.709162,0.678673,-0.443443,-1.336178,1.088347,-0.522711,-1.306899,2.056506,1022.139012


In [54]:
# Blend the tuned dinner models into one ensemble, tune that ensemble for MAE,
# and finalize it before predicting.
blended_dinner = blend_models(models, optimize='MAE')
voting = tune_model(blended_dinner, optimize='MAE', choose_better=True, n_iter=30)
voting = finalize_model(voting)

# Predict dinner for the test period and store it in the submission frame.
layer1_pred = voting.predict(X_test)
sample['석식계'] = layer1_pred

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,42.0217,3087.2151,55.5627,0.7445,0.1559,0.1081
1,46.2888,3974.044,63.04,0.6233,0.1272,0.095
2,47.9275,3833.2465,61.9132,0.649,0.1499,0.1159
3,41.5977,2818.3401,53.088,0.7546,0.1211,0.0937
4,42.767,3864.4051,62.1643,0.7216,0.1659,0.1105
5,37.9507,2659.6824,51.5721,0.7778,0.1194,0.0865
6,38.5873,2611.4848,51.1027,0.7776,0.1166,0.088
7,41.1211,2964.2184,54.4446,0.7735,0.1439,0.101
8,39.2583,2395.4783,48.9436,0.7126,0.1039,0.0859
9,42.8658,3635.7776,60.2974,0.7282,0.1551,0.1066


In [55]:
# Show the submission frame, which now holds the lunch and dinner predictions.
sample

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,1016.516032,388.45672
1,2021-01-28,917.042004,394.979188
2,2021-01-29,611.244667,297.533082
3,2021-02-01,1203.020576,531.964796
4,2021-02-02,968.102977,436.062651
5,2021-02-03,946.674423,423.441048
6,2021-02-04,894.213281,440.865735
7,2021-02-05,657.203636,363.579902
8,2021-02-08,1220.794044,599.656496
9,2021-02-09,1022.139012,530.29324


In [56]:
# Write the submission file; the run tag `today` is part of the file name and
# the row index is not written.
submission_path = 'sub/Model_Cat_{}.csv'.format(today)
sample.to_csv(submission_path, index=False)