In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.rcParams['font.family'] = 'Gulim'

In [2]:
from workalendar.asia import SouthKorea
import pendulum

In [3]:
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')

## 전처리
- 일자에서 월과 일을 분리
- 요일을 레이블 인코딩화(EDA로 요일의 중요도 순 파악)
- 월 별, 일 별 중식 석식 수요 차이 파악

In [4]:
train['월'] = pd.DatetimeIndex(train['일자']).month
test['월'] = pd.DatetimeIndex(test['일자']).month
train['주'] = pd.DatetimeIndex(train['일자']).week
test['주'] = pd.DatetimeIndex(test['일자']).week
train['일'] = pd.DatetimeIndex(train['일자']).day
test['일'] = pd.DatetimeIndex(test['일자']).day

train['출근'] = train['본사정원수']-(train['본사휴가자수']+train['본사출장자수']+train['현본사소속재택근무자수'])
train['휴가비율'] = train['본사휴가자수']/train['본사정원수']
train['출장비율'] = train['본사출장자수']/train['본사정원수']
train['야근비율'] = train['본사시간외근무명령서승인건수']/train['출근']
train['재택비율'] = train['현본사소속재택근무자수']/train['본사정원수']

test['출근'] = test['본사정원수']-(test['본사휴가자수']+test['본사출장자수']+test['현본사소속재택근무자수'])
test['휴가비율'] = test['본사휴가자수']/test['본사정원수']
test['출장비율'] = test['본사출장자수']/test['본사정원수']
test['야근비율'] = test['본사시간외근무명령서승인건수']/test['출근']
test['재택비율'] = test['현본사소속재택근무자수']/test['본사정원수']

train['식사가능자수'] = train['본사정원수'] - train['본사휴가자수'] - train['현본사소속재택근무자수']
test['식사가능자수'] = test['본사정원수'] - test['본사휴가자수'] - test['현본사소속재택근무자수']

# train['중식참여율'] = train['중식계'] / train['식사가능자수']

  train['주'] = pd.DatetimeIndex(train['일자']).week
  test['주'] = pd.DatetimeIndex(test['일자']).week


In [5]:
month_rank4dinner = {
    1: 11,
    2: 2,
    3: 1,
    4: 4,
    5: 7,
    6: 6,
    7: 10,
    8: 8,
    9: 5,
    10: 3,
    11: 9,
    12: 12
}
train['월(석식)'] = train['월'].map(month_rank4dinner)
test['월(석식)'] = test['월'].map(month_rank4dinner)

month_rank4lunch = {
    1: 3,
    2: 1,
    3: 2,
    4: 6,
    5: 7,
    6: 8,
    7: 10,
    8: 9,
    9: 5,
    10: 4,
    11: 11,
    12: 12
}
train['월(중식)'] = train['월'].map(month_rank4lunch)
test['월(중식)'] = test['월'].map(month_rank4lunch)

weekday_rank4dinner = {
    '월': 1,
    '화': 2,
    '수': 4,
    '목': 3,
    '금': 5,
}

weekday_rank4lunch = {
    '월': 1,
    '화': 2,
    '수': 3,
    '목': 4,
    '금': 5,
}

train['요일(석식)'] = train['요일'].map(weekday_rank4dinner)
test['요일(석식)'] = test['요일'].map(weekday_rank4dinner)

train['요일(중식)'] = train['요일'].map(weekday_rank4lunch)
test['요일(중식)'] = test['요일'].map(weekday_rank4lunch)

In [6]:
rank = pd.DataFrame(range(1,53))
week_rank_lunch = pd.pivot_table(train,values='중식계',index='주').sort_values(by='중식계').reset_index().drop('중식계',axis=1)
week_rank_dinner = pd.pivot_table(train,values='석식계',index='주').sort_values(by='석식계').reset_index().drop('석식계',axis=1)


week_rank4lunch = {}
for i in range(len(rank)):
    week_rank4lunch[week_rank_lunch['주'][i]] = rank[0][i]


week_rank4dinner = {}
for i in range(len(rank)):
    week_rank4dinner[week_rank_dinner['주'][i]] = rank[0][i]
    
    
train['주(중식)'] = train['주'].map(week_rank4lunch)
test['주(중식)'] = test['주'].map(week_rank4lunch)

train['주(석식)'] = train['주'].map(week_rank4dinner)
test['주(석식)'] = test['주'].map(week_rank4dinner)

In [7]:
def is_holiday(date):
    holidays = list(map(str, pd.Series(np.array(SouthKorea().holidays(int(date[:4])))[:, 0])))
    
    yesterday = str(np.datetime64(date) - 1)
    tomorrow = str(np.datetime64(date) + 1)

    if tomorrow in holidays and yesterday in holidays:
        return 3
    if tomorrow in holidays:
        return 2
    elif yesterday in holidays:
        return 1
    else : 
        return 0

def week_of_month(x):
    dt = pendulum.parse(x)
    
    wom = dt.week_of_month
    if wom < 0:
        wom += 52
    return wom
    

df = pd.concat([train[['본사정원수', '일자']], test[['본사정원수', '일자']]])
df['년월'] = df['일자'].apply(lambda x : x[:7])
df = df[['년월', '본사정원수']].groupby(by=['년월'], as_index=False).mean()

def member_change(date):
    this_month = date[:7]
    last_month = str(np.datetime64(this_month) - 1)
    
    this_month_member = int(df[df['년월'] == this_month]['본사정원수'])
    last_month_member = int(df[df['년월'] == last_month]['본사정원수'])
    
    
    return  this_month_member - last_month_member

train['공휴일전후'] = train['일자'].apply(is_holiday)
test['공휴일전후'] = test['일자'].apply(is_holiday)

train['몇주차'] = train['일자'].apply(week_of_month)
test['몇주차'] = test['일자'].apply(week_of_month)

train = train[train['일자'] > '2016-03']
train['인원변화'] = train['일자'].apply(member_change)
test['인원변화'] = test['일자'].apply(member_change)

## 공휴일 변수 생성

In [8]:
train.columns

Index(['일자', '요일', '본사정원수', '본사휴가자수', '본사출장자수', '본사시간외근무명령서승인건수',
       '현본사소속재택근무자수', '조식메뉴', '중식메뉴', '석식메뉴', '중식계', '석식계', '월', '주', '일',
       '출근', '휴가비율', '출장비율', '야근비율', '재택비율', '식사가능자수', '월(석식)', '월(중식)',
       '요일(석식)', '요일(중식)', '주(중식)', '주(석식)', '공휴일전후', '몇주차', '인원변화'],
      dtype='object')

In [23]:
# 메뉴 변수 없이 사용할떄 해당 코드 사용['공휴일전후', '몇주차', '인원변화']

lunch_train = train[['공휴일전후', '몇주차', '인원변화', '요일(중식)','월(중식)','일','주(중식)','출근', '휴가비율', '출장비율', '야근비율', '재택비율','본사출장자수','본사휴가자수','식사가능자수','본사시간외근무명령서승인건수']]
lunch_test = test[['공휴일전후', '몇주차', '인원변화', '요일(중식)','월(중식)','일','주(중식)','출근', '휴가비율', '출장비율', '야근비율', '재택비율','본사출장자수','본사휴가자수','식사가능자수','본사시간외근무명령서승인건수']]

dinner_train= train[['공휴일전후', '몇주차', '인원변화', '요일(석식)','월(석식)','일','주(석식)','출근', '휴가비율', '출장비율', '야근비율', '재택비율','본사출장자수','본사휴가자수','식사가능자수','본사시간외근무명령서승인건수']]
dinner_test = test[['공휴일전후', '몇주차', '인원변화', '요일(석식)','월(석식)','일','주(석식)','출근', '휴가비율', '출장비율', '야근비율', '재택비율','본사출장자수','본사휴가자수','식사가능자수','본사시간외근무명령서승인건수']]

In [10]:
lunch_train.columns

Index(['공휴일전후', '몇주차', '인원변화', '요일(중식)', '월(중식)', '일', '주(중식)', '출근', '휴가비율',
       '출장비율', '야근비율', '재택비율', '본사출장자수', '본사휴가자수', '식사가능자수', '본사시간외근무명령서승인건수'],
      dtype='object')

In [11]:
print(lunch_train.shape)
print(lunch_test.shape)

(1187, 16)
(50, 16)


In [12]:
print(dinner_train.shape)
print(dinner_test.shape)

(1187, 16)
(50, 16)


In [13]:
cat_features = [f for f in lunch_train.columns if lunch_train[f].dtype == 'object']

def column_index(df, cat_features):
    cols = df.columns.values
    sidx = np.argsort(cols)
    return sidx[np.searchsorted(cols, cat_features, sorter=sidx)]

cat_features_idx = column_index(lunch_train, cat_features)    
print("Cat features are: %s" % [f for f in cat_features])
print(cat_features_idx)

Cat features are: []
[]


In [14]:
y_lunch = train[['중식계']]
y_dinner = train[['석식계']]

#### 분포 확인 및 분포 조정

# 중식 예측모델

In [15]:
from automl_alex import LightGBMRegressor, XGBoostRegressor
from sklearn.metrics import mean_absolute_error
import sklearn

In [18]:
lunch_model = XGBoostRegressor(random_state=42)

lunch_model.opt(lunch_train,  y_lunch,
            verbose=3,  cold_start=120,folds=8,opt_lvl=3,early_stoping=120, auto_parameters=False,#metric=sklearn.metrics.roc_auc_score,
      timeout=1100,metric=sklearn.metrics.mean_absolute_error # optimization time in seconds,
      )

[32m20:43:59[0m | [1mregression optimize: minimize[0m
--- Logging error in Loguru Handler #2 ---
Record was: {'elapsed': datetime.timedelta(seconds=17, microseconds=144111), 'exception': None, 'extra': {}, 'file': (name='_base.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\_base.py'), 'function': 'opt', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 270, 'message': 'regression optimize: minimize', 'module': '_base', 'name': 'automl_alex._base', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 20, 43, 59, 167287, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotat

--- Logging error in Loguru Handler #6 ---
Record was: {'elapsed': datetime.timedelta(seconds=159, microseconds=260366), 'exception': None, 'extra': {}, 'file': (name='optimizer.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\optimizer.py'), 'function': 'opt', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 625, 'message': '--------------------------------------------------', 'module': 'optimizer', 'name': 'automl_alex.optimizer', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 20, 46, 21, 283542, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotating=True)
  File "C:

--- Logging error in Loguru Handler #6 ---
Record was: {'elapsed': datetime.timedelta(seconds=159, microseconds=306889), 'exception': None, 'extra': {}, 'file': (name='optimizer.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\optimizer.py'), 'function': '_print_opt_parameters', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 256, 'message': 'CV_Folds = 8', 'module': 'optimizer', 'name': 'automl_alex.optimizer', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 20, 46, 21, 330065, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotating=True)
  File "C:\anaconda3\envs\mach

--- Logging error in Loguru Handler #6 ---
Record was: {'elapsed': datetime.timedelta(seconds=159, microseconds=324202), 'exception': None, 'extra': {}, 'file': (name='optimizer.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\optimizer.py'), 'function': '_print_opt_parameters', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 262, 'message': 'Metric = mean_absolute_error', 'module': 'optimizer', 'name': 'automl_alex.optimizer', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 20, 46, 21, 347378, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotating=True)
  File "C:\ana

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_xgb_booster,params_xgb_colsample_bytree,params_xgb_gamma,params_xgb_grow_policy,params_xgb_learning_rate,params_xgb_max_depth,params_xgb_min_child_weight,params_xgb_n_estimators,params_xgb_normalize_type,params_xgb_rate_drop,params_xgb_sample_type,params_xgb_skip_drop,params_xgb_subsample,state
0,0,73.9754,2021-07-15 20:43:59.946390,2021-07-15 20:44:05.941890,0 days 00:00:05.995500,gbtree,0.8,0.004043,depthwise,38,18.0,7.0,10,,,,,1.0,COMPLETE
1,1,101.8504,2021-07-15 20:44:06.044734,2021-07-15 20:44:06.409795,0 days 00:00:00.365061,gblinear,0.2,,,84,,,5,,,,,0.3,COMPLETE
2,2,108.4775,2021-07-15 20:44:06.492398,2021-07-15 20:44:07.003646,0 days 00:00:00.511248,gblinear,0.2,,,30,,,8,,,,,0.7,COMPLETE
3,3,69.6359,2021-07-15 20:44:07.079165,2021-07-15 20:45:20.880501,0 days 00:01:13.801336,dart,0.6,0.622003,depthwise,20,19.0,8.0,10,tree,0.000091,weighted,1.884118e-08,0.6,COMPLETE
4,4,77.7800,2021-07-15 20:45:20.961225,2021-07-15 20:45:27.392921,0 days 00:00:06.431696,dart,0.4,0.433505,depthwise,26,16.0,97.0,3,tree,0.000004,uniform,1.286525e-05,0.7,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,124,73.5118,2021-07-15 20:59:37.425336,2021-07-15 20:59:38.118918,0 days 00:00:00.693582,gbtree,0.9,0.000092,depthwise,23,2.0,23.0,3,,,,,1.0,COMPLETE
125,125,70.1207,2021-07-15 20:59:38.216996,2021-07-15 20:59:53.313024,0 days 00:00:15.096028,dart,0.9,0.000057,depthwise,56,19.0,22.0,4,forest,0.000047,uniform,1.313715e-07,0.9,COMPLETE
126,126,70.8338,2021-07-15 20:59:53.410725,2021-07-15 21:00:08.725592,0 days 00:00:15.314867,dart,0.9,0.000063,depthwise,60,19.0,22.0,4,forest,0.000043,uniform,1.144838e-07,0.9,COMPLETE
127,127,70.8338,2021-07-15 21:00:08.816969,2021-07-15 21:00:23.472054,0 days 00:00:14.655085,dart,0.9,0.000057,depthwise,60,19.0,22.0,4,forest,0.000041,uniform,6.593197e-08,0.9,COMPLETE


In [19]:
predicts_LGBM_lunch = lunch_model.predict(lunch_test)

In [20]:
dinner_model = XGBoostRegressor(random_state=42)

dinner_model.opt(dinner_train,  y_dinner,
            verbose=3,  cold_start=120,folds=8,opt_lvl=3,early_stoping=120, auto_parameters=False,#metric=sklearn.metrics.roc_auc_score,
      timeout=1100,metric=sklearn.metrics.mean_absolute_error # optimization time in seconds,
      )

[32m21:00:40[0m | [1mregression optimize: minimize[0m
--- Logging error in Loguru Handler #6 ---
Record was: {'elapsed': datetime.timedelta(seconds=1018, microseconds=229856), 'exception': None, 'extra': {}, 'file': (name='_base.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\_base.py'), 'function': 'opt', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 270, 'message': 'regression optimize: minimize', 'module': '_base', 'name': 'automl_alex._base', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 21, 0, 40, 253032, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rota

--- Logging error in Loguru Handler #10 ---
Record was: {'elapsed': datetime.timedelta(seconds=1170, microseconds=65920), 'exception': None, 'extra': {}, 'file': (name='optimizer.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\optimizer.py'), 'function': 'opt', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 625, 'message': '--------------------------------------------------', 'module': 'optimizer', 'name': 'automl_alex.optimizer', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 21, 3, 12, 89096, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotating=True)
  File "C:\

--- Logging error in Loguru Handler #10 ---
Record was: {'elapsed': datetime.timedelta(seconds=1170, microseconds=111481), 'exception': None, 'extra': {}, 'file': (name='optimizer.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\optimizer.py'), 'function': '_print_opt_parameters', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 256, 'message': 'CV_Folds = 8', 'module': 'optimizer', 'name': 'automl_alex.optimizer', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 21, 3, 12, 134657, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotating=True)
  File "C:\anaconda3\envs\mac

--- Logging error in Loguru Handler #10 ---
Record was: {'elapsed': datetime.timedelta(seconds=1170, microseconds=135728), 'exception': None, 'extra': {}, 'file': (name='optimizer.py', path='C:\\anaconda3\\envs\\machinelearning\\lib\\site-packages\\automl_alex\\optimizer.py'), 'function': '_print_opt_parameters', 'level': (name='INFO', no=20, icon='ℹ️'), 'line': 262, 'message': 'Metric = mean_absolute_error', 'module': 'optimizer', 'name': 'automl_alex.optimizer', 'process': (id=11572, name='MainProcess'), 'thread': (id=2380, name='MainThread'), 'time': datetime(2021, 7, 15, 21, 3, 12, 158904, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400), '대한민국 표준시'))}
Traceback (most recent call last):
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_handler.py", line 177, in emit
    self._sink.write(str_record)
  File "C:\anaconda3\envs\machinelearning\lib\site-packages\loguru\_file_sink.py", line 174, in write
    self._terminate_file(is_rotating=True)
  File "C:\an

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_xgb_booster,params_xgb_colsample_bytree,params_xgb_gamma,params_xgb_grow_policy,params_xgb_learning_rate,params_xgb_max_depth,params_xgb_min_child_weight,params_xgb_n_estimators,params_xgb_normalize_type,params_xgb_rate_drop,params_xgb_sample_type,params_xgb_skip_drop,params_xgb_subsample,state
0,0,57.0494,2021-07-15 21:00:40.641594,2021-07-15 21:00:47.427678,0 days 00:00:06.786084,gbtree,0.8,0.004043,depthwise,38,18.0,7.0,10,,,,,1.0,COMPLETE
1,1,76.4076,2021-07-15 21:00:47.513898,2021-07-15 21:00:47.922779,0 days 00:00:00.408881,gblinear,0.2,,,84,,,5,,,,,0.3,COMPLETE
2,2,78.4022,2021-07-15 21:00:48.000437,2021-07-15 21:00:48.567640,0 days 00:00:00.567203,gblinear,0.2,,,30,,,8,,,,,0.7,COMPLETE
3,3,57.8736,2021-07-15 21:00:48.667949,2021-07-15 21:02:12.745433,0 days 00:01:24.077484,dart,0.6,0.622003,depthwise,20,19.0,8.0,10,tree,9.149878e-05,weighted,1.884118e-08,0.6,COMPLETE
4,4,65.9970,2021-07-15 21:02:12.831178,2021-07-15 21:02:20.022726,0 days 00:00:07.191548,dart,0.4,0.433505,depthwise,26,16.0,97.0,3,tree,4.005370e-06,uniform,1.286525e-05,0.7,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,114,61.4737,2021-07-15 21:13:26.660722,2021-07-15 21:13:27.560219,0 days 00:00:00.899497,gbtree,0.3,0.000010,lossguide,86,12.0,37.0,3,,,,,0.5,COMPLETE
115,115,61.9643,2021-07-15 21:13:27.662838,2021-07-15 21:13:31.542420,0 days 00:00:03.879582,dart,0.8,0.000010,depthwise,66,2.0,25.0,2,forest,4.510428e-03,uniform,2.169378e-07,0.4,COMPLETE
116,116,61.9526,2021-07-15 21:13:31.647024,2021-07-15 21:15:00.403426,0 days 00:01:28.756402,dart,0.7,0.000017,lossguide,4,18.0,59.0,10,tree,9.481549e-08,weighted,4.145646e-05,0.8,COMPLETE
117,117,82.9994,2021-07-15 21:15:00.549961,2021-07-15 21:15:49.706399,0 days 00:00:49.156438,dart,0.1,0.006835,depthwise,32,20.0,2.0,8,tree,1.736956e-08,uniform,2.906963e-06,0.6,PRUNED


In [24]:
predicts_LGBM_dinner = dinner_model.predict(dinner_test)

In [25]:
submission = pd.read_csv('../data/sample_submission.csv')

In [26]:
submission.iloc[:,1] = predicts_LGBM_lunch
submission.iloc[:,2] = predicts_LGBM_dinner
submission.head()

Unnamed: 0,일자,중식계,석식계
0,2021-01-27,1006.574158,300.069824
1,2021-01-28,957.091064,434.266998
2,2021-01-29,635.769653,237.423019
3,2021-02-01,1271.646484,531.715698
4,2021-02-02,1095.182495,497.948212


In [27]:
answer = pd.read_csv('../submission/20210630_lgbm_autoML.csv')

lunch_answer = np.array(answer.iloc[:,1])
dinner_answer = np.array(answer.iloc[:,2])

abs(predicts_LGBM_lunch - lunch_answer).mean(), abs(predicts_LGBM_dinner - dinner_answer).mean()

(17.552833603009592, 20.14220606134737)

# 저장

In [28]:
import datetime
today = str(datetime.datetime.now().date()).replace("-","")
print("오늘 날짜 : " + today)

submission.to_csv(f'../submission/{today}_xgb_autoML.csv', index =False)

오늘 날짜 : 20210715
