In [69]:
import xgboost as xgb
import tqdm as tq
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import pandas as pd
from tqdm import tqdm
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np

### Train Data Preprocessing

In [59]:
train = pd.read_csv('train.csv')

In [69]:
train.head()

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4


In [60]:
train.pop('일조(hr)')
train.pop('일사(MJ/m2)')

train['일시'] = pd.to_datetime(train['일시'], format='%Y%m%d %H')
train['month'] = train['일시'].dt.month
train['day'] = train['일시'].dt.weekday
train['hour'] = train['일시'].dt.hour
train['holiday'] = train.apply(lambda x: 0 if x['day']<5 else 1, axis=1)
train['sin_time'] = np.sin(2*np.pi*train.hour/24) # cyclical encoding
train['cos_time'] = np.cos(2*np.pi*train.hour/24)
train['DI'] = 1.8*train['기온(C)'] - 0.55*(1-train['습도(%)'])*(1.8*train['기온(C)']-26) + 32 # 불쾌지수

train.pop('num_date_time')
train.pop('일시')
train.pop('hour')
train['power'] = train['전력소비량(kWh)']
train.pop('전력소비량(kWh)')
train['강수량(mm)'] = train['강수량(mm)'].fillna(0)
train['풍속(m/s)'] = train['풍속(m/s)'].fillna(0)
train['습도(%)'] = train['습도(%)'].fillna(0)

In [71]:
train.to_csv('train_preprocessed.csv', index=False)

In [72]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4


### Test Data Preprocessing

In [61]:
test = pd.read_csv('test.csv')

In [62]:
test['일시'] = pd.to_datetime(test['일시'], format='%Y%m%d %H')
test['month'] = test['일시'].dt.month
test['day'] = test['일시'].dt.weekday
test['hour'] = test['일시'].dt.hour
test['holiday'] = test.apply(lambda x: 0 if x['day']<5 else 1, axis=1)
test['sin_time'] = np.sin(2*np.pi*test.hour/24)
test['cos_time'] = np.cos(2*np.pi*test.hour/24)
test['DI'] = 1.8*test['기온(C)'] - 0.55*(1-test['습도(%)'])*(1.8*test['기온(C)']-26) + 32

test.pop('num_date_time')
test.pop('일시')
test.pop('hour')

0         0
1         1
2         2
3         3
4         4
         ..
16795    19
16796    20
16797    21
16798    22
16799    23
Name: hour, Length: 16800, dtype: int64

In [75]:
test.to_csv('test_preprocessed.csv', index=False)

In [14]:
test.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI
0,1,23.5,0.0,2.2,72,8,3,0,0.0,1.0,710.815
1,1,23.0,0.0,0.9,72,8,3,0,0.258819,0.965926,674.77
2,1,22.7,0.0,1.5,75,8,3,0,0.5,0.866025,677.662
3,1,22.1,0.0,1.3,78,8,3,0,0.707107,0.707107,655.363
4,1,21.8,0.0,1.0,77,8,3,0,0.866025,0.5,624.672


### Training

In [63]:
import os
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split

In [64]:
def SMAPE(true, pred):
    return np.mean((np.abs(true - pred))/(np.abs(true) + np.abs(pred))) * 100    

In [20]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4


In [65]:
def tr_ts_split(train, test, number, val_hour):
    x = train.loc[train['건물번호']==number].drop(['건물번호','power'], axis=1)
    y = train.loc[train['건물번호']==number, 'power']

    #if number == 54:
    x = pd.concat([x[:601], x[700:]], axis=0).reset_index(drop=True)
    y = pd.concat([y[:601], y[700:]]).reset_index(drop=True)

    if val_hour == 0:
        return x, None, y, None
    else:
        x_train, x_valid, y_train, y_valid = x[:-val_hour], x[-val_hour:], y[:-val_hour], y[-val_hour:]
        return x_train, x_valid.reset_index(drop=True), y_train, y_valid.reset_index(drop=True)

In [66]:
def fit_and_pred_XGB(train, test, number, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, seed=seed,
                            gpu_id=0, tree_method='gpu_hist', predictor='gpu_predictor')
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)],
                early_stopping_rounds=300, verbose=False)
    pred = xgb_reg.predict(x_valid)
    pred = pd.Series(pred)
    
    x_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [74]:
def fit_and_pred_RF(train, test, number, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train.dropna(), test, number, 7*24)
    
    rf_reg = RandomForestRegressor(random_state=seed)
    rf_reg.fit(x_train, y_train)
    pred = rf_reg.predict(x_valid)
    pred = pd.Series(pred)

    x_test = test[test['건물번호']==number]
    y_test_pred = rf_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred                                    

In [75]:
pred_val_ens= []
test_ens = []

summary_list = []
ans_val_list = []
pred_val_list = []
pred_test_list = []

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    summary_dict = {}

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_RF(train, test, i+1, seed=0)

    summary_dict['건물번호'] = i+1
    summary_dict['SMAPE'] = smape_val
    summary_dict['R2'] = r2_val
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val =SMAPE(ans_val_whole, pred_val_whole)

  0%|          | 0/100 [00:00<?, ?it/s]

===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:00<00:47,  2.10it/s]

SMAPE: 3.81607975204115
R2: 0.9382542184288511
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:00<00:46,  2.09it/s]

SMAPE: 4.867200648713607
R2: 0.7763007096473291
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:01<00:46,  2.09it/s]

SMAPE: 7.7069141261757705
R2: 0.7044058382387558
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:01<00:45,  2.09it/s]

SMAPE: 2.6295317324970036
R2: 0.9541398097540355
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:02<00:44,  2.14it/s]

SMAPE: 2.6806479501464584
R2: 0.9727180063840729
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:02<00:43,  2.14it/s]

SMAPE: 2.16355388770827
R2: 0.9704339792850838
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:03<00:43,  2.13it/s]

SMAPE: 4.631251200590728
R2: 0.8938503046146825
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:03<00:43,  2.13it/s]

SMAPE: 2.6015812045585722
R2: 0.7028133728965408
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:04<00:42,  2.13it/s]

SMAPE: 2.1283778181013053
R2: 0.9665456168458111
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:04<00:42,  2.13it/s]

SMAPE: 4.690170995407238
R2: 0.06500874864653094
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:05<00:41,  2.14it/s]

SMAPE: 1.9116744901547011
R2: 0.8832901180685022
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:05<00:41,  2.15it/s]

SMAPE: 2.4815619328076512
R2: 0.8664673302670154
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:06<00:40,  2.14it/s]

SMAPE: 3.2920364666488298
R2: 0.28128614925023554
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:06<00:41,  2.10it/s]

SMAPE: 8.470760739573072
R2: 0.22610613234099208
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:07<00:40,  2.12it/s]

SMAPE: 1.4439752293057564
R2: 0.9140344005337818
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:07<00:39,  2.14it/s]

SMAPE: 2.548924377334314
R2: 0.9744329763755644
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:08<00:38,  2.13it/s]

SMAPE: 2.7838239506088778
R2: 0.9454634355214464
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:08<00:38,  2.12it/s]

SMAPE: 3.7694009492810294
R2: 0.9181599146592628
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:08<00:38,  2.12it/s]

SMAPE: 4.265690928934783
R2: 0.9277149053981694
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:09<00:37,  2.13it/s]

SMAPE: 2.4292519802640933
R2: 0.9747302167892041
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:09<00:37,  2.13it/s]

SMAPE: 3.806566532597179
R2: 0.9315238956209586
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:10<00:36,  2.15it/s]

SMAPE: 1.8305577295731381
R2: 0.9381958643202355
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:10<00:35,  2.15it/s]

SMAPE: 0.9269066816452773
R2: 0.9669231469014048
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:11<00:35,  2.16it/s]

SMAPE: 1.3153918643755969
R2: 0.9829084967645222
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:11<00:34,  2.16it/s]

SMAPE: 1.2790723456912096
R2: 0.9832323720232548
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:12<00:33,  2.18it/s]

SMAPE: 1.2183526106766307
R2: 0.9814403485805724
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:12<00:33,  2.19it/s]

SMAPE: 0.980770415430509
R2: 0.9690667456063947
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:13<00:32,  2.20it/s]

SMAPE: 3.019988143000832
R2: 0.893443550502151
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:13<00:32,  2.20it/s]

SMAPE: 2.3423030277280814
R2: 0.9234394754036023
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:13<00:31,  2.19it/s]

SMAPE: 3.1247811497535194
R2: 0.8909177684595639
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:14<00:31,  2.20it/s]

SMAPE: 3.1006594741064135
R2: 0.9005565126109285
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [00:14<00:31,  2.18it/s]

SMAPE: 0.20204890416909288
R2: 0.8552501903782831
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [00:15<00:30,  2.17it/s]

SMAPE: 0.2684752281375804
R2: 0.4577168446596839
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [00:15<00:30,  2.17it/s]

SMAPE: 0.3858832314158362
R2: 0.8508291507197276
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [00:16<00:30,  2.17it/s]

SMAPE: 0.21359921098445012
R2: 0.8877497765916647
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [00:16<00:29,  2.17it/s]

SMAPE: 0.3343134947781449
R2: 0.5961038758043062
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [00:17<00:29,  2.13it/s]

SMAPE: 2.7891843870317863
R2: 0.9717759420917264
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [00:17<00:29,  2.11it/s]

SMAPE: 2.2341777474739506
R2: 0.9854370280219107
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [00:18<00:28,  2.12it/s]

SMAPE: 2.6730025807419264
R2: 0.9703409295932119
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [00:18<00:28,  2.13it/s]

SMAPE: 5.75368382606543
R2: 0.887933518636843
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [00:19<00:27,  2.12it/s]

SMAPE: 2.2563804647610746
R2: 0.9905204343201396
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [00:19<00:27,  2.13it/s]

SMAPE: 5.764268600047815
R2: 0.7906010839588729
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [00:20<00:26,  2.13it/s]

SMAPE: 3.01051394988604
R2: 0.9936727508985866
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [00:20<00:26,  2.13it/s]

SMAPE: 2.310154668327256
R2: 0.9937892305609368
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [00:21<00:25,  2.14it/s]

SMAPE: 1.5009182843603501
R2: 0.9400363536980346
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [00:21<00:25,  2.15it/s]

SMAPE: 2.268407254510354
R2: 0.9432954224273304
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [00:21<00:24,  2.15it/s]

SMAPE: 2.226009553336245
R2: 0.9333594493941354
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [00:22<00:23,  2.18it/s]

SMAPE: 1.1214533211000972
R2: 0.9833635549126017
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [00:22<00:23,  2.20it/s]

SMAPE: 1.59441018145801
R2: 0.9229431390943257
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [00:23<00:22,  2.21it/s]

SMAPE: 1.3602567913085193
R2: 0.9204864108512055
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [00:23<00:22,  2.18it/s]

SMAPE: 1.9180676119485909
R2: 0.9311832356778328
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [00:24<00:22,  2.17it/s]

SMAPE: 2.0935757482095503
R2: 0.9465409017335444
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [00:24<00:21,  2.16it/s]

SMAPE: 5.58696122514135
R2: 0.9592604199331006
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [00:25<00:21,  2.16it/s]

SMAPE: 9.222910057851367
R2: 0.7931655129605127
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [00:25<00:20,  2.17it/s]

SMAPE: 0.7760442907770205
R2: 0.9234005961821116
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [00:26<00:20,  2.16it/s]

SMAPE: 0.4135204293848765
R2: 0.9244514395741013
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [00:26<00:19,  2.16it/s]

SMAPE: 2.6554535201281757
R2: 0.9809909252915219
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [00:26<00:19,  2.15it/s]

SMAPE: 0.23670892141224703
R2: 0.920833939381469
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [00:27<00:19,  2.13it/s]

SMAPE: 2.7308138201117464
R2: 0.9799158507617827
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [00:27<00:18,  2.12it/s]

SMAPE: 3.332501927803592
R2: 0.9350027868731436
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [00:28<00:18,  2.14it/s]

SMAPE: 2.4290931986130837
R2: 0.9270876663943561
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [00:28<00:17,  2.15it/s]

SMAPE: 2.882217165357329
R2: 0.9125645284829034
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [00:29<00:17,  2.17it/s]

SMAPE: 3.4149037565694553
R2: 0.8949602368003133
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [00:29<00:16,  2.19it/s]

SMAPE: 2.192605547310273
R2: 0.932467352366322
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [00:30<00:15,  2.21it/s]

SMAPE: 4.564329757053826
R2: 0.8739320769441087
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [00:30<00:15,  2.22it/s]

SMAPE: 2.4244998024443847
R2: 0.8265624115134869
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [00:31<00:14,  2.23it/s]

SMAPE: 3.332442030185826
R2: 0.8322504730913509
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [00:31<00:14,  2.23it/s]

SMAPE: 2.4661242270411563
R2: 0.9259756065643394
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [00:32<00:14,  2.21it/s]

SMAPE: 1.2378588567640767
R2: 0.9731007487803348
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [00:32<00:13,  2.18it/s]

SMAPE: 2.9242912012858864
R2: 0.9353082335484849
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [00:32<00:13,  2.18it/s]

SMAPE: 2.4507192180919257
R2: 0.8340617821952501
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [00:33<00:12,  2.20it/s]

SMAPE: 2.7733701831279545
R2: 0.7555292420116023
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [00:33<00:12,  2.19it/s]

SMAPE: 1.8094169744102617
R2: 0.7870499450423885
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [00:34<00:12,  2.17it/s]

SMAPE: 3.227206744145175
R2: 0.915656762699637
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [00:34<00:11,  2.18it/s]

SMAPE: 1.795155687007019
R2: 0.9244839068695585
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [00:35<00:10,  2.19it/s]

SMAPE: 2.6644569317697706
R2: 0.7380537078138328
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [00:35<00:10,  2.18it/s]

SMAPE: 1.2645450120717565
R2: 0.9929984811307015
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [00:36<00:10,  2.17it/s]

SMAPE: 1.775172305388308
R2: 0.9924433940189776
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [00:36<00:09,  2.16it/s]

SMAPE: 1.2026472327547444
R2: 0.987467573474717
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [00:37<00:09,  2.16it/s]

SMAPE: 2.1932373322995904
R2: 0.9904666887899196
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [00:37<00:08,  2.15it/s]

SMAPE: 1.8181520011197698
R2: 0.9914942169679944
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [00:38<00:08,  2.16it/s]

SMAPE: 2.1799013680169064
R2: 0.9803570411343799
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [00:38<00:07,  2.16it/s]

SMAPE: 1.4762290220884111
R2: 0.9938377574748046
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [00:38<00:07,  2.18it/s]

SMAPE: 1.5863766283209517
R2: 0.989003395116982
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [00:39<00:06,  2.17it/s]

SMAPE: 2.110631507340148
R2: 0.9304924946109449
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [00:39<00:06,  2.14it/s]

SMAPE: 4.6735207190526875
R2: 0.8042585156894867
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [00:40<00:06,  2.14it/s]

SMAPE: 5.475569058822784
R2: 0.8311518019327729
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [00:40<00:05,  2.13it/s]

SMAPE: 3.485550047859166
R2: 0.8699239231961176
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [00:41<00:05,  2.13it/s]

SMAPE: 4.481320288664517
R2: 0.8187637258583744
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [00:41<00:04,  2.12it/s]

SMAPE: 4.090439826961783
R2: 0.9185593630504253
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [00:42<00:04,  2.10it/s]

SMAPE: 6.321587710752557
R2: 0.9335721185128683
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [00:42<00:03,  2.08it/s]

SMAPE: 3.6227618514306217
R2: 0.8661026418947262
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [00:43<00:03,  2.10it/s]

SMAPE: 3.1402163139316612
R2: 0.8803611282961165
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [00:43<00:02,  2.12it/s]

SMAPE: 3.138765680342527
R2: 0.903475505279252
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [00:44<00:02,  2.11it/s]

SMAPE: 7.843140324549508
R2: 0.6047171618588155
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [00:44<00:01,  2.13it/s]

SMAPE: 1.8025855377558582
R2: 0.9547015010652354
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [00:45<00:01,  2.14it/s]

SMAPE: 3.153051745809262
R2: 0.8982265079464286
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [00:45<00:00,  2.14it/s]

SMAPE: 7.076964088552592
R2: 0.8518380869876305
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [00:45<00:00,  2.17it/s]

SMAPE: 1.6628715344759977
R2: 0.978905208400493
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [00:46<00:00,  2.15it/s]

SMAPE: 2.9758569305188023
R2: 0.958168573833171





In [76]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.81607975204115, 'R2': 0.9382542184288511},
 {'건물번호': 2, 'SMAPE': 4.867200648713607, 'R2': 0.7763007096473291},
 {'건물번호': 3, 'SMAPE': 7.7069141261757705, 'R2': 0.7044058382387558},
 {'건물번호': 4, 'SMAPE': 2.6295317324970036, 'R2': 0.9541398097540355},
 {'건물번호': 5, 'SMAPE': 2.6806479501464584, 'R2': 0.9727180063840729},
 {'건물번호': 6, 'SMAPE': 2.16355388770827, 'R2': 0.9704339792850838},
 {'건물번호': 7, 'SMAPE': 4.631251200590728, 'R2': 0.8938503046146825},
 {'건물번호': 8, 'SMAPE': 2.6015812045585722, 'R2': 0.7028133728965408},
 {'건물번호': 9, 'SMAPE': 2.1283778181013053, 'R2': 0.9665456168458111},
 {'건물번호': 10, 'SMAPE': 4.690170995407238, 'R2': 0.06500874864653094},
 {'건물번호': 11, 'SMAPE': 1.9116744901547011, 'R2': 0.8832901180685022},
 {'건물번호': 12, 'SMAPE': 2.4815619328076512, 'R2': 0.8664673302670154},
 {'건물번호': 13, 'SMAPE': 3.2920364666488298, 'R2': 0.28128614925023554},
 {'건물번호': 14, 'SMAPE': 8.470760739573072, 'R2': 0.22610613234099208},
 {'건물번호': 15, 'SMAPE': 1.443975229

In [77]:
smape_val

2.8263124491815237

In [78]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = pred_test_whole
sub.to_csv('rf_sub_23.csv', index=False)

### Improvement

In [67]:
train['적정온도차이'] = train['기온(C)'] - 26

In [68]:
test['적정온도차이'] = test['기온(C)'] - 26

In [34]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power,적정온도차이
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36,-8.0
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88,-8.3
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76,-9.3
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4,-7.6


In [37]:
def weighted_mse(alpha = 1):
    def weighted_mse_fixed(label, pred):
        residual = (label - pred).astype("float")
        grad = np.where(residual>0, -2*alpha*residual, -2*residual)
        hess = np.where(residual>0, 2*alpha, 2.0)
        return grad, hess
    return weighted_mse_fixed

In [38]:
def fit_and_pred_XGB_with_weight(train, test, number, weight, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, seed=seed,
                            gpu_id=0, tree_method='gpu_hist', predictor='gpu_predictor')
    xgb_reg.set_params(**{'objective':weighted_mse(weight)})
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)],
                early_stopping_rounds=300, verbose=False)
    pred = xgb_reg.predict(x_valid)
    pred = pd.Series(pred)
    
    x_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [44]:
pred_val_ens= []
test_ens = []

summary_list = []
ans_val_list = []
pred_val_list = []
pred_test_list = []

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    summary_dict = {}

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB_with_weight(train, test, i+1, 100, seed=0)

    summary_dict['건물번호'] = i+1
    summary_dict['SMAPE'] = smape_val
    summary_dict['R2'] = r2_val
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val =SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:01<02:57,  1.80s/it]

SMAPE: 3.883634407146668
R2: 0.9428148434678705
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:03<02:26,  1.50s/it]

SMAPE: 5.256542311528698
R2: 0.8268383595313882
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:04<02:11,  1.35s/it]

SMAPE: 5.545908115619256
R2: 0.8225081381620768
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:05<02:12,  1.39s/it]

SMAPE: 2.0216084532929974
R2: 0.9738995748382593
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:07<02:42,  1.71s/it]

SMAPE: 2.8028523787496087
R2: 0.9733667899879699
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:09<02:47,  1.78s/it]

SMAPE: 1.861585193427125
R2: 0.981714605738076
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:11<02:33,  1.65s/it]

SMAPE: 5.0474765252492375
R2: 0.8962074296471658
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:12<02:30,  1.63s/it]

SMAPE: 2.5489555005401963
R2: 0.7355127209176974
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:14<02:33,  1.69s/it]

SMAPE: 2.0478630548996173
R2: 0.975397976943655
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:16<02:21,  1.57s/it]

SMAPE: 3.907172078032968
R2: 0.32611646656584903
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:17<02:23,  1.62s/it]

SMAPE: 1.676392266013526
R2: 0.9014899611347567
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:20<02:56,  2.00s/it]

SMAPE: 1.8723574629050932
R2: 0.9008362680544917
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:22<02:48,  1.93s/it]

SMAPE: 3.0274657714415754
R2: 0.3797024654736437
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:23<02:28,  1.73s/it]

SMAPE: 8.411099584140395
R2: 0.2536910586619444
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:25<02:24,  1.70s/it]

SMAPE: 1.2548517916499227
R2: 0.9371161321264849
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:26<02:16,  1.63s/it]

SMAPE: 2.2092638330705237
R2: 0.9848943403666937
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:28<02:23,  1.72s/it]

SMAPE: 2.9155781243960273
R2: 0.9459348281219652
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:30<02:28,  1.81s/it]

SMAPE: 3.3829864057136083
R2: 0.9332030692073239
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:33<02:43,  2.02s/it]

SMAPE: 4.304075013735136
R2: 0.9364720115975896
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:35<02:37,  1.97s/it]

SMAPE: 2.378545266421429
R2: 0.9764064401504943
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:36<02:31,  1.92s/it]

SMAPE: 3.9669116215508895
R2: 0.933790666753363
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:38<02:26,  1.87s/it]

SMAPE: 1.8200203037843907
R2: 0.9429240295041776
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:40<02:35,  2.02s/it]

SMAPE: 0.9373158040898235
R2: 0.966113940778409
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:43<02:40,  2.11s/it]

SMAPE: 1.288296737357208
R2: 0.9864269965196982
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:45<02:44,  2.20s/it]

SMAPE: 1.0654398164794496
R2: 0.9887994766418751
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:47<02:31,  2.05s/it]

SMAPE: 0.9936471600354451
R2: 0.9862221989284544
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:49<02:22,  1.95s/it]

SMAPE: 0.8588802301263323
R2: 0.9817016980675318
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:50<02:06,  1.76s/it]

SMAPE: 2.695373169555824
R2: 0.9583253430170969
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:51<01:59,  1.68s/it]

SMAPE: 1.976031642209499
R2: 0.9524808637651322
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:53<01:55,  1.65s/it]

SMAPE: 3.5546984752156083
R2: 0.8875174353264503
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:54<01:48,  1.57s/it]

SMAPE: 2.56804945577574
R2: 0.9430689817703377
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [00:57<01:59,  1.75s/it]

SMAPE: 0.19702161248468097
R2: 0.8575105699889993
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [00:59<02:16,  2.03s/it]

SMAPE: 0.22625441905510688
R2: 0.664683287931862
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [01:02<02:31,  2.29s/it]

SMAPE: 0.3245615439520965
R2: 0.8743641573149044
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [01:05<02:31,  2.33s/it]

SMAPE: 0.1725662905282774
R2: 0.9231181475717967
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [01:07<02:37,  2.46s/it]

SMAPE: 0.3293354963327888
R2: 0.6086908187486058
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [01:09<02:23,  2.29s/it]

SMAPE: 2.156013386887052
R2: 0.9834896887299619
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [01:11<02:10,  2.11s/it]

SMAPE: 1.79901744109431
R2: 0.997437075975758
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [01:13<02:00,  1.97s/it]

SMAPE: 2.6661003736987903
R2: 0.9789087896318968
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [01:14<01:47,  1.80s/it]

SMAPE: 4.45893690485189
R2: 0.9573252573751764
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:16<01:55,  1.96s/it]

SMAPE: 2.5510663147591073
R2: 0.9930563582771769
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:18<01:41,  1.74s/it]

SMAPE: 8.887439371539775
R2: 0.8210955033826771
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:19<01:40,  1.76s/it]

SMAPE: 3.2731321691803545
R2: 0.9935898557260004
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:21<01:34,  1.69s/it]

SMAPE: 2.0138184178735856
R2: 0.9963370546141956
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:23<01:37,  1.77s/it]

SMAPE: 1.2292015666710925
R2: 0.9618127671900834
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:25<01:34,  1.75s/it]

SMAPE: 2.262918598327897
R2: 0.9497221063116454
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:26<01:30,  1.70s/it]

SMAPE: 2.163123832268895
R2: 0.9368352656382489
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:28<01:29,  1.73s/it]

SMAPE: 1.0124164128796722
R2: 0.9867960252855252
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:30<01:37,  1.91s/it]

SMAPE: 1.231602471054904
R2: 0.9518363003022309
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:32<01:32,  1.84s/it]

SMAPE: 1.149476273028294
R2: 0.9402324357123669
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:34<01:31,  1.86s/it]

SMAPE: 1.7940870821424981
R2: 0.9374935367784654
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [01:36<01:33,  1.95s/it]

SMAPE: 1.7134989361480766
R2: 0.969279720743553
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [01:37<01:23,  1.78s/it]

SMAPE: 5.7870331403832855
R2: 0.9500465530970634
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [01:39<01:19,  1.73s/it]

SMAPE: 9.343993649103458
R2: 0.8290382764818288
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [01:41<01:18,  1.74s/it]

SMAPE: 0.8002626858072206
R2: 0.9195686449786508
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [01:43<01:23,  1.91s/it]

SMAPE: 0.372070315733699
R2: 0.9577856450595469
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [01:45<01:18,  1.83s/it]

SMAPE: 2.582743473340681
R2: 0.9855450716287837
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [01:47<01:20,  1.91s/it]

SMAPE: 0.2109516149812387
R2: 0.9415446081549514
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [01:48<01:15,  1.83s/it]

SMAPE: 2.727613907355983
R2: 0.9804815904020056
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [01:50<01:09,  1.74s/it]

SMAPE: 3.128806069117498
R2: 0.9440451782825701
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [01:52<01:05,  1.68s/it]

SMAPE: 2.4695452930152224
R2: 0.9320376576270133
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [01:53<01:02,  1.63s/it]

SMAPE: 2.800901630300211
R2: 0.9255194239196292
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [01:55<00:58,  1.59s/it]

SMAPE: 3.237423712443478
R2: 0.9093444882521864
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [01:56<00:56,  1.57s/it]

SMAPE: 1.983562546595305
R2: 0.9449119561229407
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [01:57<00:53,  1.52s/it]

SMAPE: 4.087620852650325
R2: 0.8779487186996391
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [01:59<00:52,  1.55s/it]

SMAPE: 2.1400472638820167
R2: 0.8596778515618013
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [02:00<00:49,  1.50s/it]

SMAPE: 2.7280610479038323
R2: 0.8819381309576115
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [02:02<00:47,  1.49s/it]

SMAPE: 2.4629394121129358
R2: 0.9323077167946682
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [02:04<00:47,  1.54s/it]

SMAPE: 1.1647349083922025
R2: 0.9778160470628237
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [02:06<00:49,  1.66s/it]

SMAPE: 2.7534337821495734
R2: 0.9469181421177187
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [02:07<00:48,  1.67s/it]

SMAPE: 2.19866582767245
R2: 0.8921923890792537
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [02:10<00:51,  1.86s/it]

SMAPE: 2.3547259883621723
R2: 0.788860197681444
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [02:11<00:48,  1.80s/it]

SMAPE: 1.6954986887964207
R2: 0.8131771834720108
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [02:13<00:49,  1.89s/it]

SMAPE: 3.181952948728036
R2: 0.9189238392978
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [02:15<00:44,  1.79s/it]

SMAPE: 1.7656772240774978
R2: 0.9381620469863149
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [02:16<00:41,  1.72s/it]

SMAPE: 2.4709027490552686
R2: 0.7829026370287808
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [02:18<00:39,  1.72s/it]

SMAPE: 1.2620726817927796
R2: 0.9920637841574659
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:20<00:40,  1.82s/it]

SMAPE: 1.5501687975698433
R2: 0.993533370338121
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [02:22<00:41,  1.96s/it]

SMAPE: 1.0436883340074417
R2: 0.9911248490291023
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [02:24<00:39,  1.98s/it]

SMAPE: 1.6852099716763582
R2: 0.9945649161319271
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [02:26<00:36,  1.94s/it]

SMAPE: 1.8448466598461641
R2: 0.9912631340286862
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [02:28<00:34,  1.90s/it]

SMAPE: 2.1228550962234487
R2: 0.9920777946017227
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [02:30<00:31,  1.86s/it]

SMAPE: 1.4764124214173946
R2: 0.9935302051646853
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [02:32<00:29,  1.83s/it]

SMAPE: 1.4184075224078103
R2: 0.9911678050073048
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [02:35<00:36,  2.41s/it]

SMAPE: 1.7766891378931844
R2: 0.9554452075929176
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [02:38<00:32,  2.34s/it]

SMAPE: 3.8742220810478485
R2: 0.8744989436601363
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [02:40<00:28,  2.22s/it]

SMAPE: 4.984363164766773
R2: 0.8799899192668625
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [02:41<00:25,  2.09s/it]

SMAPE: 2.891185437185699
R2: 0.9259233720667035
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [02:43<00:21,  1.93s/it]

SMAPE: 2.586491535349424
R2: 0.9507338522500512
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [02:45<00:18,  1.87s/it]

SMAPE: 2.9521107797127164
R2: 0.9701321600674983
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [02:46<00:16,  1.86s/it]

SMAPE: 6.481577375826742
R2: 0.9529123507180252
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [02:48<00:14,  1.82s/it]

SMAPE: 2.862682784417281
R2: 0.9385937785090032
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [02:50<00:12,  1.79s/it]

SMAPE: 3.373833828234354
R2: 0.8689785795761118
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [02:52<00:10,  1.79s/it]

SMAPE: 2.9929373345061463
R2: 0.9160657067346847
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [02:53<00:08,  1.62s/it]

SMAPE: 7.258248738455955
R2: 0.6312822310967584
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [02:55<00:06,  1.61s/it]

SMAPE: 1.491430100787991
R2: 0.9651313111504027
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [02:57<00:05,  1.82s/it]

SMAPE: 2.4080386121744386
R2: 0.9334937482771224
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [02:58<00:03,  1.70s/it]

SMAPE: 7.038908245544515
R2: 0.8492584312608136
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [03:00<00:01,  1.69s/it]

SMAPE: 1.6848495597962838
R2: 0.9786802231329808
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [03:02<00:00,  1.82s/it]

SMAPE: 2.642659282172129
R2: 0.9674120094300811





In [45]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.883634407146668, 'R2': 0.9428148434678705},
 {'건물번호': 2, 'SMAPE': 5.256542311528698, 'R2': 0.8268383595313882},
 {'건물번호': 3, 'SMAPE': 5.545908115619256, 'R2': 0.8225081381620768},
 {'건물번호': 4, 'SMAPE': 2.0216084532929974, 'R2': 0.9738995748382593},
 {'건물번호': 5, 'SMAPE': 2.8028523787496087, 'R2': 0.9733667899879699},
 {'건물번호': 6, 'SMAPE': 1.861585193427125, 'R2': 0.981714605738076},
 {'건물번호': 7, 'SMAPE': 5.0474765252492375, 'R2': 0.8962074296471658},
 {'건물번호': 8, 'SMAPE': 2.5489555005401963, 'R2': 0.7355127209176974},
 {'건물번호': 9, 'SMAPE': 2.0478630548996173, 'R2': 0.975397976943655},
 {'건물번호': 10, 'SMAPE': 3.907172078032968, 'R2': 0.32611646656584903},
 {'건물번호': 11, 'SMAPE': 1.676392266013526, 'R2': 0.9014899611347567},
 {'건물번호': 12, 'SMAPE': 1.8723574629050932, 'R2': 0.9008362680544917},
 {'건물번호': 13, 'SMAPE': 3.0274657714415754, 'R2': 0.3797024654736437},
 {'건물번호': 14, 'SMAPE': 8.411099584140395, 'R2': 0.2536910586619444},
 {'건물번호': 15, 'SMAPE': 1.254851791649

In [46]:
smape_val

2.638455250836857

In [47]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = pred_test_whole
sub.to_csv('xgb_sub_21.csv', index=False)

### Parameter Tuning - Grid Search CV

In [48]:
from sklearn.model_selection import GridSearchCV, PredefinedSplit

In [None]:
df = pd.DataFrame(columns = ['n_estimators', 'max_depth', 'min_samples_leaf', 'min_samples_split'])
preds = np.array([])
grid = {
    'n_estimators': [30, 50, 70, 100, 150],
    'max_depth': [6, 8, 10, 12],
    'min_samples_leaf': [8, 12, 18],
    'min_samples_split': [8, 16, 20]
}

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    x_train, x_valid, y_train, y_valid = tr_ts_split(train.dropna(), test, i+1, 7*24)
    gcv = GridSearchCV(estimator=RandomForestRegressor(random_state=0),
                       param_grid=grid,
                       scoring=make_scorer(SMAPE, greater_is_better=False),
                       cv=5,
                       refit=True,
                       verbose=True)
    gcv.fit(x_train, y_train)
    best = gcv.best_estimator_
    params = gcv.best_params_
    print(params)
    pred = best.predict(x_valid)
    print(f'SMAPE: {SMAPE(y_valid, pred)}')
    preds = np.append(preds, pred)
    df = pd.concat([df, pd.DataFrame(params, index=[0])], axis=0)

  0%|          | 0/100 [00:00<?, ?it/s]

===== < BUILDING NO.1 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  1%|          | 1/100 [02:30<4:08:37, 150.69s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 4.2450750272584745
===== < BUILDING NO.2 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  2%|▏         | 2/100 [05:03<4:08:03, 151.87s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 4.606879783491006
===== < BUILDING NO.3 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  3%|▎         | 3/100 [07:34<4:05:19, 151.74s/it]

{'max_depth': 6, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 6.901413792231853
===== < BUILDING NO.4 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  4%|▍         | 4/100 [10:06<4:02:44, 151.71s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.3104972169087916
===== < BUILDING NO.5 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  5%|▌         | 5/100 [12:33<3:57:29, 150.00s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 3.5431336086062513
===== < BUILDING NO.6 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  6%|▌         | 6/100 [15:03<3:54:42, 149.82s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.1525433639483067
===== < BUILDING NO.7 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  7%|▋         | 7/100 [17:34<3:52:55, 150.27s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 4.584170104036619
===== < BUILDING NO.8 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  8%|▊         | 8/100 [20:04<3:50:16, 150.18s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.7756611912898315
===== < BUILDING NO.9 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


  9%|▉         | 9/100 [22:34<3:47:40, 150.11s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.153085818675606
===== < BUILDING NO.10 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 10%|█         | 10/100 [25:03<3:44:50, 149.89s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 4.737131652042914
===== < BUILDING NO.11 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 11%|█         | 11/100 [27:29<3:40:36, 148.72s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.8379435373737165
===== < BUILDING NO.12 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 12%|█▏        | 12/100 [29:58<3:38:09, 148.74s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 2.545882237654428
===== < BUILDING NO.13 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 13%|█▎        | 13/100 [32:24<3:34:41, 148.07s/it]

{'max_depth': 6, 'min_samples_leaf': 18, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 2.468547481887228
===== < BUILDING NO.14 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 14%|█▍        | 14/100 [35:00<3:35:34, 150.41s/it]

{'max_depth': 6, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 150}
SMAPE: 8.017304879677667
===== < BUILDING NO.15 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 15%|█▌        | 15/100 [37:31<3:32:59, 150.34s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.5403942468722513
===== < BUILDING NO.16 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 16%|█▌        | 16/100 [39:59<3:29:47, 149.85s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 2.9457975499779487
===== < BUILDING NO.17 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 17%|█▋        | 17/100 [42:30<3:27:46, 150.20s/it]

{'max_depth': 12, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 3.0844313149016407
===== < BUILDING NO.18 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 18%|█▊        | 18/100 [45:01<3:25:34, 150.42s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 3.7054978754893817
===== < BUILDING NO.19 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 19%|█▉        | 19/100 [47:33<3:23:39, 150.85s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 4.584096907639814
===== < BUILDING NO.20 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 20%|██        | 20/100 [50:02<3:20:31, 150.40s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 2.4948251097543577
===== < BUILDING NO.21 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 21%|██        | 21/100 [52:32<3:17:45, 150.19s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 3.7770231167626913
===== < BUILDING NO.22 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 22%|██▏       | 22/100 [54:57<3:13:12, 148.63s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 1.8519215911386815
===== < BUILDING NO.23 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 23%|██▎       | 23/100 [57:26<3:10:43, 148.62s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.0228960728580836
===== < BUILDING NO.24 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 24%|██▍       | 24/100 [59:54<3:08:17, 148.66s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 1.5245797018558418
===== < BUILDING NO.25 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 25%|██▌       | 25/100 [1:02:24<3:06:07, 148.90s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 1.5371383746649314
===== < BUILDING NO.26 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 26%|██▌       | 26/100 [1:04:51<3:02:50, 148.25s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.2733564218872049
===== < BUILDING NO.27 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 27%|██▋       | 27/100 [1:07:18<2:59:53, 147.86s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 1.1312716144222752
===== < BUILDING NO.28 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 28%|██▊       | 28/100 [1:09:44<2:56:59, 147.50s/it]

{'max_depth': 12, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.9704688716009056
===== < BUILDING NO.29 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 29%|██▉       | 29/100 [1:12:11<2:54:08, 147.16s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 2.3970199283914186
===== < BUILDING NO.30 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 30%|███       | 30/100 [1:14:38<2:51:45, 147.22s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 150}
SMAPE: 3.0909902457545346
===== < BUILDING NO.31 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 31%|███       | 31/100 [1:17:05<2:49:21, 147.27s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.9627464298636106
===== < BUILDING NO.32 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 32%|███▏      | 32/100 [1:19:35<2:47:38, 147.92s/it]

{'max_depth': 12, 'min_samples_leaf': 18, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 0.20532342906465303
===== < BUILDING NO.33 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 33%|███▎      | 33/100 [1:22:03<2:45:20, 148.07s/it]

{'max_depth': 12, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 0.3388465443985801
===== < BUILDING NO.34 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 34%|███▍      | 34/100 [1:24:30<2:42:24, 147.64s/it]

{'max_depth': 10, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 0.36810875070587307
===== < BUILDING NO.35 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 35%|███▌      | 35/100 [1:26:56<2:39:26, 147.18s/it]

{'max_depth': 10, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 0.19411472432418916
===== < BUILDING NO.36 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 36%|███▌      | 36/100 [1:29:23<2:37:05, 147.27s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 150}
SMAPE: 0.33078483325625113
===== < BUILDING NO.37 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 37%|███▋      | 37/100 [1:31:57<2:36:30, 149.05s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 2.5325161323873324
===== < BUILDING NO.38 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 38%|███▊      | 38/100 [1:34:30<2:35:17, 150.28s/it]

{'max_depth': 10, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 2.3022906165666663
===== < BUILDING NO.39 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 39%|███▉      | 39/100 [1:37:01<2:33:05, 150.59s/it]

{'max_depth': 10, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 2.4612325926852425
===== < BUILDING NO.40 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 40%|████      | 40/100 [1:39:29<2:29:47, 149.80s/it]

{'max_depth': 10, 'min_samples_leaf': 12, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 5.789401204125523
===== < BUILDING NO.41 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 41%|████      | 41/100 [1:42:01<2:28:04, 150.59s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 70}
SMAPE: 2.3700680299076198
===== < BUILDING NO.42 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 42%|████▏     | 42/100 [1:44:32<2:25:26, 150.45s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 150}
SMAPE: 5.850944937098804
===== < BUILDING NO.43 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 43%|████▎     | 43/100 [1:47:01<2:22:38, 150.15s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 3.1565088238707752
===== < BUILDING NO.44 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 44%|████▍     | 44/100 [1:49:30<2:19:53, 149.88s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 2.350829009069913
===== < BUILDING NO.45 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 45%|████▌     | 45/100 [1:51:59<2:17:07, 149.59s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 1.680819579060943
===== < BUILDING NO.46 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 46%|████▌     | 46/100 [1:54:27<2:14:13, 149.14s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.4162409238163574
===== < BUILDING NO.47 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 47%|████▋     | 47/100 [1:56:56<2:11:40, 149.06s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.222375832826479
===== < BUILDING NO.48 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 48%|████▊     | 48/100 [1:59:19<2:07:37, 147.25s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.252752796783368
===== < BUILDING NO.49 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 49%|████▉     | 49/100 [2:01:42<2:04:08, 146.04s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.7991857408869918
===== < BUILDING NO.50 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 50%|█████     | 50/100 [2:04:08<2:01:36, 145.93s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.3538847987770846
===== < BUILDING NO.51 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 51%|█████     | 51/100 [2:06:38<2:00:06, 147.07s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.1487065045158653
===== < BUILDING NO.52 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 52%|█████▏    | 52/100 [2:09:06<1:57:58, 147.46s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.0408157640229225
===== < BUILDING NO.53 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 53%|█████▎    | 53/100 [2:11:37<1:56:12, 148.36s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 5.619958627328167
===== < BUILDING NO.54 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 54%|█████▍    | 54/100 [2:14:06<1:53:56, 148.62s/it]

{'max_depth': 6, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 9.512157776450604
===== < BUILDING NO.55 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 55%|█████▌    | 55/100 [2:16:30<1:50:27, 147.28s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 0.7903171536781615
===== < BUILDING NO.56 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 56%|█████▌    | 56/100 [2:19:00<1:48:41, 148.21s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 0.4034406317630422
===== < BUILDING NO.57 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 57%|█████▋    | 57/100 [2:21:29<1:46:18, 148.35s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 2.3408428318273358
===== < BUILDING NO.58 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 58%|█████▊    | 58/100 [2:23:57<1:43:50, 148.33s/it]

{'max_depth': 8, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 0.2861360028584648
===== < BUILDING NO.59 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 59%|█████▉    | 59/100 [2:26:30<1:42:17, 149.70s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 2.8412030924813805
===== < BUILDING NO.60 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 60%|██████    | 60/100 [2:29:02<1:40:15, 150.39s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 3.4471212007760275
===== < BUILDING NO.61 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 61%|██████    | 61/100 [2:31:29<1:37:07, 149.43s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.6694640249424926
===== < BUILDING NO.62 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 62%|██████▏   | 62/100 [2:33:56<1:34:05, 148.55s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 2.9761009640182317
===== < BUILDING NO.63 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 63%|██████▎   | 63/100 [2:36:23<1:31:17, 148.05s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 3.503682144904453
===== < BUILDING NO.64 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 64%|██████▍   | 64/100 [2:38:48<1:28:19, 147.20s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.4709048613822553
===== < BUILDING NO.65 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 65%|██████▌   | 65/100 [2:41:10<1:24:55, 145.58s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 5.000456045615479
===== < BUILDING NO.66 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 66%|██████▌   | 66/100 [2:43:34<1:22:20, 145.30s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 2.5415428013389474
===== < BUILDING NO.67 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 67%|██████▋   | 67/100 [2:45:59<1:19:50, 145.15s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 3.392291631802414
===== < BUILDING NO.68 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 68%|██████▊   | 68/100 [2:48:26<1:17:36, 145.52s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 2.927633462787152
===== < BUILDING NO.69 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 69%|██████▉   | 69/100 [2:50:55<1:15:49, 146.77s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.3872379222731837
===== < BUILDING NO.70 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 70%|███████   | 70/100 [2:53:28<1:14:14, 148.49s/it]

{'max_depth': 10, 'min_samples_leaf': 18, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 3.4329948365959693
===== < BUILDING NO.71 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 71%|███████   | 71/100 [2:55:57<1:11:49, 148.61s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.6365946851364144
===== < BUILDING NO.72 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 72%|███████▏  | 72/100 [2:58:19<1:08:31, 146.85s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 2.913706685466235
===== < BUILDING NO.73 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 73%|███████▎  | 73/100 [3:00:47<1:06:07, 146.93s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.7781505101405435
===== < BUILDING NO.74 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 74%|███████▍  | 74/100 [3:03:17<1:04:05, 147.92s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 3.2626865403394114
===== < BUILDING NO.75 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 75%|███████▌  | 75/100 [3:05:42<1:01:20, 147.24s/it]

{'max_depth': 10, 'min_samples_leaf': 18, 'min_samples_split': 8, 'n_estimators': 30}
SMAPE: 1.905127897972115
===== < BUILDING NO.76 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 76%|███████▌  | 76/100 [3:08:09<58:45, 146.89s/it]  

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 150}
SMAPE: 2.7341281581910777
===== < BUILDING NO.77 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 77%|███████▋  | 77/100 [3:10:38<56:38, 147.75s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.3804718073399467
===== < BUILDING NO.78 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 78%|███████▊  | 78/100 [3:13:10<54:33, 148.80s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.8588523943087272
===== < BUILDING NO.79 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 79%|███████▉  | 79/100 [3:15:40<52:14, 149.24s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 1.3397163491545003
===== < BUILDING NO.80 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 80%|████████  | 80/100 [3:18:11<49:59, 149.95s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.513995230916446
===== < BUILDING NO.81 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 81%|████████  | 81/100 [3:20:43<47:38, 150.44s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.6631727160230574
===== < BUILDING NO.82 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 82%|████████▏ | 82/100 [3:23:12<44:58, 149.90s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 2.4241549745579833
===== < BUILDING NO.83 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 83%|████████▎ | 83/100 [3:25:42<42:32, 150.14s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.7105807265322521
===== < BUILDING NO.84 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 84%|████████▍ | 84/100 [3:28:11<39:55, 149.72s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 1.7844378031919506
===== < BUILDING NO.85 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 85%|████████▌ | 85/100 [3:30:40<37:22, 149.51s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 2.3432714422991068
===== < BUILDING NO.86 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 86%|████████▌ | 86/100 [3:33:11<34:59, 149.93s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 100}
SMAPE: 3.622456418850993
===== < BUILDING NO.87 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 87%|████████▋ | 87/100 [3:35:40<32:24, 149.54s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 70}
SMAPE: 5.400051846923385
===== < BUILDING NO.88 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 88%|████████▊ | 88/100 [3:38:12<30:05, 150.46s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 30}
SMAPE: 3.102512091345232
===== < BUILDING NO.89 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 89%|████████▉ | 89/100 [3:40:42<27:34, 150.39s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 3.661464699562691
===== < BUILDING NO.90 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 90%|█████████ | 90/100 [3:43:16<25:11, 151.19s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
SMAPE: 3.5033587778450004
===== < BUILDING NO.91 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 91%|█████████ | 91/100 [3:45:46<22:37, 150.83s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 6.211673265338521
===== < BUILDING NO.92 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 92%|█████████▏| 92/100 [3:48:18<20:11, 151.44s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 20, 'n_estimators': 70}
SMAPE: 3.1944882260505283
===== < BUILDING NO.93 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 93%|█████████▎| 93/100 [3:50:48<17:37, 151.02s/it]

{'max_depth': 12, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 50}
SMAPE: 3.3251798994406743
===== < BUILDING NO.94 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


 94%|█████████▍| 94/100 [3:53:17<15:01, 150.33s/it]

{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 150}
SMAPE: 3.0058307993349924
===== < BUILDING NO.95 > =====
Fitting 5 folds for each of 180 candidates, totalling 900 fits


In [133]:
df.to_csv('hyperparameter_xgb.csv', index=False)

In [84]:
rf_params = df

In [85]:
rf_params.head()

Unnamed: 0,n_estimators,max_depth,min_samples_leaf,min_samples_split
0,150,8,8,8
0,100,8,8,8
0,30,6,8,8
0,100,8,8,8
0,50,10,8,8


In [90]:
preds = np.array([])
for i in tqdm(range(100)):
    pred_df = pd.DataFrame()
    for seed in range(0, 6):
        x_train = train.dropna().loc[train['건물번호']==i+1,].drop(['건물번호', 'power'], axis=1)
        y_train = train.dropna().loc[train['건물번호']==i+1, 'power']
        x_test = test.loc[test['건물번호']==i+1,].drop('건물번호', axis=1)

        rf = RandomForestRegressor(random_state=seed, n_estimators=rf_params.iloc[i, 0],
                                   max_depth=rf_params.iloc[i, 1],
                                   min_samples_leaf=rf_params.iloc[i, 2],
                                   min_samples_split=rf_params.iloc[i, 3])

        rf.fit(x_train, y_train)
        y_pred = rf.predict(x_test)
        pred_df.loc[:, seed] = y_pred

    pred = pred_df.mean(axis=1)
    preds = np.append(preds, pred)

  8%|▊         | 8/100 [00:15<03:02,  1.98s/it]


IndexError: index 8 is out of bounds for axis 0 with size 8

In [None]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = preds
sub.to_csv('rf_sub_24.csv', index=False)