In [1]:
import xgboost as xgb
import tqdm as tq
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import pandas as pd
from tqdm import tqdm
from xgboost import XGBRegressor
import numpy as np

### Train Data Preprocessing

In [2]:
# Load the raw training table from the working directory. The preview below
# shows one row per building and hour, with weather readings and the
# 전력소비량(kWh) target column.
train = pd.read_csv('train.csv')

In [69]:
# Preview the first rows of the raw training table (before any preprocessing).
train.head()

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4


In [3]:
# --- Train feature engineering (mutates `train` in place) -------------------
# NOTE: this cell is not re-runnable on its own: it removes columns it reads,
# so re-executing it requires reloading `train` from train.csv first.

# Sunshine / solar-irradiance columns are discarded; the test preview in this
# notebook shows no such columns, so they could not be used at prediction time.
for unused_col in ('일조(hr)', '일사(MJ/m2)'):
    train.pop(unused_col)

# Parse the 'YYYYMMDD HH' timestamp once and derive calendar features from it.
train_time = pd.to_datetime(train.pop('일시'), format='%Y%m%d %H')
train_hour = train_time.dt.hour
train['month'] = train_time.dt.month
train['day'] = train_time.dt.weekday  # 0 = Monday ... 6 = Sunday

# Weekend flag: 0 for weekday values below 5, 1 otherwise. Vectorised instead
# of a row-wise DataFrame.apply; the resulting values are identical.
train['holiday'] = np.where(train['day'] < 5, 0, 1).astype('int64')

# Cyclical encoding of the hour of day (period of 24).
train['sin_time'] = np.sin(2*np.pi*train_hour/24)
train['cos_time'] = np.cos(2*np.pi*train_hour/24)

# Discomfort index.
# NOTE(review): humidity enters this formula in percent (e.g. 42.0), whereas
# the usual discomfort-index formula takes relative humidity as a fraction
# (0-1); that is why the saved preview shows DI values above 200. It is left
# unchanged here because the test cell uses the same expression and the two
# must be changed together.
# NOTE(review): DI is computed before the humidity NaNs are filled below, so
# rows with missing humidity keep a NaN DI.
train['DI'] = 1.8*train['기온(C)'] - 0.55*(1-train['습도(%)'])*(1.8*train['기온(C)']-26) + 32

# Remove the row identifier and move the target to the last column under the
# shorter name 'power'.
train.pop('num_date_time')
train['power'] = train.pop('전력소비량(kWh)')

# Missing weather readings are replaced with 0.
for weather_col in ('강수량(mm)', '풍속(m/s)', '습도(%)'):
    train[weather_col] = train[weather_col].fillna(0)

In [71]:
# Persist the engineered training table (without the pandas index) so later
# work can start from the preprocessed CSV.
train.to_csv('train_preprocessed.csv', index=False)

In [72]:
# Preview the engineered training table; the target now appears as 'power'.
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4


### Test Data Preprocessing

In [4]:
# Load the raw test table from the working directory.
test = pd.read_csv('test.csv')

In [5]:
# --- Test feature engineering (mutates `test` in place) ---------------------
# Mirrors the feature set built for `train`.
# NOTE: this cell is not re-runnable on its own: it removes columns it reads,
# so re-executing it requires reloading `test` from test.csv first.
# NOTE(review): unlike the training cell, no fillna(0) is applied to the
# weather columns here — confirm the test file has no missing readings.

# Parse the 'YYYYMMDD HH' timestamp once and derive calendar features from it.
test_time = pd.to_datetime(test.pop('일시'), format='%Y%m%d %H')
test['month'] = test_time.dt.month
test['day'] = test_time.dt.weekday  # 0 = Monday ... 6 = Sunday
test['hour'] = test_time.dt.hour

# Weekend flag: 0 for weekday values below 5, 1 otherwise. Vectorised instead
# of a row-wise DataFrame.apply; the resulting values are identical.
test['holiday'] = np.where(test['day'] < 5, 0, 1).astype('int64')

# Cyclical encoding of the hour of day (period of 24).
test['sin_time'] = np.sin(2*np.pi*test.hour/24)
test['cos_time'] = np.cos(2*np.pi*test.hour/24)

# Discomfort index.
# NOTE(review): humidity enters this formula in percent (e.g. 72), whereas the
# usual discomfort-index formula takes relative humidity as a fraction (0-1).
# It is left unchanged here because the training cell uses the same expression
# and the two must be changed together.
test['DI'] = 1.8*test['기온(C)'] - 0.55*(1-test['습도(%)'])*(1.8*test['기온(C)']-26) + 32

# Remove the row identifier and the temporary hour column. The final pop is
# the cell's last expression, so the removed 'hour' Series is displayed.
test.pop('num_date_time')
test.pop('hour')

0         0
1         1
2         2
3         3
4         4
         ..
16795    19
16796    20
16797    21
16798    22
16799    23
Name: hour, Length: 16800, dtype: int64

In [75]:
# Persist the engineered test table (without the pandas index).
test.to_csv('test_preprocessed.csv', index=False)

In [14]:
# Preview the engineered test table; it has the training feature columns but
# no 'power' target.
test.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI
0,1,23.5,0.0,2.2,72,8,3,0,0.0,1.0,710.815
1,1,23.0,0.0,0.9,72,8,3,0,0.258819,0.965926,674.77
2,1,22.7,0.0,1.5,75,8,3,0,0.5,0.866025,677.662
3,1,22.1,0.0,1.3,78,8,3,0,0.707107,0.707107,655.363
4,1,21.8,0.0,1.0,77,8,3,0,0.866025,0.5,624.672


### Training

In [6]:
import os
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split

In [7]:
def SMAPE(true, pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``mean(|true - pred| / (|true| + |pred|)) * 100``. This is the
    variant without a factor of 2, so each term lies between 0 and 1.

    Parameters
    ----------
    true, pred : array-like
        Observed and predicted values. They are converted to float arrays and
        compared position by position, so pandas objects with different
        indexes are not re-aligned by label.

    Returns
    -------
    float
        The score in percent. Positions where both values are exactly zero
        contribute zero error instead of turning the whole score into NaN.
    """
    true = np.asarray(true, dtype=float)
    pred = np.asarray(pred, dtype=float)

    abs_err = np.abs(true - pred)
    scale = np.abs(true) + np.abs(pred)

    # scale == 0 only when true == pred == 0, i.e. a perfect prediction; leave
    # the pre-filled 0.0 there rather than evaluating 0/0.
    ratio = np.divide(abs_err, scale, out=np.zeros_like(abs_err), where=scale != 0)
    return np.mean(ratio) * 100

In [20]:
# Preview the training table before modelling.
# NOTE(review): the saved output of this cell lists a `week` column that no
# visible cell creates, so that output comes from an earlier kernel state;
# re-run the notebook top to bottom to refresh it.
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4


In [43]:
def tr_ts_split(train, test, number, val_hour, drop_range=(601, 700)):
    """Build the train/validation split for a single building.

    Parameters
    ----------
    train : pandas.DataFrame
        Preprocessed training table containing '건물번호' and 'power'.
    test : pandas.DataFrame
        Unused; kept so existing callers keep working.
    number : int
        Building number to select.
    val_hour : int
        Number of trailing rows to hold out for validation. ``0`` disables
        the hold-out.
    drop_range : tuple[int, int] or None, default (601, 700)
        Half-open positional range ``[start, stop)`` of rows (within the
        selected building) to discard before splitting. ``None`` keeps every
        row. The default reproduces the previously hard-coded cut.

    Returns
    -------
    tuple
        ``(x_train, x_valid, y_train, y_valid)``. When ``val_hour == 0`` the
        two validation entries are ``None``. All returned objects have a
        fresh 0-based index.
    """
    building_rows = train.loc[train['건물번호'] == number]
    x = building_rows.drop(['건물번호', 'power'], axis=1)
    y = building_rows['power']

    # NOTE(review): the original code carried a commented-out
    # `if number == 54:` guard above this cut, so it looks like the cut was
    # first meant for building 54 only. It currently applies to every
    # building — confirm that this is intended.
    if drop_range is not None:
        start, stop = drop_range
        x = pd.concat([x.iloc[:start], x.iloc[stop:]], axis=0)
        y = pd.concat([y.iloc[:start], y.iloc[stop:]])
    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)

    if val_hour == 0:
        return x, None, y, None

    # The split is positional: the last `val_hour` rows form the validation set.
    x_train, x_valid = x.iloc[:-val_hour], x.iloc[-val_hour:]
    y_train, y_valid = y.iloc[:-val_hour], y.iloc[-val_hour:]
    return x_train, x_valid.reset_index(drop=True), y_train, y_valid.reset_index(drop=True)

In [31]:
def fit_and_pred_XGB(train, test, number, seed=0):
    """Fit an XGBoost regressor for one building and predict its test rows.

    The last 7*24 rows of the building's training data are held out through
    `tr_ts_split`. They serve both as the second entry of `eval_set` for
    early stopping and as the validation set the metrics are computed on.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Preprocessed tables; both contain a '건물번호' column.
    number : int
        Building number to model.
    seed : int, default 0
        Seed forwarded to the regressor.

    Returns
    -------
    tuple
        ``(smape_val, r2_val, y_valid, pred, y_test_pred)``, where `pred`
        is a pandas Series of validation predictions and `y_test_pred` holds
        the predictions for the building's test rows.
    """
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    # NOTE(review): gpu_id / tree_method='gpu_hist' / predictor and passing
    # early_stopping_rounds to fit() are deprecated in newer XGBoost releases —
    # confirm against the installed version before upgrading.
    model_params = dict(
        n_estimators=10000,
        eta=0.01,
        seed=seed,
        gpu_id=0,
        tree_method='gpu_hist',
        predictor='gpu_predictor',
    )
    xgb_reg = XGBRegressor(**model_params)

    eval_pairs = [(x_train, y_train), (x_valid, y_valid)]
    xgb_reg.fit(x_train, y_train, eval_set=eval_pairs,
                early_stopping_rounds=300, verbose=False)

    # Wrap the validation predictions in a Series, as the metrics below expect.
    pred = pd.Series(xgb_reg.predict(x_valid))

    # Predict this building's test rows; the building id is not a model feature.
    building_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(building_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [32]:
# Train one baseline model per building (numbers 1..100) and collect, for each
# building, its validation metrics, validation targets/predictions and test
# predictions.
pred_val_ens, test_ens = [], []

summary_list, ans_val_list = [], []
pred_val_list, pred_test_list = [], []

for i in tqdm(range(100)):
    building_no = i + 1
    print(f"===== < BUILDING NO.{i+1} > =====")

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB(train, test, building_no, seed=0)

    summary_dict = {'건물번호': building_no, 'SMAPE': smape_val, 'R2': r2_val}
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

# Stack the per-building pieces in building order and score the pooled
# validation predictions. `smape_val` is overwritten with this pooled score.
ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val = SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:02<04:48,  2.92s/it]

SMAPE: 3.498611346685474
R2: 0.9505132437893566
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:03<02:48,  1.72s/it]

SMAPE: 5.293149692920055
R2: 0.8348522802848739
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:04<02:07,  1.32s/it]

SMAPE: 5.356015760282614
R2: 0.8317239037502239
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:05<01:52,  1.17s/it]

SMAPE: 1.9496199263359284
R2: 0.9762195277260349
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:06<01:53,  1.20s/it]

SMAPE: 3.7857848095046234
R2: 0.9553275085539296
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:08<02:02,  1.30s/it]

SMAPE: 2.1892490411326784
R2: 0.967172993781067
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:09<01:53,  1.22s/it]

SMAPE: 4.193700780841268
R2: 0.9114530612447274
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:10<01:54,  1.25s/it]

SMAPE: 2.3769706796950647
R2: 0.738734383877521
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:12<02:01,  1.33s/it]

SMAPE: 2.16289941970656
R2: 0.967354544053431
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:13<01:48,  1.21s/it]

SMAPE: 3.544824546047977
R2: 0.4787061091828202
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:14<01:53,  1.28s/it]

SMAPE: 1.7123269044155176
R2: 0.8951795398298666
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:16<02:11,  1.49s/it]

SMAPE: 2.4624122023586574
R2: 0.8611547191554905
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:18<02:14,  1.54s/it]

SMAPE: 3.8020252872378526
R2: 0.06425205045303584
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:19<01:56,  1.36s/it]

SMAPE: 8.189375174857945
R2: 0.3001255022595253
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:20<01:50,  1.30s/it]

SMAPE: 1.3960957705789798
R2: 0.9228701003346914
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:21<01:42,  1.22s/it]

SMAPE: 2.37106666750909
R2: 0.979179132078914
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:22<01:52,  1.35s/it]

SMAPE: 2.9756161842993314
R2: 0.9385202744996135
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:24<02:04,  1.52s/it]

SMAPE: 3.165082367695018
R2: 0.9375511870537827
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:27<02:20,  1.73s/it]

SMAPE: 4.358012297704934
R2: 0.9230614953424408
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:29<02:35,  1.95s/it]

SMAPE: 2.5586305116839703
R2: 0.9785709236531792
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:31<02:23,  1.81s/it]

SMAPE: 3.6659506192398292
R2: 0.9428450509662992
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:33<02:23,  1.85s/it]

SMAPE: 1.902485722034615
R2: 0.9367990805900666
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:34<02:13,  1.73s/it]

SMAPE: 0.9514831460618797
R2: 0.9632550515558436
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:36<02:27,  1.94s/it]

SMAPE: 1.2794468921916642
R2: 0.9853502403705504
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:39<02:36,  2.08s/it]

SMAPE: 1.1516069326919902
R2: 0.9847659748838598
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:40<02:14,  1.82s/it]

SMAPE: 1.1736706196874707
R2: 0.98175166017241
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:41<02:01,  1.67s/it]

SMAPE: 0.9084647467001031
R2: 0.9775322852392823
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:42<01:44,  1.45s/it]

SMAPE: 2.3442610504342447
R2: 0.9622112572638699
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:43<01:34,  1.34s/it]

SMAPE: 1.9620275137975263
R2: 0.9534660818164693
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:45<01:35,  1.36s/it]

SMAPE: 3.3856411799114743
R2: 0.8823913463829575
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:46<01:25,  1.24s/it]

SMAPE: 2.384384521486442
R2: 0.94324772686843
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [00:48<01:37,  1.43s/it]

SMAPE: 0.18168687118474042
R2: 0.8716500227384778
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [00:50<02:05,  1.87s/it]

SMAPE: 0.20891778105074252
R2: 0.7054608729440801
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [00:52<01:52,  1.70s/it]

SMAPE: 0.4507656500730843
R2: 0.7307975289905393
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [00:53<01:47,  1.65s/it]

SMAPE: 0.18468850679458235
R2: 0.9115293692906026
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [00:55<01:43,  1.61s/it]

SMAPE: 0.3578870503331797
R2: 0.574332776557588
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [00:56<01:37,  1.54s/it]

SMAPE: 2.660110985955761
R2: 0.9695916360038769
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [00:57<01:30,  1.46s/it]

SMAPE: 2.2131404037796796
R2: 0.9891327941919511
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [00:59<01:27,  1.43s/it]

SMAPE: 2.8645511876284258
R2: 0.9666631614328486
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [01:00<01:17,  1.29s/it]

SMAPE: 6.643655640197986
R2: 0.8709919208716461
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:01<01:15,  1.27s/it]

SMAPE: 2.2347387777521126
R2: 0.9854015286589034
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:02<01:06,  1.15s/it]

SMAPE: 8.228826606906
R2: 0.8357761373008403
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:04<01:26,  1.51s/it]

SMAPE: 2.9528802344260834
R2: 0.9955083927006674
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:05<01:17,  1.38s/it]

SMAPE: 2.032185484045241
R2: 0.995921105373115
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:07<01:20,  1.46s/it]

SMAPE: 1.1458782040514401
R2: 0.96757679741029
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:08<01:18,  1.45s/it]

SMAPE: 2.3714425280938563
R2: 0.9450029202995658
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:10<01:11,  1.35s/it]

SMAPE: 2.044764699812375
R2: 0.9413476203084042
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:11<01:19,  1.53s/it]

SMAPE: 0.9768273462234967
R2: 0.9874124206907716
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:15<01:51,  2.18s/it]

SMAPE: 1.3807927693226596
R2: 0.9447582834230677
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:16<01:32,  1.85s/it]

SMAPE: 1.2009932920678894
R2: 0.9505989225566546
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:19<01:38,  2.02s/it]

SMAPE: 2.0636869887810967
R2: 0.9226817695294973
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [01:20<01:32,  1.93s/it]

SMAPE: 2.0493298626705543
R2: 0.9489697309683368
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [01:21<01:18,  1.66s/it]

SMAPE: 5.340041274410151
R2: 0.9583782816579087
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [01:23<01:12,  1.57s/it]

SMAPE: 10.663077734269486
R2: 0.7350227445210766
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [01:24<01:06,  1.47s/it]

SMAPE: 0.8380785040874941
R2: 0.9062577506522023
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [01:26<01:05,  1.49s/it]

SMAPE: 0.436452167015171
R2: 0.9264579482651879
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [01:27<01:02,  1.45s/it]

SMAPE: 2.4157824816625957
R2: 0.985868447747852
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [01:28<01:01,  1.46s/it]

SMAPE: 0.21341890017288512
R2: 0.9404798584567119
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [01:30<00:59,  1.45s/it]

SMAPE: 2.7906883013793666
R2: 0.9816288680993065
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [01:31<00:53,  1.33s/it]

SMAPE: 2.9905881730765027
R2: 0.9476653446138144
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [01:33<00:56,  1.45s/it]

SMAPE: 2.3325793107355137
R2: 0.9419311184660939
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [01:34<00:53,  1.42s/it]

SMAPE: 2.726030019359426
R2: 0.9200633805392667
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [01:35<00:50,  1.36s/it]

SMAPE: 3.325040607868205
R2: 0.9046751977295702
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [01:36<00:47,  1.33s/it]

SMAPE: 1.958886127511314
R2: 0.9553564388571527
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [01:39<00:56,  1.61s/it]

SMAPE: 4.127504757150685
R2: 0.8851226254850699
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [01:40<00:51,  1.51s/it]

SMAPE: 2.1232509656724003
R2: 0.8491745678471003
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [01:41<00:43,  1.33s/it]

SMAPE: 2.70937445575454
R2: 0.8970033666233985
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [01:43<00:47,  1.50s/it]

SMAPE: 2.1886706022607543
R2: 0.9487191283137584
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [01:44<00:43,  1.40s/it]

SMAPE: 1.2091538313247658
R2: 0.9776313760133133
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [01:46<00:43,  1.45s/it]

SMAPE: 3.759346043562698
R2: 0.8893023300973883
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [01:47<00:42,  1.46s/it]

SMAPE: 2.2493401190950553
R2: 0.8897731373450036
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [01:49<00:41,  1.47s/it]

SMAPE: 3.185010208474607
R2: 0.6825262184411249
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [01:50<00:36,  1.37s/it]

SMAPE: 1.5649296968782316
R2: 0.8350323078445261
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [01:54<01:02,  2.41s/it]

SMAPE: 3.0884551216243783
R2: 0.925112756541863
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [01:56<00:51,  2.06s/it]

SMAPE: 1.9518594881003344
R2: 0.9305276733980469
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [01:58<00:50,  2.09s/it]

SMAPE: 2.378903798467285
R2: 0.8056589748466432
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [01:59<00:42,  1.86s/it]

SMAPE: 1.1479610035193055
R2: 0.993420805667553
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:01<00:38,  1.75s/it]

SMAPE: 1.4670866115977244
R2: 0.9939958344760238
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [02:02<00:34,  1.64s/it]

SMAPE: 1.4692884160996864
R2: 0.9817841410210375
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [02:06<00:43,  2.18s/it]

SMAPE: 2.171891235615472
R2: 0.9895727614599519
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [02:07<00:36,  1.95s/it]

SMAPE: 1.7450167297371562
R2: 0.9918030115076087
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [02:13<00:56,  3.16s/it]

SMAPE: 2.062635362144611
R2: 0.9871176437793039
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [02:14<00:43,  2.58s/it]

SMAPE: 1.497535147324334
R2: 0.9943026934178416
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [02:16<00:36,  2.25s/it]

SMAPE: 1.3276465432049291
R2: 0.9925873429229188
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [02:21<00:45,  3.06s/it]

SMAPE: 1.8373643714454613
R2: 0.9456730970038871
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [02:22<00:34,  2.49s/it]

SMAPE: 4.850684883845767
R2: 0.775971950154437
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [02:23<00:27,  2.12s/it]

SMAPE: 5.5841178904621875
R2: 0.8225076428444532
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [02:24<00:21,  1.81s/it]

SMAPE: 3.766632448166618
R2: 0.8739348947397174
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [02:25<00:17,  1.62s/it]

SMAPE: 4.236226554102335
R2: 0.8199982629962352
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [02:28<00:19,  1.99s/it]

SMAPE: 4.331814576967058
R2: 0.9212626381652762
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [02:30<00:16,  1.87s/it]

SMAPE: 6.458625793746524
R2: 0.9280963286652838
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [02:31<00:12,  1.59s/it]

SMAPE: 4.120474151511109
R2: 0.8557563838597853
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [02:33<00:13,  1.94s/it]

SMAPE: 3.040951808064286
R2: 0.8896454501784379
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [02:35<00:10,  1.78s/it]

SMAPE: 3.0860145602071376
R2: 0.8985798264959164
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [02:36<00:07,  1.52s/it]

SMAPE: 6.931617789405021
R2: 0.6613899714805689
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [02:37<00:05,  1.43s/it]

SMAPE: 1.3652719050666224
R2: 0.9705848996133637
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [02:38<00:04,  1.47s/it]

SMAPE: 2.90581804228281
R2: 0.9110813920058481
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [02:40<00:02,  1.37s/it]

SMAPE: 6.455782151672955
R2: 0.8680712977269041
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [02:41<00:01,  1.31s/it]

SMAPE: 1.714640903519672
R2: 0.9770043906034206
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [02:42<00:00,  1.63s/it]

SMAPE: 2.7237784868028996
R2: 0.9623124054018236





In [33]:
# Show the per-building validation metrics collected by the training loop.
summary_list

[{'건물번호': 1, 'SMAPE': 3.498611346685474, 'R2': 0.9505132437893566},
 {'건물번호': 2, 'SMAPE': 5.293149692920055, 'R2': 0.8348522802848739},
 {'건물번호': 3, 'SMAPE': 5.356015760282614, 'R2': 0.8317239037502239},
 {'건물번호': 4, 'SMAPE': 1.9496199263359284, 'R2': 0.9762195277260349},
 {'건물번호': 5, 'SMAPE': 3.7857848095046234, 'R2': 0.9553275085539296},
 {'건물번호': 6, 'SMAPE': 2.1892490411326784, 'R2': 0.967172993781067},
 {'건물번호': 7, 'SMAPE': 4.193700780841268, 'R2': 0.9114530612447274},
 {'건물번호': 8, 'SMAPE': 2.3769706796950647, 'R2': 0.738734383877521},
 {'건물번호': 9, 'SMAPE': 2.16289941970656, 'R2': 0.967354544053431},
 {'건물번호': 10, 'SMAPE': 3.544824546047977, 'R2': 0.4787061091828202},
 {'건물번호': 11, 'SMAPE': 1.7123269044155176, 'R2': 0.8951795398298666},
 {'건물번호': 12, 'SMAPE': 2.4624122023586574, 'R2': 0.8611547191554905},
 {'건물번호': 13, 'SMAPE': 3.8020252872378526, 'R2': 0.06425205045303584},
 {'건물번호': 14, 'SMAPE': 8.189375174857945, 'R2': 0.3001255022595253},
 {'건물번호': 15, 'SMAPE': 1.39609577057897

In [34]:
# Pooled validation SMAPE over all buildings (set after the training loop).
smape_val

2.757639832433073

In [71]:
# Write the submission file from the concatenated per-building test predictions.
# NOTE(review): this assumes sample_submission.csv lists rows in the same
# building-then-time order as the concatenation — confirm.
# NOTE(review): `pred_test_whole` is reassigned by the weighted-objective loop
# later in the notebook, so which model's predictions get written depends on
# the order in which the cells were executed.
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = pred_test_whole
sub.to_csv('xgb_sub_15.csv', index=False)

### Improvement

In [35]:
# Add the temperature's offset from 26 as a feature (the column name reads
# "difference from comfortable temperature"). The same column must be added
# to `test` so both tables keep identical feature sets.
train['적정온도차이'] = train['기온(C)'] - 26

In [36]:
# Add the temperature's offset from 26 to the test table, matching the column
# added to `train`.
test['적정온도차이'] = test['기온(C)'] - 26

In [34]:
# Preview the training table with the new temperature-offset column appended.
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power,적정온도차이
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36,-8.0
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88,-8.3
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76,-9.3
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4,-7.6


In [37]:
def weighted_mse(alpha = 1):
    """Build an asymmetric squared-error objective.

    The returned callable takes ``(label, pred)`` and returns the gradient and
    hessian of the squared error with respect to `pred`. Elements where
    ``label > pred`` (the prediction is too low) are scaled by `alpha`; all
    other elements use the plain squared-error terms.

    Parameters
    ----------
    alpha : number, default 1
        Multiplier applied to under-predicted elements. ``1`` gives ordinary
        squared error.

    Returns
    -------
    callable
        ``f(label, pred) -> (grad, hess)``, both float arrays.
    """
    def weighted_mse_fixed(label, pred):
        err = (label - pred).astype("float")
        # Per-element multiplier: alpha where the prediction is too low, else 1.
        scale = np.where(err > 0, alpha, 1.0)
        grad = (-2 * scale) * err
        hess = 2 * scale
        return grad, hess
    return weighted_mse_fixed

In [38]:
def fit_and_pred_XGB_with_weight(train, test, number, weight, seed=0):
    """Fit an XGBoost regressor with the asymmetric objective for one building.

    Works like `fit_and_pred_XGB`, except that the regressor's objective is
    replaced by ``weighted_mse(weight)``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Preprocessed tables; both contain a '건물번호' column.
    number : int
        Building number to model.
    weight : number
        Value passed to `weighted_mse` as `alpha`.
    seed : int, default 0
        Seed forwarded to the regressor.

    Returns
    -------
    tuple
        ``(smape_val, r2_val, y_valid, pred, y_test_pred)``.
    """
    # Hold out the last 7*24 rows of this building for validation.
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    # NOTE(review): gpu_id / tree_method='gpu_hist' / predictor and passing
    # early_stopping_rounds to fit() are deprecated in newer XGBoost releases —
    # confirm against the installed version before upgrading.
    model_params = dict(
        n_estimators=10000,
        eta=0.01,
        seed=seed,
        gpu_id=0,
        tree_method='gpu_hist',
        predictor='gpu_predictor',
    )
    xgb_reg = XGBRegressor(**model_params)
    xgb_reg.set_params(objective=weighted_mse(weight))

    eval_pairs = [(x_train, y_train), (x_valid, y_valid)]
    xgb_reg.fit(x_train, y_train, eval_set=eval_pairs,
                early_stopping_rounds=300, verbose=False)

    # Wrap the validation predictions in a Series, as the metrics below expect.
    pred = pd.Series(xgb_reg.predict(x_valid))

    # Predict this building's test rows; the building id is not a model feature.
    building_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(building_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [44]:
# Repeat the per-building training (numbers 1..100) with the asymmetric
# objective, weight 100. This overwrites the result containers filled by the
# baseline loop.
pred_val_ens, test_ens = [], []

summary_list, ans_val_list = [], []
pred_val_list, pred_test_list = [], []

for i in tqdm(range(100)):
    building_no = i + 1
    print(f"===== < BUILDING NO.{i+1} > =====")

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB_with_weight(train, test, building_no, 100, seed=0)

    summary_dict = {'건물번호': building_no, 'SMAPE': smape_val, 'R2': r2_val}
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

# Stack the per-building pieces in building order and score the pooled
# validation predictions. `smape_val` is overwritten with this pooled score.
ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val = SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:01<02:57,  1.80s/it]

SMAPE: 3.883634407146668
R2: 0.9428148434678705
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:03<02:26,  1.50s/it]

SMAPE: 5.256542311528698
R2: 0.8268383595313882
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:04<02:11,  1.35s/it]

SMAPE: 5.545908115619256
R2: 0.8225081381620768
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:05<02:12,  1.39s/it]

SMAPE: 2.0216084532929974
R2: 0.9738995748382593
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:07<02:42,  1.71s/it]

SMAPE: 2.8028523787496087
R2: 0.9733667899879699
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:09<02:47,  1.78s/it]

SMAPE: 1.861585193427125
R2: 0.981714605738076
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:11<02:33,  1.65s/it]

SMAPE: 5.0474765252492375
R2: 0.8962074296471658
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:12<02:30,  1.63s/it]

SMAPE: 2.5489555005401963
R2: 0.7355127209176974
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:14<02:33,  1.69s/it]

SMAPE: 2.0478630548996173
R2: 0.975397976943655
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:16<02:21,  1.57s/it]

SMAPE: 3.907172078032968
R2: 0.32611646656584903
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:17<02:23,  1.62s/it]

SMAPE: 1.676392266013526
R2: 0.9014899611347567
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:20<02:56,  2.00s/it]

SMAPE: 1.8723574629050932
R2: 0.9008362680544917
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:22<02:48,  1.93s/it]

SMAPE: 3.0274657714415754
R2: 0.3797024654736437
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:23<02:28,  1.73s/it]

SMAPE: 8.411099584140395
R2: 0.2536910586619444
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:25<02:24,  1.70s/it]

SMAPE: 1.2548517916499227
R2: 0.9371161321264849
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:26<02:16,  1.63s/it]

SMAPE: 2.2092638330705237
R2: 0.9848943403666937
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:28<02:23,  1.72s/it]

SMAPE: 2.9155781243960273
R2: 0.9459348281219652
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:30<02:28,  1.81s/it]

SMAPE: 3.3829864057136083
R2: 0.9332030692073239
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:33<02:43,  2.02s/it]

SMAPE: 4.304075013735136
R2: 0.9364720115975896
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:35<02:37,  1.97s/it]

SMAPE: 2.378545266421429
R2: 0.9764064401504943
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:36<02:31,  1.92s/it]

SMAPE: 3.9669116215508895
R2: 0.933790666753363
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:38<02:26,  1.87s/it]

SMAPE: 1.8200203037843907
R2: 0.9429240295041776
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:40<02:35,  2.02s/it]

SMAPE: 0.9373158040898235
R2: 0.966113940778409
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:43<02:40,  2.11s/it]

SMAPE: 1.288296737357208
R2: 0.9864269965196982
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:45<02:44,  2.20s/it]

SMAPE: 1.0654398164794496
R2: 0.9887994766418751
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:47<02:31,  2.05s/it]

SMAPE: 0.9936471600354451
R2: 0.9862221989284544
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:49<02:22,  1.95s/it]

SMAPE: 0.8588802301263323
R2: 0.9817016980675318
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:50<02:06,  1.76s/it]

SMAPE: 2.695373169555824
R2: 0.9583253430170969
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:51<01:59,  1.68s/it]

SMAPE: 1.976031642209499
R2: 0.9524808637651322
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:53<01:55,  1.65s/it]

SMAPE: 3.5546984752156083
R2: 0.8875174353264503
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:54<01:48,  1.57s/it]

SMAPE: 2.56804945577574
R2: 0.9430689817703377
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [00:57<01:59,  1.75s/it]

SMAPE: 0.19702161248468097
R2: 0.8575105699889993
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [00:59<02:16,  2.03s/it]

SMAPE: 0.22625441905510688
R2: 0.664683287931862
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [01:02<02:31,  2.29s/it]

SMAPE: 0.3245615439520965
R2: 0.8743641573149044
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [01:05<02:31,  2.33s/it]

SMAPE: 0.1725662905282774
R2: 0.9231181475717967
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [01:07<02:37,  2.46s/it]

SMAPE: 0.3293354963327888
R2: 0.6086908187486058
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [01:09<02:23,  2.29s/it]

SMAPE: 2.156013386887052
R2: 0.9834896887299619
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [01:11<02:10,  2.11s/it]

SMAPE: 1.79901744109431
R2: 0.997437075975758
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [01:13<02:00,  1.97s/it]

SMAPE: 2.6661003736987903
R2: 0.9789087896318968
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [01:14<01:47,  1.80s/it]

SMAPE: 4.45893690485189
R2: 0.9573252573751764
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:16<01:55,  1.96s/it]

SMAPE: 2.5510663147591073
R2: 0.9930563582771769
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:18<01:41,  1.74s/it]

SMAPE: 8.887439371539775
R2: 0.8210955033826771
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:19<01:40,  1.76s/it]

SMAPE: 3.2731321691803545
R2: 0.9935898557260004
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:21<01:34,  1.69s/it]

SMAPE: 2.0138184178735856
R2: 0.9963370546141956
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:23<01:37,  1.77s/it]

SMAPE: 1.2292015666710925
R2: 0.9618127671900834
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:25<01:34,  1.75s/it]

SMAPE: 2.262918598327897
R2: 0.9497221063116454
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:26<01:30,  1.70s/it]

SMAPE: 2.163123832268895
R2: 0.9368352656382489
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:28<01:29,  1.73s/it]

SMAPE: 1.0124164128796722
R2: 0.9867960252855252
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:30<01:37,  1.91s/it]

SMAPE: 1.231602471054904
R2: 0.9518363003022309
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:32<01:32,  1.84s/it]

SMAPE: 1.149476273028294
R2: 0.9402324357123669
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:34<01:31,  1.86s/it]

SMAPE: 1.7940870821424981
R2: 0.9374935367784654
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [01:36<01:33,  1.95s/it]

SMAPE: 1.7134989361480766
R2: 0.969279720743553
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [01:37<01:23,  1.78s/it]

SMAPE: 5.7870331403832855
R2: 0.9500465530970634
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [01:39<01:19,  1.73s/it]

SMAPE: 9.343993649103458
R2: 0.8290382764818288
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [01:41<01:18,  1.74s/it]

SMAPE: 0.8002626858072206
R2: 0.9195686449786508
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [01:43<01:23,  1.91s/it]

SMAPE: 0.372070315733699
R2: 0.9577856450595469
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [01:45<01:18,  1.83s/it]

SMAPE: 2.582743473340681
R2: 0.9855450716287837
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [01:47<01:20,  1.91s/it]

SMAPE: 0.2109516149812387
R2: 0.9415446081549514
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [01:48<01:15,  1.83s/it]

SMAPE: 2.727613907355983
R2: 0.9804815904020056
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [01:50<01:09,  1.74s/it]

SMAPE: 3.128806069117498
R2: 0.9440451782825701
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [01:52<01:05,  1.68s/it]

SMAPE: 2.4695452930152224
R2: 0.9320376576270133
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [01:53<01:02,  1.63s/it]

SMAPE: 2.800901630300211
R2: 0.9255194239196292
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [01:55<00:58,  1.59s/it]

SMAPE: 3.237423712443478
R2: 0.9093444882521864
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [01:56<00:56,  1.57s/it]

SMAPE: 1.983562546595305
R2: 0.9449119561229407
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [01:57<00:53,  1.52s/it]

SMAPE: 4.087620852650325
R2: 0.8779487186996391
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [01:59<00:52,  1.55s/it]

SMAPE: 2.1400472638820167
R2: 0.8596778515618013
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [02:00<00:49,  1.50s/it]

SMAPE: 2.7280610479038323
R2: 0.8819381309576115
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [02:02<00:47,  1.49s/it]

SMAPE: 2.4629394121129358
R2: 0.9323077167946682
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [02:04<00:47,  1.54s/it]

SMAPE: 1.1647349083922025
R2: 0.9778160470628237
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [02:06<00:49,  1.66s/it]

SMAPE: 2.7534337821495734
R2: 0.9469181421177187
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [02:07<00:48,  1.67s/it]

SMAPE: 2.19866582767245
R2: 0.8921923890792537
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [02:10<00:51,  1.86s/it]

SMAPE: 2.3547259883621723
R2: 0.788860197681444
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [02:11<00:48,  1.80s/it]

SMAPE: 1.6954986887964207
R2: 0.8131771834720108
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [02:13<00:49,  1.89s/it]

SMAPE: 3.181952948728036
R2: 0.9189238392978
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [02:15<00:44,  1.79s/it]

SMAPE: 1.7656772240774978
R2: 0.9381620469863149
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [02:16<00:41,  1.72s/it]

SMAPE: 2.4709027490552686
R2: 0.7829026370287808
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [02:18<00:39,  1.72s/it]

SMAPE: 1.2620726817927796
R2: 0.9920637841574659
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:20<00:40,  1.82s/it]

SMAPE: 1.5501687975698433
R2: 0.993533370338121
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [02:22<00:41,  1.96s/it]

SMAPE: 1.0436883340074417
R2: 0.9911248490291023
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [02:24<00:39,  1.98s/it]

SMAPE: 1.6852099716763582
R2: 0.9945649161319271
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [02:26<00:36,  1.94s/it]

SMAPE: 1.8448466598461641
R2: 0.9912631340286862
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [02:28<00:34,  1.90s/it]

SMAPE: 2.1228550962234487
R2: 0.9920777946017227
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [02:30<00:31,  1.86s/it]

SMAPE: 1.4764124214173946
R2: 0.9935302051646853
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [02:32<00:29,  1.83s/it]

SMAPE: 1.4184075224078103
R2: 0.9911678050073048
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [02:35<00:36,  2.41s/it]

SMAPE: 1.7766891378931844
R2: 0.9554452075929176
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [02:38<00:32,  2.34s/it]

SMAPE: 3.8742220810478485
R2: 0.8744989436601363
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [02:40<00:28,  2.22s/it]

SMAPE: 4.984363164766773
R2: 0.8799899192668625
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [02:41<00:25,  2.09s/it]

SMAPE: 2.891185437185699
R2: 0.9259233720667035
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [02:43<00:21,  1.93s/it]

SMAPE: 2.586491535349424
R2: 0.9507338522500512
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [02:45<00:18,  1.87s/it]

SMAPE: 2.9521107797127164
R2: 0.9701321600674983
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [02:46<00:16,  1.86s/it]

SMAPE: 6.481577375826742
R2: 0.9529123507180252
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [02:48<00:14,  1.82s/it]

SMAPE: 2.862682784417281
R2: 0.9385937785090032
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [02:50<00:12,  1.79s/it]

SMAPE: 3.373833828234354
R2: 0.8689785795761118
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [02:52<00:10,  1.79s/it]

SMAPE: 2.9929373345061463
R2: 0.9160657067346847
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [02:53<00:08,  1.62s/it]

SMAPE: 7.258248738455955
R2: 0.6312822310967584
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [02:55<00:06,  1.61s/it]

SMAPE: 1.491430100787991
R2: 0.9651313111504027
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [02:57<00:05,  1.82s/it]

SMAPE: 2.4080386121744386
R2: 0.9334937482771224
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [02:58<00:03,  1.70s/it]

SMAPE: 7.038908245544515
R2: 0.8492584312608136
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [03:00<00:01,  1.69s/it]

SMAPE: 1.6848495597962838
R2: 0.9786802231329808
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [03:02<00:00,  1.82s/it]

SMAPE: 2.642659282172129
R2: 0.9674120094300811





In [45]:
# Display the per-building summary records (building number, SMAPE, R2).
summary_list

[{'건물번호': 1, 'SMAPE': 3.883634407146668, 'R2': 0.9428148434678705},
 {'건물번호': 2, 'SMAPE': 5.256542311528698, 'R2': 0.8268383595313882},
 {'건물번호': 3, 'SMAPE': 5.545908115619256, 'R2': 0.8225081381620768},
 {'건물번호': 4, 'SMAPE': 2.0216084532929974, 'R2': 0.9738995748382593},
 {'건물번호': 5, 'SMAPE': 2.8028523787496087, 'R2': 0.9733667899879699},
 {'건물번호': 6, 'SMAPE': 1.861585193427125, 'R2': 0.981714605738076},
 {'건물번호': 7, 'SMAPE': 5.0474765252492375, 'R2': 0.8962074296471658},
 {'건물번호': 8, 'SMAPE': 2.5489555005401963, 'R2': 0.7355127209176974},
 {'건물번호': 9, 'SMAPE': 2.0478630548996173, 'R2': 0.975397976943655},
 {'건물번호': 10, 'SMAPE': 3.907172078032968, 'R2': 0.32611646656584903},
 {'건물번호': 11, 'SMAPE': 1.676392266013526, 'R2': 0.9014899611347567},
 {'건물번호': 12, 'SMAPE': 1.8723574629050932, 'R2': 0.9008362680544917},
 {'건물번호': 13, 'SMAPE': 3.0274657714415754, 'R2': 0.3797024654736437},
 {'건물번호': 14, 'SMAPE': 8.411099584140395, 'R2': 0.2536910586619444},
 {'건물번호': 15, 'SMAPE': 1.254851791649

In [46]:
# Display the aggregate SMAPE value.
# NOTE(review): presumably the mean over all buildings — confirm where smape_val is computed.
smape_val

2.638455250836857

In [47]:
# Load the sample submission, overwrite its 'answer' column with the
# whole-test predictions, and write the result out as submission #21.
sub = pd.read_csv('sample_submission.csv').assign(answer=pred_test_whole)
sub.to_csv('xgb_sub_21.csv', index=False)

### Parameter Tuning - Grid Search CV

In [48]:
from sklearn.model_selection import GridSearchCV, PredefinedSplit

In [None]:
# Per-building hyperparameter search: run GridSearchCV over `grid` for each of the
# 100 buildings, print the winning parameters and the SMAPE on the held-out split,
# and collect the best parameters (one row per building) in `df`.
param_columns = ['n_estimators', 'eta', 'min_child_weight', 'max_depth', 'colsample_bytree', 'subsample']
best_param_rows = []  # best_params_ dict of every finished building, in building order
preds = np.array([])
grid = {
    'n_estimators':     [30, 50, 70, 100],
    'eta':              [0.01],
    'min_child_weight': np.arange(1, 8, 1),
    'max_depth':        np.arange(3, 9, 1),
    # The stop value 1.0 is excluded, so the two sampling axes below are searched at
    # two values each (the run log reports 672 candidates = 4 * 1 * 7 * 6 * 2 * 2).
    'colsample_bytree': np.arange(0.8, 1.0, 0.1),
    'subsample':        np.arange(0.8, 1.0, 0.1)
}

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    # SMAPE is an error measure, hence greater_is_better=False.
    # NOTE(review): cv=5 is a plain unshuffled K-fold; if the rows are time-ordered,
    # some folds validate on data older than what they trained on. PredefinedSplit is
    # imported for this section but unused — confirm whether it was meant to be used.
    gcv = GridSearchCV(estimator=XGBRegressor(seed=0, gpu_id=0,
                                              tree_method='gpu_hist', predictor='gpu_predictor'),
                       param_grid=grid,
                       scoring=make_scorer(SMAPE, greater_is_better=False),
                       cv=5,
                       refit=True,
                       verbose=True)
    gcv.fit(x_train, y_train)
    best = gcv.best_estimator_
    params = gcv.best_params_
    print(params)
    pred = best.predict(x_valid)
    print(f'SMAPE: {SMAPE(y_valid, pred)}')
    preds = np.append(preds, pred)
    # Collect plain records instead of concatenating onto a DataFrame every iteration;
    # if the run is interrupted, the partial results are still in best_param_rows.
    best_param_rows.append(params)

# Build the frame once. The column order is fixed explicitly so the layout does not
# depend on the key order of the best_params_ dicts.
df = pd.DataFrame(best_param_rows, columns=param_columns)

  0%|          | 0/100 [00:00<?, ?it/s]

===== < BUILDING NO.1 > =====
Fitting 5 folds for each of 672 candidates, totalling 3360 fits


In [133]:
# Persist the per-building best grid-search parameters (index column omitted).
df.to_csv('hyperparameter_xgb.csv', index=False)

In [None]:
# Alias (not a copy) of the grid-search result frame: columns later added to
# xgb_params will also appear on df.
xgb_params = df

In [None]:
# Early-stopping pass: for every building, train with a large tree budget using the
# tuned parameters and record the SMAPE on (x_valid, y_valid) plus the number of
# boosting rounds to keep.
scores = []
best_iter = []

for i in tqdm(range(100)):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    # Look parameters up by column name so the cell does not depend on column order.
    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01,
                           min_child_weight=xgb_params['min_child_weight'].iloc[i],
                           max_depth=xgb_params['max_depth'].iloc[i],
                           colsample_bytree=xgb_params['colsample_bytree'].iloc[i],
                           subsample=xgb_params['subsample'].iloc[i], seed=0)
    xgb_reg.set_params(objective=weighted_mse(100), early_stopping_rounds=300)
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], verbose=False)
    y_pred = xgb_reg.predict(x_valid)
    scores.append(SMAPE(y_valid, y_pred))
    # best_iteration is a 0-based round index, so the number of trees to keep is
    # best_iteration + 1. Store the tree count, because best_iter is used directly
    # as n_estimators when the models are refit.
    best_iter.append(xgb_reg.best_iteration + 1)

In [None]:
# Objective-weight search: for every building, compare the default objective
# (recorded as alpha 0) against weighted_mse(alpha) for a fixed set of alphas and
# keep whichever gives the lowest SMAPE on (x_valid, y_valid).
alpha_list = []
smape_list = []


def _fit_and_score(building_idx, x_train, y_train, x_valid, y_valid, alpha=None):
    """Fit one regressor with the building's tuned parameters and return its SMAPE.

    building_idx is the 0-based position into best_iter / xgb_params.
    alpha=None keeps XGBRegressor's default objective; any other value trains
    with weighted_mse(alpha) as the objective.
    """
    # A local name is used for the estimator so the cell does not rebind `xgb`,
    # which the import cell uses for the xgboost module.
    model = XGBRegressor(seed=0,
                         n_estimators=best_iter[building_idx], eta=0.01,
                         min_child_weight=xgb_params['min_child_weight'].iloc[building_idx],
                         max_depth=xgb_params['max_depth'].iloc[building_idx],
                         colsample_bytree=xgb_params['colsample_bytree'].iloc[building_idx],
                         subsample=xgb_params['subsample'].iloc[building_idx])
    if alpha is not None:
        model.set_params(objective=weighted_mse(alpha))
    model.fit(x_train, y_train)
    return SMAPE(y_valid, model.predict(x_valid))


for i in tqdm(range(100)):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    # Baseline: default objective, recorded as alpha 0.
    best_alpha = 0
    score0 = _fit_and_score(i, x_train, y_train, x_valid, y_valid)

    for j in [1, 2, 5, 7, 10, 25, 50, 75, 100]:
        score1 = _fit_and_score(i, x_train, y_train, x_valid, y_valid, alpha=j)
        # Strict improvement only, so ties keep the earlier (smaller) alpha.
        if score1 < score0:
            best_alpha = j
            score0 = score1

    alpha_list.append(best_alpha)
    smape_list.append(score0)
    print(f"building {i+1} || best score: {score0} || alpha: {best_alpha}")

In [None]:
# Attach the per-building search results as two new columns (mutates xgb_params
# in place), then preview the first rows.
xgb_params['alpha'] = alpha_list
xgb_params['best_iter'] = best_iter
xgb_params.head()

In [None]:
# Persist the per-building parameter table (xgb_params) to CSV without the index.
xgb_params.to_csv('hyperparameter_xgb_real_last_testing.csv', index=False)

In [None]:
# Rebuild the best_iter list from the 'best_iter' column of xgb_params and
# display the value for the first building.
best_iter = xgb_params['best_iter'].to_list()
best_iter[0]

In [212]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW),건물유형_건물기타,건물유형_공공,건물유형_대학교,건물유형_데이터센터,건물유형_백화점및아울렛,건물유형_병원,건물유형_상용,건물유형_아파트,건물유형_연구소,건물유형_지식산업센터,건물유형_할인마트,건물유형_호텔및리조트,CDH
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-15.4
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-23.7
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-33.0
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-40.6


In [None]:
# Final fit: for every building, train one model per seed on all of that
# building's training rows, predict the building's test rows, and average the
# per-seed predictions. `preds` accumulates the averaged predictions in building order.
n_seeds = 6
preds = np.array([])
for i in tqdm(range(100)):
    # The building's data does not depend on the seed, so slice it once per building.
    x_train = train.loc[train['건물번호']==i+1].drop(['건물번호', 'power'], axis=1)
    y_train = train.loc[train['건물번호']==i+1, 'power']
    x_test = test.loc[test['건물번호']==i+1].drop('건물번호', axis=1)
    alpha = xgb_params['alpha'].iloc[i]

    seed_preds = {}  # seed -> prediction array for this building
    for seed in range(n_seeds):
        # A local name is used for the estimator so the cell does not rebind `xgb`,
        # which the import cell uses for the xgboost module.
        model = XGBRegressor(seed=seed, n_estimators=best_iter[i], eta=0.01,
                             min_child_weight=xgb_params['min_child_weight'].iloc[i],
                             max_depth=xgb_params['max_depth'].iloc[i],
                             colsample_bytree=xgb_params['colsample_bytree'].iloc[i],
                             subsample=xgb_params['subsample'].iloc[i])
        # alpha 0 means the default objective won the search; otherwise use the
        # weighted objective with the selected alpha.
        if alpha != 0:
            model.set_params(objective=weighted_mse(alpha))

        model.fit(x_train, y_train)
        seed_preds[seed] = model.predict(x_test)

    # One column per seed; the row-wise mean is the ensemble prediction.
    pred_df = pd.DataFrame(seed_preds)
    pred = pred_df.mean(axis=1)
    preds = np.append(preds, pred)

In [None]:
# Load the sample submission, overwrite its 'answer' column with the
# seed-averaged predictions, and write the result out as submission #22.
sub = pd.read_csv('sample_submission.csv').assign(answer=preds)
sub.to_csv('xgb_sub_22.csv', index=False)

In [215]:
# Display the per-building summary records (building number, SMAPE, R2).
summary_list

[{'건물번호': 1, 'SMAPE': 3.5419388838477404, 'R2': 0.9563832166343148},
 {'건물번호': 2, 'SMAPE': 4.295780152455329, 'R2': 0.8651174662607124},
 {'건물번호': 3, 'SMAPE': 4.976127431146375, 'R2': 0.8282886548210623},
 {'건물번호': 4, 'SMAPE': 1.916974758959043, 'R2': 0.9749491993399805},
 {'건물번호': 5, 'SMAPE': 2.8188883844313564, 'R2': 0.972446435735248},
 {'건물번호': 6, 'SMAPE': 1.8247299710309055, 'R2': 0.9802984729616211},
 {'건물번호': 7, 'SMAPE': 4.181533402777831, 'R2': 0.906970899840581},
 {'건물번호': 8, 'SMAPE': 2.8739138223687473, 'R2': 0.6702282399188223},
 {'건물번호': 9, 'SMAPE': 1.8549493161639723, 'R2': 0.984736771457575},
 {'건물번호': 10, 'SMAPE': 3.5416225095446876, 'R2': 0.4322418016566194},
 {'건물번호': 11, 'SMAPE': 1.686647477272509, 'R2': 0.8955227858960689},
 {'건물번호': 12, 'SMAPE': 1.9311341180054387, 'R2': 0.8995117473179703},
 {'건물번호': 13, 'SMAPE': 3.004711373509035, 'R2': 0.3961679661426334},
 {'건물번호': 14, 'SMAPE': 7.909728524427743, 'R2': 0.34326299780211256},
 {'건물번호': 15, 'SMAPE': 1.8971932943063

In [57]:
# Inspect the first 24 rows of building 54.
# NOTE(review): the saved output shows the raw CSV columns, so this cell appears to
# have been run against an unprocessed `train` — confirm the execution order.
train[train['건물번호']==54].head(24)

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
108120,54_20220601 00,54,20220601 00,18.6,,2.6,62.0,,,578.16
108121,54_20220601 01,54,20220601 01,18.8,,0.4,62.0,,,408.24
108122,54_20220601 02,54,20220601 02,18.9,,0.0,52.0,,,398.16
108123,54_20220601 03,54,20220601 03,20.2,,2.4,38.0,,,387.36
108124,54_20220601 04,54,20220601 04,20.4,,2.8,40.0,,,387.36
108125,54_20220601 05,54,20220601 05,20.0,,1.7,41.0,,,419.76
108126,54_20220601 06,54,20220601 06,19.6,,0.0,47.0,0.0,0.04,612.72
108127,54_20220601 07,54,20220601 07,21.2,,2.0,38.0,0.0,0.29,661.68
108128,54_20220601 08,54,20220601 08,22.9,,1.3,36.0,0.0,0.69,1210.32
108129,54_20220601 09,54,20220601 09,25.6,,0.3,32.0,0.8,1.43,1901.16


In [52]:
# Load the building metadata table.
bi = pd.read_csv('building_info.csv')

In [54]:
# Show the metadata row for building 54.
bi[bi['건물번호']==54]

Unnamed: 0,건물번호,건물유형,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW)
53,54,상용,109400.2,65803.57,-,-,-


- test와 train의 분포 차이 확인
- random forest 모델 확인
- 과대 예측 잡기
- 54번 건물 주의 (튀는 애 발견. outlier 제거 후 시도 — 예를 들면 뒤에서 1000개를 빼서 수행)
- feature selection 요망