In [1]:
import xgboost as xgb
import tqdm as tq
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import pandas as pd
from tqdm import tqdm
from xgboost import XGBRegressor
import numpy as np

### Train Data Preprocessing

In [68]:
train = pd.read_csv('train.csv')

In [69]:
train.head()

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4


In [70]:
train.pop('일조(hr)')
train.pop('일사(MJ/m2)')

train['일시'] = pd.to_datetime(train['일시'], format='%Y%m%d %H')
train['month'] = train['일시'].dt.month
train['day'] = train['일시'].dt.weekday
train['hour'] = train['일시'].dt.hour
train['holiday'] = train.apply(lambda x: 0 if x['day']<5 else 1, axis=1)
train['sin_time'] = np.sin(2*np.pi*train.hour/24) # cyclical encoding
train['cos_time'] = np.cos(2*np.pi*train.hour/24)
train['DI'] = 1.8*train['기온(C)'] - 0.55*(1-train['습도(%)'])*(1.8*train['기온(C)']-26) + 32 # 불쾌지수

train.pop('num_date_time')
train.pop('일시')
train.pop('hour')
train['power'] = train['전력소비량(kWh)']
train.pop('전력소비량(kWh)')
train['강수량(mm)'] = train['강수량(mm)'].fillna(0)
train['풍속(m/s)'] = train['풍속(m/s)'].fillna(0)
train['습도(%)'] = train['습도(%)'].fillna(0)

In [71]:
train.to_csv('train_preprocessed.csv', index=False)

In [72]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4


### Test Data Preprocessing

In [73]:
test = pd.read_csv('test.csv')

In [74]:
test['일시'] = pd.to_datetime(test['일시'], format='%Y%m%d %H')
test['month'] = test['일시'].dt.month
test['day'] = test['일시'].dt.weekday
test['hour'] = test['일시'].dt.hour
test['holiday'] = test.apply(lambda x: 0 if x['day']<5 else 1, axis=1)
test['sin_time'] = np.sin(2*np.pi*test.hour/24)
test['cos_time'] = np.cos(2*np.pi*test.hour/24)
test['DI'] = 1.8*test['기온(C)'] - 0.55*(1-test['습도(%)'])*(1.8*test['기온(C)']-26) + 32

test.pop('num_date_time')
test.pop('일시')
test.pop('hour')

0         0
1         1
2         2
3         3
4         4
         ..
16795    19
16796    20
16797    21
16798    22
16799    23
Name: hour, Length: 16800, dtype: int64

In [75]:
test.to_csv('test_preprocessed.csv', index=False)

In [14]:
test.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI
0,1,23.5,0.0,2.2,72,8,3,0,0.0,1.0,710.815
1,1,23.0,0.0,0.9,72,8,3,0,0.258819,0.965926,674.77
2,1,22.7,0.0,1.5,75,8,3,0,0.5,0.866025,677.662
3,1,22.1,0.0,1.3,78,8,3,0,0.707107,0.707107,655.363
4,1,21.8,0.0,1.0,77,8,3,0,0.866025,0.5,624.672


### Training

In [76]:
import os
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split

In [77]:
def SMAPE(true, pred):
    return np.mean((np.abs(true - pred))/(np.abs(true) + np.abs(pred))) * 100    

In [20]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4


In [17]:
def tr_ts_split(train, test, number, val_hour):
    x = train.loc[train['건물번호']==number].drop(['건물번호','power'], axis=1)
    y = train.loc[train['건물번호']==number, 'power']

    if val_hour == 0:
        return x, None, y, None
    else:
        x_train, x_valid, y_train, y_valid = x[:-val_hour], x[-val_hour:], y[:-val_hour], y[-val_hour:]
        return x_train, x_valid.reset_index(drop=True), y_train, y_valid.reset_index(drop=True)

In [18]:
def fit_and_pred_XGB(train, test, number, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, seed=seed,
                            gpu_id=0, tree_method='gpu_hist', predictor='gpu_predictor')
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)],
                early_stopping_rounds=300, verbose=False)
    pred = xgb_reg.predict(x_valid)
    pred = pd.Series(pred)
    
    x_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [193]:
pred_val_ens= []
test_ens = []

summary_list = []
ans_val_list = []
pred_val_list = []
pred_test_list = []

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    summary_dict = {}

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB(train, test, i+1, seed=0)

    summary_dict['건물번호'] = i+1
    summary_dict['SMAPE'] = smape_val
    summary_dict['R2'] = r2_val
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val =SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:03<06:20,  3.84s/it]

SMAPE: 3.181562601718308
R2: 0.9615178576544824
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:05<03:44,  2.29s/it]

SMAPE: 4.984570757938752
R2: 0.8430983047175616
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:06<02:46,  1.71s/it]

SMAPE: 4.748388184464371
R2: 0.8652680846533359
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:07<02:21,  1.48s/it]

SMAPE: 1.9465192802099425
R2: 0.9750096667872354
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:09<02:50,  1.80s/it]

SMAPE: 3.6377630521333417
R2: 0.9559777174391022
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:11<02:42,  1.73s/it]

SMAPE: 2.4182941771967346
R2: 0.9578611514086105
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:12<02:31,  1.63s/it]

SMAPE: 4.307269667470138
R2: 0.8993115864328871
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:14<02:31,  1.65s/it]

SMAPE: 2.4014583106354452
R2: 0.7386998076599489
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:15<02:24,  1.59s/it]

SMAPE: 2.0614666848007412
R2: 0.9704629056232329
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:16<02:08,  1.43s/it]

SMAPE: 3.3571069607198223
R2: 0.5144255310690511
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:18<02:18,  1.55s/it]

SMAPE: 1.700174348810673
R2: 0.8950997020083612
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:20<02:31,  1.72s/it]

SMAPE: 2.1180166920773233
R2: 0.8906401719721289
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:22<02:26,  1.68s/it]

SMAPE: 3.6151650748380373
R2: 0.17604522990899563
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:23<02:06,  1.47s/it]

SMAPE: 8.198120578755367
R2: 0.29742151307643383
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:24<02:05,  1.48s/it]

SMAPE: 1.962666640803192
R2: 0.8051796262696933
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:25<01:56,  1.39s/it]

SMAPE: 2.125165321171025
R2: 0.9838213966223212
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:27<02:09,  1.56s/it]

SMAPE: 3.7827099578038905
R2: 0.9210108161974975
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:30<02:22,  1.73s/it]

SMAPE: 4.255991803243023
R2: 0.8145821532694238
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:33<02:58,  2.20s/it]

SMAPE: 4.627560568110455
R2: 0.9043691990560375
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:34<02:33,  1.91s/it]

SMAPE: 4.990809199318701
R2: 0.807362377328918
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:38<03:06,  2.36s/it]

SMAPE: 4.5053437760271775
R2: 0.8867790665838836
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:39<02:46,  2.14s/it]

SMAPE: 1.7310447332074246
R2: 0.9432956580474308
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:40<02:24,  1.87s/it]

SMAPE: 2.2610481582582374
R2: 0.726023593971761
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:45<03:24,  2.69s/it]

SMAPE: 1.384424148909194
R2: 0.980931285114505
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:48<03:34,  2.86s/it]

SMAPE: 1.171651714774279
R2: 0.9777454087526165
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:50<03:06,  2.53s/it]

SMAPE: 1.7684391038232175
R2: 0.880767977180326
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:52<02:58,  2.45s/it]

SMAPE: 1.305445028401994
R2: 0.8993590587439956
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:54<02:35,  2.16s/it]

SMAPE: 2.5874040167677412
R2: 0.9379539955620609
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:56<02:25,  2.05s/it]

SMAPE: 3.1934854742388294
R2: 0.7764724231780415
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:57<02:21,  2.01s/it]

SMAPE: 4.884202910135106
R2: 0.6590359638222282
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:59<02:02,  1.77s/it]

SMAPE: 2.438287067571604
R2: 0.9265729976617417
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [01:01<02:14,  1.98s/it]

SMAPE: 0.18972787638685584
R2: 0.8678953457361558
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [01:05<02:43,  2.43s/it]

SMAPE: 0.1408979959397814
R2: 0.8670570694537042
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [01:08<03:01,  2.75s/it]

SMAPE: 0.44838998614366005
R2: 0.7773076086029878
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [01:10<02:44,  2.53s/it]

SMAPE: 0.23867449466726
R2: 0.8647057581554892
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [01:12<02:34,  2.42s/it]

SMAPE: 0.222129844441478
R2: 0.8398184822359824
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [01:15<02:32,  2.43s/it]

SMAPE: 2.563431147909792
R2: 0.9719493249262493
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [01:16<02:09,  2.10s/it]

SMAPE: 2.3995481757433974
R2: 0.9783486811545055
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [01:18<01:59,  1.95s/it]

SMAPE: 2.5429887715918356
R2: 0.9875325363090149
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [01:19<01:43,  1.72s/it]

SMAPE: 4.949725375083061
R2: 0.9361800487246348
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:20<01:36,  1.64s/it]

SMAPE: 2.7871658388209255
R2: 0.9687369284261115
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:21<01:23,  1.44s/it]

SMAPE: 8.490695677002906
R2: 0.8312136913827755
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:24<01:42,  1.80s/it]

SMAPE: 2.5765757671732827
R2: 0.9963097008519095
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:25<01:31,  1.63s/it]

SMAPE: 2.0206921131093116
R2: 0.9956542300579928
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:27<01:32,  1.68s/it]

SMAPE: 0.9636166481882197
R2: 0.9741663176568942
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:28<01:26,  1.61s/it]

SMAPE: 2.9152820214674375
R2: 0.9089364609915891
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:30<01:32,  1.75s/it]

SMAPE: 2.44199562420449
R2: 0.9081604303723686
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:34<02:04,  2.39s/it]

SMAPE: 1.2924359762110216
R2: 0.9624260302021126
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:37<02:11,  2.57s/it]

SMAPE: 1.2044399453935408
R2: 0.9477899374860153
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:39<01:52,  2.25s/it]

SMAPE: 1.3424754879558616
R2: 0.9181100279667055
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:42<02:03,  2.51s/it]

SMAPE: 2.3069110202240872
R2: 0.860093590649482
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [01:47<02:33,  3.21s/it]

SMAPE: 3.0851602584690085
R2: 0.8374973787372814
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [01:48<02:02,  2.62s/it]

SMAPE: 7.2648417156493545
R2: 0.9203205423613228
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [01:49<01:42,  2.23s/it]

SMAPE: 16.030845208983116
R2: 0.37587593521080953
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [01:51<01:36,  2.15s/it]

SMAPE: 1.0556024170338933
R2: 0.8405256840114026
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [01:54<01:37,  2.21s/it]

SMAPE: 0.3995891138595821
R2: 0.9383633046815822
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [01:55<01:24,  1.97s/it]

SMAPE: 2.5823510572322594
R2: 0.982983988705182
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [01:57<01:27,  2.07s/it]

SMAPE: 0.23519363397504034
R2: 0.9228203340448847
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [01:59<01:19,  1.94s/it]

SMAPE: 2.5619438538110715
R2: 0.9815368789764827
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [02:00<01:09,  1.75s/it]

SMAPE: 2.8110172926551833
R2: 0.9583870172442827
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [02:02<01:05,  1.68s/it]

SMAPE: 2.340201478988097
R2: 0.9269577009344832
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [02:03<01:00,  1.60s/it]

SMAPE: 2.82218449337234
R2: 0.9096282895017103
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [02:05<00:57,  1.56s/it]

SMAPE: 3.3329194240853175
R2: 0.9043018344864934
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [02:06<00:56,  1.56s/it]

SMAPE: 2.187868446437268
R2: 0.9424236575325887
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [02:08<00:51,  1.48s/it]

SMAPE: 4.189424806271382
R2: 0.8776450394994727
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [02:09<00:52,  1.55s/it]

SMAPE: 1.9520773827079756
R2: 0.8716987879378835
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [02:10<00:46,  1.41s/it]

SMAPE: 2.763659889478162
R2: 0.8891725918174914
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [02:12<00:43,  1.36s/it]

SMAPE: 2.3269845074066002
R2: 0.9385765657446611
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [02:13<00:43,  1.40s/it]

SMAPE: 1.2487747471581605
R2: 0.9804228682897063
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [02:14<00:41,  1.38s/it]

SMAPE: 2.9096000798126633
R2: 0.8945569984264621
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [02:16<00:40,  1.40s/it]

SMAPE: 2.5240245149183576
R2: 0.7733786922239123
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [02:18<00:41,  1.50s/it]

SMAPE: 3.8508366997362655
R2: 0.5049414960821738
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [02:19<00:38,  1.44s/it]

SMAPE: 1.7260449595208565
R2: 0.803513176762261
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [02:21<00:45,  1.76s/it]

SMAPE: 3.6117350538550523
R2: 0.9113554135344053
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [02:23<00:42,  1.70s/it]

SMAPE: 2.207682379403786
R2: 0.8886641200872789
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [02:25<00:40,  1.69s/it]

SMAPE: 2.922708453346419
R2: 0.5843763343133019
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [02:26<00:36,  1.60s/it]

SMAPE: 1.9178124448321017
R2: 0.9230691455808102
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:28<00:36,  1.65s/it]

SMAPE: 2.3627014417844427
R2: 0.9345048024428463
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [02:29<00:34,  1.66s/it]

SMAPE: 2.672002629682452
R2: 0.8633319200457426
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [02:33<00:46,  2.31s/it]

SMAPE: 1.9637180950969766
R2: 0.9922439708420913
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [02:35<00:40,  2.14s/it]

SMAPE: 1.721184324954292
R2: 0.9920629233961014
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [02:37<00:35,  1.97s/it]

SMAPE: 2.784366406978639
R2: 0.9588340437684703
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [02:40<00:38,  2.29s/it]

SMAPE: 2.5503571201544255
R2: 0.9171166009511168
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [02:41<00:31,  2.00s/it]

SMAPE: 2.6010763764852443
R2: 0.8944995026867417
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [02:43<00:30,  2.03s/it]

SMAPE: 1.9473509361905856
R2: 0.9406841112684394
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [02:45<00:26,  1.86s/it]

SMAPE: 3.9845723824046284
R2: 0.894860727209134
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [02:47<00:26,  2.05s/it]

SMAPE: 5.201036249472348
R2: 0.8438980424875884
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [02:48<00:22,  1.85s/it]

SMAPE: 4.864697881141088
R2: 0.7454113087640606
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [02:50<00:18,  1.68s/it]

SMAPE: 4.656762722955426
R2: 0.7780055483680671
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [02:51<00:15,  1.57s/it]

SMAPE: 5.712615462814688
R2: 0.8086298772439142
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [02:53<00:16,  1.78s/it]

SMAPE: 6.040108479397927
R2: 0.922162363730947
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [02:55<00:13,  1.74s/it]

SMAPE: 4.074109230995168
R2: 0.8075872633607586
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [02:59<00:17,  2.46s/it]

SMAPE: 3.2030708810387942
R2: 0.8803605761426699
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [03:01<00:13,  2.19s/it]

SMAPE: 2.8951465349696477
R2: 0.9082408079583276
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [03:02<00:09,  1.88s/it]

SMAPE: 7.599879934805201
R2: 0.5946891859570982
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [03:03<00:06,  1.69s/it]

SMAPE: 1.3017287518895497
R2: 0.9758914129490925
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [03:07<00:06,  2.27s/it]

SMAPE: 2.2995340423161235
R2: 0.9373195927146769
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [03:08<00:03,  2.00s/it]

SMAPE: 6.382029092092538
R2: 0.8734307128116495
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [03:09<00:01,  1.80s/it]

SMAPE: 1.675374801219525
R2: 0.9784401189489322
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [03:12<00:00,  1.92s/it]

SMAPE: 2.7178215831385364
R2: 0.9645943448462445





In [194]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.181562601718308, 'R2': 0.9615178576544824},
 {'건물번호': 2, 'SMAPE': 4.984570757938752, 'R2': 0.8430983047175616},
 {'건물번호': 3, 'SMAPE': 4.748388184464371, 'R2': 0.8652680846533359},
 {'건물번호': 4, 'SMAPE': 1.9465192802099425, 'R2': 0.9750096667872354},
 {'건물번호': 5, 'SMAPE': 3.6377630521333417, 'R2': 0.9559777174391022},
 {'건물번호': 6, 'SMAPE': 2.4182941771967346, 'R2': 0.9578611514086105},
 {'건물번호': 7, 'SMAPE': 4.307269667470138, 'R2': 0.8993115864328871},
 {'건물번호': 8, 'SMAPE': 2.4014583106354452, 'R2': 0.7386998076599489},
 {'건물번호': 9, 'SMAPE': 2.0614666848007412, 'R2': 0.9704629056232329},
 {'건물번호': 10, 'SMAPE': 3.3571069607198223, 'R2': 0.5144255310690511},
 {'건물번호': 11, 'SMAPE': 1.700174348810673, 'R2': 0.8950997020083612},
 {'건물번호': 12, 'SMAPE': 2.1180166920773233, 'R2': 0.8906401719721289},
 {'건물번호': 13, 'SMAPE': 3.6151650748380373, 'R2': 0.17604522990899563},
 {'건물번호': 14, 'SMAPE': 8.198120578755367, 'R2': 0.29742151307643383},
 {'건물번호': 15, 'SMAPE': 1.96266664

In [195]:
smape_val

3.0176168545696984

In [71]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = pred_test_whole
sub.to_csv('xgb_sub_15.csv', index=False)

### Improvement

In [78]:
train['적정온도차이'] = train['기온(C)'] - 26

In [79]:
test['적정온도차이'] = test['기온(C)'] - 26

In [34]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power,적정온도차이
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36,-8.0
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88,-8.3
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76,-9.3
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4,-7.6


In [35]:
def weighted_mse(alpha = 1):
    def weighted_mse_fixed(label, pred):
        residual = (label - pred).astype("float")
        grad = np.where(residual>0, -2*alpha*residual, -2*residual)
        hess = np.where(residual>0, 2*alpha, 2.0)
        return grad, hess
    return weighted_mse_fixed

In [36]:
def fit_and_pred_XGB_with_weight(train, test, number, weight, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, seed=seed,
                            gpu_id=0, tree_method='gpu_hist', predictor='gpu_predictor')
    xgb_reg.set_params(**{'objective':weighted_mse(weight)})
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)],
                early_stopping_rounds=300, verbose=False)
    pred = xgb_reg.predict(x_valid)
    pred = pd.Series(pred)
    
    x_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [203]:
pred_val_ens= []
test_ens = []

summary_list = []
ans_val_list = []
pred_val_list = []
pred_test_list = []

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    summary_dict = {}

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB_with_weight(train, test, i+1, 100, seed=0)

    summary_dict['건물번호'] = i+1
    summary_dict['SMAPE'] = smape_val
    summary_dict['R2'] = r2_val
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val =SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:01<03:12,  1.94s/it]

SMAPE: 3.5419388838477404
R2: 0.9563832166343148
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:03<02:52,  1.76s/it]

SMAPE: 4.295780152455329
R2: 0.8651174662607124
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:05<02:37,  1.62s/it]

SMAPE: 4.976127431146375
R2: 0.8282886548210623
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:06<02:38,  1.65s/it]

SMAPE: 1.916974758959043
R2: 0.9749491993399805
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:09<03:23,  2.14s/it]

SMAPE: 2.8188883844313564
R2: 0.972446435735248
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:11<03:21,  2.14s/it]

SMAPE: 1.8247299710309055
R2: 0.9802984729616211
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:13<03:09,  2.04s/it]

SMAPE: 4.181533402777831
R2: 0.906970899840581
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:15<03:03,  2.00s/it]

SMAPE: 2.8739138223687473
R2: 0.6702282399188223
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:17<03:05,  2.04s/it]

SMAPE: 1.8549493161639723
R2: 0.984736771457575
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:19<02:50,  1.89s/it]

SMAPE: 3.5416225095446876
R2: 0.4322418016566194
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:21<02:53,  1.95s/it]

SMAPE: 1.686647477272509
R2: 0.8955227858960689
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:24<03:14,  2.21s/it]

SMAPE: 1.9311341180054387
R2: 0.8995117473179703
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:26<03:04,  2.12s/it]

SMAPE: 3.004711373509035
R2: 0.3961679661426334
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:27<02:43,  1.91s/it]

SMAPE: 7.909728524427743
R2: 0.34326299780211256
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:29<02:50,  2.00s/it]

SMAPE: 1.8971932943063454
R2: 0.8458132584564995
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:31<02:40,  1.92s/it]

SMAPE: 2.020895267248837
R2: 0.9855279166437871
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:33<02:46,  2.01s/it]

SMAPE: 3.0345161593836227
R2: 0.9455755730715113
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:36<03:06,  2.27s/it]

SMAPE: 3.5805708796288402
R2: 0.9046722159410551
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:39<03:11,  2.37s/it]

SMAPE: 3.926657594482213
R2: 0.939612714103329
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:41<03:13,  2.42s/it]

SMAPE: 4.505567909655219
R2: 0.7790902289158669
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:43<03:05,  2.35s/it]

SMAPE: 4.278992383451455
R2: 0.9122880595171534
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:45<02:55,  2.25s/it]

SMAPE: 1.714605236128287
R2: 0.9462697614433282
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:48<03:01,  2.35s/it]

SMAPE: 1.9977630506533826
R2: 0.7118909379916054
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:51<03:13,  2.55s/it]

SMAPE: 1.2147046751398998
R2: 0.9876692285042109
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:54<03:14,  2.59s/it]

SMAPE: 1.071914519547852
R2: 0.9838502476852887
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:56<03:02,  2.46s/it]

SMAPE: 1.2754434182491863
R2: 0.9526249517572365
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:58<02:53,  2.38s/it]

SMAPE: 1.2089446938323858
R2: 0.9196695823152246
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [01:00<02:40,  2.23s/it]

SMAPE: 2.3315863766342164
R2: 0.9464801178483762
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [01:02<02:40,  2.26s/it]

SMAPE: 2.6397465527714137
R2: 0.893935316935024
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [01:05<02:45,  2.36s/it]

SMAPE: 3.9998166322091757
R2: 0.8506247677458555
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [01:06<02:27,  2.14s/it]

SMAPE: 2.404524493769024
R2: 0.9470255358159374
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [01:10<02:56,  2.60s/it]

SMAPE: 0.20167359555235026
R2: 0.8627698561039703
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [01:14<03:25,  3.07s/it]

SMAPE: 0.17276764974408107
R2: 0.8229450580971566
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [01:18<03:29,  3.17s/it]

SMAPE: 0.29083416300093756
R2: 0.8981595130056876
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [01:21<03:23,  3.13s/it]

SMAPE: 0.20213486317948995
R2: 0.9010597272573889
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [01:24<03:25,  3.21s/it]

SMAPE: 0.22888851855130976
R2: 0.8347444701991675
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [01:26<03:02,  2.89s/it]

SMAPE: 1.9402276172887187
R2: 0.9851492380233352
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [01:30<03:20,  3.24s/it]

SMAPE: 1.9617611605591168
R2: 0.9943749284020462
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [01:33<03:13,  3.17s/it]

SMAPE: 2.728407977794472
R2: 0.9885208059263002
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [01:35<02:44,  2.75s/it]

SMAPE: 4.546766536131502
R2: 0.9656154457447984
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:38<02:46,  2.82s/it]

SMAPE: 2.6815394639501484
R2: 0.9838264802312887
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:40<02:20,  2.42s/it]

SMAPE: 8.269081357655107
R2: 0.8281778457904116
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:42<02:13,  2.35s/it]

SMAPE: 2.755572977719789
R2: 0.994723176597687
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:44<02:02,  2.19s/it]

SMAPE: 1.8201344384866398
R2: 0.9967343309116449
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:46<02:02,  2.23s/it]

SMAPE: 1.031363070076244
R2: 0.9737000429866236
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:48<01:58,  2.19s/it]

SMAPE: 2.6331329043994436
R2: 0.92278881387551
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:50<01:54,  2.16s/it]

SMAPE: 2.2539981932224626
R2: 0.9370271337273428
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:52<01:55,  2.23s/it]

SMAPE: 1.1154793572288213
R2: 0.9814344911305359
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:55<01:57,  2.31s/it]

SMAPE: 1.2120048314692171
R2: 0.9532919655392368
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:57<01:51,  2.22s/it]

SMAPE: 1.0199365813115648
R2: 0.9601624825442548
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:59<01:51,  2.29s/it]

SMAPE: 2.0669342021532136
R2: 0.8991889464170111
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [02:02<01:54,  2.39s/it]

SMAPE: 2.6000342771919813
R2: 0.9036673759514853
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [02:04<01:42,  2.17s/it]

SMAPE: 6.854501133324095
R2: 0.9364671663465698
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [02:06<01:45,  2.30s/it]

SMAPE: 15.395086816186549
R2: 0.49099037535994716
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [02:09<01:46,  2.36s/it]

SMAPE: 0.9133351004211036
R2: 0.8675089182357368
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [02:12<01:56,  2.64s/it]

SMAPE: 0.5136544276081201
R2: 0.9081687822234766
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [02:14<01:44,  2.43s/it]

SMAPE: 2.5555318460320944
R2: 0.9844644443151935
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [02:18<01:55,  2.76s/it]

SMAPE: 0.25820350743212367
R2: 0.8921785168651243
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [02:20<01:43,  2.53s/it]

SMAPE: 2.6890831895609573
R2: 0.9830364479014171
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [02:22<01:33,  2.34s/it]

SMAPE: 3.108999755320832
R2: 0.9513218358851899
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [02:24<01:27,  2.24s/it]

SMAPE: 2.098628213842975
R2: 0.9361982912678116
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [02:25<01:20,  2.13s/it]

SMAPE: 2.578592625332191
R2: 0.9294393834986527
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [02:27<01:17,  2.10s/it]

SMAPE: 2.971701521957132
R2: 0.9287959839133162
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [02:29<01:13,  2.05s/it]

SMAPE: 1.985596970855797
R2: 0.9568742246611621
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [02:31<01:08,  1.95s/it]

SMAPE: 4.477452022680745
R2: 0.8716774757272846
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [02:33<01:09,  2.03s/it]

SMAPE: 1.8075105650528174
R2: 0.8956864249497588
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [02:35<01:03,  1.92s/it]

SMAPE: 2.4244448231708127
R2: 0.9204433840320086
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [02:37<01:01,  1.91s/it]

SMAPE: 2.0780954476198916
R2: 0.9457956065829916
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [02:39<01:00,  1.95s/it]

SMAPE: 1.3608021628841247
R2: 0.9763652402810177
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [02:41<00:58,  1.94s/it]

SMAPE: 2.1535704873595076
R2: 0.9490361026829824
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [02:43<00:59,  2.04s/it]

SMAPE: 1.9380533494838634
R2: 0.8843182552902552
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [02:46<01:03,  2.27s/it]

SMAPE: 3.11790978159792
R2: 0.6752659005529251
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [02:48<00:58,  2.17s/it]

SMAPE: 1.894932481017263
R2: 0.7765012765933019
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [02:50<00:56,  2.15s/it]

SMAPE: 2.9064913642972994
R2: 0.9340450887680255
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [02:52<00:52,  2.11s/it]

SMAPE: 1.8454658791086458
R2: 0.9271562141468898
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [02:54<00:50,  2.10s/it]

SMAPE: 2.6014692289978596
R2: 0.7430607283602755
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [02:56<00:49,  2.14s/it]

SMAPE: 2.01922110911595
R2: 0.9243078132874606
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:58<00:46,  2.09s/it]

SMAPE: 1.973855662055632
R2: 0.9842392917365872
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [03:01<00:48,  2.29s/it]

SMAPE: 1.701066369657164
R2: 0.9559524985782594
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [03:04<00:47,  2.37s/it]

SMAPE: 2.124459802533189
R2: 0.9855431738335988
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [03:06<00:43,  2.29s/it]

SMAPE: 1.791810368984323
R2: 0.9921057077506286
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [03:08<00:39,  2.21s/it]

SMAPE: 2.2331208822734103
R2: 0.9791614753485487
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [03:09<00:35,  2.10s/it]

SMAPE: 2.0530122898435006
R2: 0.9689341497362238
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [03:12<00:37,  2.32s/it]

SMAPE: 1.5542968158928958
R2: 0.9811956709238533
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [03:16<00:41,  2.79s/it]

SMAPE: 1.8305992084508518
R2: 0.9525778546733997
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [03:19<00:37,  2.65s/it]

SMAPE: 3.2313918652357763
R2: 0.9409824975851684
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [03:21<00:31,  2.45s/it]

SMAPE: 5.291645176902233
R2: 0.8432895618265352
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [03:22<00:27,  2.25s/it]

SMAPE: 3.7365340906742253
R2: 0.8688590929058374
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [03:24<00:23,  2.15s/it]

SMAPE: 3.7449668254633037
R2: 0.8604168252423186
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [03:26<00:19,  1.99s/it]

SMAPE: 4.34946441335132
R2: 0.945437045071281
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [03:28<00:17,  1.90s/it]

SMAPE: 6.43265018021721
R2: 0.9286566290153027
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [03:30<00:16,  2.07s/it]

SMAPE: 3.913048284116499
R2: 0.8626063819152766
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [03:36<00:23,  3.34s/it]

SMAPE: 3.2880901234526063
R2: 0.8776833565580425
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [03:38<00:17,  2.99s/it]

SMAPE: 2.938023570863095
R2: 0.9149138873111788
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [03:40<00:12,  2.55s/it]

SMAPE: 7.873954719913101
R2: 0.5904394760729672
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [03:42<00:09,  2.33s/it]

SMAPE: 1.4553180409335444
R2: 0.9685772721403265
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [03:44<00:07,  2.36s/it]

SMAPE: 2.4088445027411467
R2: 0.9322687034662536
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [03:46<00:04,  2.18s/it]

SMAPE: 7.2588277284224985
R2: 0.8560677105509416
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [03:48<00:02,  2.10s/it]

SMAPE: 1.5314447971624443
R2: 0.9820596675396264
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [03:50<00:00,  2.31s/it]

SMAPE: 2.597097674198934
R2: 0.967838655132623





In [204]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.5419388838477404, 'R2': 0.9563832166343148},
 {'건물번호': 2, 'SMAPE': 4.295780152455329, 'R2': 0.8651174662607124},
 {'건물번호': 3, 'SMAPE': 4.976127431146375, 'R2': 0.8282886548210623},
 {'건물번호': 4, 'SMAPE': 1.916974758959043, 'R2': 0.9749491993399805},
 {'건물번호': 5, 'SMAPE': 2.8188883844313564, 'R2': 0.972446435735248},
 {'건물번호': 6, 'SMAPE': 1.8247299710309055, 'R2': 0.9802984729616211},
 {'건물번호': 7, 'SMAPE': 4.181533402777831, 'R2': 0.906970899840581},
 {'건물번호': 8, 'SMAPE': 2.8739138223687473, 'R2': 0.6702282399188223},
 {'건물번호': 9, 'SMAPE': 1.8549493161639723, 'R2': 0.984736771457575},
 {'건물번호': 10, 'SMAPE': 3.5416225095446876, 'R2': 0.4322418016566194},
 {'건물번호': 11, 'SMAPE': 1.686647477272509, 'R2': 0.8955227858960689},
 {'건물번호': 12, 'SMAPE': 1.9311341180054387, 'R2': 0.8995117473179703},
 {'건물번호': 13, 'SMAPE': 3.004711373509035, 'R2': 0.3961679661426334},
 {'건물번호': 14, 'SMAPE': 7.909728524427743, 'R2': 0.34326299780211256},
 {'건물번호': 15, 'SMAPE': 1.8971932943063

In [205]:
smape_val

2.790566542032983

In [93]:
sub['answer'] = pred_test_whole
sub.to_csv('xgb_sub_16.csv', index=False)

### Parameter Tuning - Grid Search CV

In [37]:
from sklearn.model_selection import GridSearchCV, PredefinedSplit

In [None]:
df = pd.DataFrame(columns = ['n_estimators', 'eta', 'min_child_weight', 'max_depth', 'colsample_bytree', 'subsample'])
preds = np.array([])
grid = {
    'n_estimators':     [30, 50, 70, 100],
    'eta':              [0.01],
    'min_child_weight': np.arange(1, 8, 1),
    'max_depth':        np.arange(3, 9, 1),
    'colsample_bytree': np.arange(0.8, 1.0, 0.1),
    'subsample':        np.arange(0.8, 1.0, 0.1)
}

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    gcv = GridSearchCV(estimator=XGBRegressor(seed=0, gpu_id=0,
                                              tree_method='gpu_hist', predictor='gpu_predictor'),
                       param_grid=grid,
                       scoring=make_scorer(SMAPE, greater_is_better=False),
                       cv=5,
                       refit=True,
                       verbose=True)
    gcv.fit(x_train, y_train)
    best = gcv.best_estimator_
    params = gcv.best_params_
    print(params)
    pred = best.predict(x_valid)
    print(f'SMAPE: {SMAPE(y_valid, pred)}')
    preds = np.append(preds, pred)
    df = pd.concat([df, pd.DataFrame(params, index=[0])], axis=0)

  0%|          | 0/100 [00:00<?, ?it/s]

===== < BUILDING NO.1 > =====
Fitting 5 folds for each of 672 candidates, totalling 3360 fits


In [133]:
df.to_csv('hyperparameter_xgb.csv', index=False)

In [None]:
xgb_params = df

In [None]:
scores = []
best_iter = []

for i in tqdm(range(100)):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, min_child_weight=xgb_params.iloc[i, 2],
                            max_depth=xgb_params.iloc[i, 3], colsample_bytree=xgb_params.iloc[i, 4],
                            subsample=xgb_params.iloc[i, 5], seed=0)
    xgb_reg.set_params(**{'objective':weighted_mse(100)})
    xgb_reg.set_params(early_stopping_rounds=300)
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], verbose=False)
    y_pred = xgb_reg.predict(x_valid)
    pred = pd.Series(y_pred)
    sm = SMAPE(y_valid, y_pred)
    scores.append(sm)
    best_iter.append(xgb_reg.best_iteration) # 실제 best iteration은 이 값에 +1

In [None]:
alpha_list = []
smape_list = []

for i in tqdm(range(100)):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    xgb = XGBRegressor(seed=0,
                        n_estimators=best_iter[i], eta=0.01, min_child_weight=xgb_params.iloc[i, 2],
                        max_depth=xgb_params.iloc[i, 3], colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
    xgb.fit(x_train, y_train)
    pred0 = xgb.predict(x_valid)
    best_alpha = 0
    score0 = SMAPE(y_valid, pred0)

    for j in [1, 2, 5, 7, 10, 25, 50, 75, 100]:
        xgb = XGBRegressor(seed=0,
                        n_estimators=best_iter[i], eta=0.01, min_child_weight=xgb_params.iloc[i, 2],
                        max_depth=xgb_params.iloc[i, 3], colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
        xgb.set_params(**{'objective': weighted_mse(j)})
        xgb.fit(x_train, y_train)
        pred1 = xgb.predict(x_valid)
        score1 = SMAPE(y_valid, pred1)
        if score1 < score0:
            best_alpha = j
            score0 = score1

    alpha_list.append(best_alpha)
    smape_list.append(score0)
    print(f"building {i+1} || best score: {score0} || alpha: {best_alpha}")

In [None]:
xgb_params['alpha'] = alpha_list
xgb_params['best_iter'] = best_iter
xgb_params.head()

In [None]:
xgb_params.to_csv('hyperparameter_xgb_last_testing.csv', index=False)

In [None]:
best_iter = xgb_params['best_iter'].to_list()
best_iter[0]

In [212]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW),건물유형_건물기타,건물유형_공공,건물유형_대학교,건물유형_데이터센터,건물유형_백화점및아울렛,건물유형_병원,건물유형_상용,건물유형_아파트,건물유형_연구소,건물유형_지식산업센터,건물유형_할인마트,건물유형_호텔및리조트,CDH
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-15.4
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-23.7
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-33.0
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-40.6


In [None]:
preds = np.array([])
for i in tqdm(range(100)):
    pred_df = pd.DataFrame()
    for seed in range(0, 6):
        x_train = train.loc[train['건물번호']==i+1,].drop(['건물번호', 'power'], axis=1)
        y_train = train.loc[train['건물번호']==i+1, 'power']
        x_test = test.loc[test['건물번호']==i+1,].drop('건물번호', axis=1)

        xgb = XGBRegressor(seed=seed, n_estimators=best_iter[i], eta=0.01,
                            min_child_weight=xgb_params.iloc[i, 2], max_depth=xgb_params.iloc[i, 3],
                            colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
        if xgb_params.iloc[i, 6] != 0:
            xgb.set_params(**{'objective':weighted_mse(xgb_params.iloc[i, 6])})

        xgb.fit(x_train, y_train)
        y_pred = xgb.predict(x_test)
        pred_df.loc[:, seed] = y_pred

    pred = pred_df.mean(axis=1)
    preds = np.append(preds, pred)

In [None]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = preds
sub.to_csv('xgb_sub_19.csv', index=False)

In [215]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.5419388838477404, 'R2': 0.9563832166343148},
 {'건물번호': 2, 'SMAPE': 4.295780152455329, 'R2': 0.8651174662607124},
 {'건물번호': 3, 'SMAPE': 4.976127431146375, 'R2': 0.8282886548210623},
 {'건물번호': 4, 'SMAPE': 1.916974758959043, 'R2': 0.9749491993399805},
 {'건물번호': 5, 'SMAPE': 2.8188883844313564, 'R2': 0.972446435735248},
 {'건물번호': 6, 'SMAPE': 1.8247299710309055, 'R2': 0.9802984729616211},
 {'건물번호': 7, 'SMAPE': 4.181533402777831, 'R2': 0.906970899840581},
 {'건물번호': 8, 'SMAPE': 2.8739138223687473, 'R2': 0.6702282399188223},
 {'건물번호': 9, 'SMAPE': 1.8549493161639723, 'R2': 0.984736771457575},
 {'건물번호': 10, 'SMAPE': 3.5416225095446876, 'R2': 0.4322418016566194},
 {'건물번호': 11, 'SMAPE': 1.686647477272509, 'R2': 0.8955227858960689},
 {'건물번호': 12, 'SMAPE': 1.9311341180054387, 'R2': 0.8995117473179703},
 {'건물번호': 13, 'SMAPE': 3.004711373509035, 'R2': 0.3961679661426334},
 {'건물번호': 14, 'SMAPE': 7.909728524427743, 'R2': 0.34326299780211256},
 {'건물번호': 15, 'SMAPE': 1.8971932943063

In [57]:
train[train['건물번호']==54].head(24)

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
108120,54_20220601 00,54,20220601 00,18.6,,2.6,62.0,,,578.16
108121,54_20220601 01,54,20220601 01,18.8,,0.4,62.0,,,408.24
108122,54_20220601 02,54,20220601 02,18.9,,0.0,52.0,,,398.16
108123,54_20220601 03,54,20220601 03,20.2,,2.4,38.0,,,387.36
108124,54_20220601 04,54,20220601 04,20.4,,2.8,40.0,,,387.36
108125,54_20220601 05,54,20220601 05,20.0,,1.7,41.0,,,419.76
108126,54_20220601 06,54,20220601 06,19.6,,0.0,47.0,0.0,0.04,612.72
108127,54_20220601 07,54,20220601 07,21.2,,2.0,38.0,0.0,0.29,661.68
108128,54_20220601 08,54,20220601 08,22.9,,1.3,36.0,0.0,0.69,1210.32
108129,54_20220601 09,54,20220601 09,25.6,,0.3,32.0,0.8,1.43,1901.16


In [52]:
bi = pd.read_csv('building_info.csv')

In [54]:
bi[bi['건물번호']==54]

Unnamed: 0,건물번호,건물유형,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW)
53,54,상용,109400.2,65803.57,-,-,-


test와 train의 분포 차이 확인  
random forest 모델 확인  
과대 예측 잡기  
54번 건물 주의(튀는 애 발견, outlier 제거 후 시도 예를 들면 뒤에서 1000개 빼서 수행)  
feature selection 요망  