In [1]:
import xgboost as xgb
import tqdm as tq
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import pandas as pd
from tqdm import tqdm
from xgboost import XGBRegressor
import numpy as np

### Train Data Preprocessing

In [5]:
train = pd.read_csv('train.csv')

In [69]:
train.head()

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
0,1_20220601 00,1,20220601 00,18.6,,0.9,42.0,,,1085.28
1,1_20220601 01,1,20220601 01,18.0,,1.1,45.0,,,1047.36
2,1_20220601 02,1,20220601 02,17.7,,1.5,45.0,,,974.88
3,1_20220601 03,1,20220601 03,16.7,,1.4,48.0,,,953.76
4,1_20220601 04,1,20220601 04,18.4,,2.8,43.0,,,986.4


In [6]:
train.pop('일조(hr)')
train.pop('일사(MJ/m2)')

train['일시'] = pd.to_datetime(train['일시'], format='%Y%m%d %H')
train['month'] = train['일시'].dt.month
train['day'] = train['일시'].dt.weekday
train['hour'] = train['일시'].dt.hour
train['holiday'] = train.apply(lambda x: 0 if x['day']<5 else 1, axis=1)
train['quarter'] = train.apply(lambda x: 2 if x['month']<7 else 3, axis=1)
train['sin_time'] = np.sin(2*np.pi*train.hour/24) # cyclical encoding
train['cos_time'] = np.cos(2*np.pi*train.hour/24)
train['DI'] = 1.8*train['기온(C)'] - 0.55*(1-train['습도(%)'])*(1.8*train['기온(C)']-26) + 32 # 불쾌지수

train.pop('num_date_time')
train.pop('일시')
train.pop('hour')
train['power'] = train['전력소비량(kWh)']
train.pop('전력소비량(kWh)')
train.pop('강수량(mm)')
train['풍속(m/s)'] = train['풍속(m/s)'].fillna(0)
train['습도(%)'] = train['습도(%)'].fillna(0)

In [71]:
train.to_csv('train_preprocessed.csv', index=False)

In [7]:
train.head()

Unnamed: 0,건물번호,기온(C),풍속(m/s),습도(%),month,day,holiday,quarter,sin_time,cos_time,DI,power
0,1,18.6,0.9,42.0,6,2,0,2,0.0,1.0,234.154,1085.28
1,1,18.0,1.1,45.0,6,2,0,2,0.258819,0.965926,219.28,1047.36
2,1,17.7,1.5,45.0,6,2,0,2,0.5,0.866025,205.672,974.88
3,1,16.7,1.4,48.0,6,2,0,2,0.707107,0.707107,167.011,953.76
4,1,18.4,2.8,43.0,6,2,0,2,0.866025,0.5,229.592,986.4


### Test Data Preprocessing

In [16]:
test = pd.read_csv('test.csv')

In [17]:
test['일시'] = pd.to_datetime(test['일시'], format='%Y%m%d %H')
test['month'] = test['일시'].dt.month
test['day'] = test['일시'].dt.weekday
test['hour'] = test['일시'].dt.hour
test['holiday'] = test.apply(lambda x: 0 if x['day']<5 else 1, axis=1)
test['quarter'] = test.apply(lambda x: 2 if x['month']<7 else 3, axis=1)
test['sin_time'] = np.sin(2*np.pi*test.hour/24)
test['cos_time'] = np.cos(2*np.pi*test.hour/24)
test['DI'] = 1.8*test['기온(C)'] - 0.55*(1-test['습도(%)'])*(1.8*test['기온(C)']-26) + 32

test.pop('num_date_time')
test.pop('일시')
test.pop('hour')
test.pop('강수량(mm)')

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
16795    0.0
16796    0.0
16797    0.0
16798    0.0
16799    0.0
Name: 강수량(mm), Length: 16800, dtype: float64

In [75]:
test.to_csv('test_preprocessed.csv', index=False)

In [18]:
test.head()

Unnamed: 0,건물번호,기온(C),풍속(m/s),습도(%),month,day,holiday,quarter,sin_time,cos_time,DI
0,1,23.5,2.2,72,8,3,0,3,0.0,1.0,710.815
1,1,23.0,0.9,72,8,3,0,3,0.258819,0.965926,674.77
2,1,22.7,1.5,75,8,3,0,3,0.5,0.866025,677.662
3,1,22.1,1.3,78,8,3,0,3,0.707107,0.707107,655.363
4,1,21.8,1.0,77,8,3,0,3,0.866025,0.5,624.672


### Training

In [11]:
import os
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split

In [12]:
def SMAPE(true, pred):
    return np.mean((np.abs(true - pred))/(np.abs(true) + np.abs(pred))) * 100    

In [20]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4


In [13]:
def tr_ts_split(train, test, number, val_hour):
    x = train.loc[train['건물번호']==number].drop(['건물번호','power'], axis=1)
    y = train.loc[train['건물번호']==number, 'power']

    #if number == 54:
    x = pd.concat([x[:601], x[700:]], axis=0).reset_index(drop=True)
    y = pd.concat([y[:601], y[700:]]).reset_index(drop=True)

    if val_hour == 0:
        return x, None, y, None
    else:
        x_train, x_valid, y_train, y_valid = x[:-val_hour], x[-val_hour:], y[:-val_hour], y[-val_hour:]
        return x_train, x_valid.reset_index(drop=True), y_train, y_valid.reset_index(drop=True)

In [14]:
def fit_and_pred_XGB(train, test, number, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, seed=seed,
                            gpu_id=0, tree_method='gpu_hist', predictor='gpu_predictor')
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)],
                early_stopping_rounds=300, verbose=False)
    pred = xgb_reg.predict(x_valid)
    pred = pd.Series(pred)
    
    x_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [19]:
pred_val_ens= []
test_ens = []

summary_list = []
ans_val_list = []
pred_val_list = []
pred_test_list = []

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    summary_dict = {}

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB(train, test, i+1, seed=0)

    summary_dict['건물번호'] = i+1
    summary_dict['SMAPE'] = smape_val
    summary_dict['R2'] = r2_val
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val =SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:02<04:05,  2.48s/it]

SMAPE: 3.4496644441677238
R2: 0.9501395854081043
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:03<02:32,  1.55s/it]

SMAPE: 5.315951214428996
R2: 0.8301183990220398
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:04<01:59,  1.23s/it]

SMAPE: 5.271764315449672
R2: 0.842398546048641
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:05<01:47,  1.12s/it]

SMAPE: 1.9815124791238239
R2: 0.9753420265350734
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:06<01:52,  1.18s/it]

SMAPE: 3.507672159174944
R2: 0.9642913768247501
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:07<01:52,  1.20s/it]

SMAPE: 2.2468971392181447
R2: 0.9689029577666786
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:08<01:45,  1.14s/it]

SMAPE: 4.333272670925453
R2: 0.9060958145060787
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:10<01:56,  1.27s/it]

SMAPE: 2.44325686603149
R2: 0.7351378479755595
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:11<02:06,  1.39s/it]

SMAPE: 2.174352069842542
R2: 0.9717021895248862
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:12<01:52,  1.25s/it]

SMAPE: 3.623798109635635
R2: 0.44647936617082273
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:14<01:59,  1.34s/it]

SMAPE: 1.6545924034733546
R2: 0.9039160007710692
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:16<02:06,  1.43s/it]

SMAPE: 2.5047585567034085
R2: 0.8607990873955604
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:17<02:03,  1.42s/it]

SMAPE: 3.496462420044567
R2: 0.19377698841111735
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:18<01:50,  1.28s/it]

SMAPE: 8.31562679276592
R2: 0.27420760662127597
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:19<01:51,  1.31s/it]

SMAPE: 1.4217463752358752
R2: 0.9168731456692368
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:20<01:42,  1.22s/it]

SMAPE: 2.368078610115772
R2: 0.9806195227640377
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:22<02:04,  1.50s/it]

SMAPE: 2.9656508375499513
R2: 0.9401083053890646
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:24<02:08,  1.57s/it]

SMAPE: 3.2311416309067997
R2: 0.9371458479800351
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:26<02:11,  1.62s/it]

SMAPE: 4.231273640177277
R2: 0.9280465130119451
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:28<02:13,  1.66s/it]

SMAPE: 2.4086710903905
R2: 0.9804349245071076
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:30<02:29,  1.89s/it]

SMAPE: 3.3770193976002156
R2: 0.9500470770330162
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:31<02:13,  1.71s/it]

SMAPE: 1.8336330993480217
R2: 0.9382130094600518
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:33<01:58,  1.54s/it]

SMAPE: 1.0703881958253543
R2: 0.9634547617119086
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:36<02:35,  2.04s/it]

SMAPE: 1.2727294301559726
R2: 0.9852824313493417
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:38<02:38,  2.11s/it]

SMAPE: 1.1191880200013231
R2: 0.9851900337636407
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:39<02:14,  1.82s/it]

SMAPE: 1.1276885328568866
R2: 0.9827724606505066
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:40<01:59,  1.63s/it]

SMAPE: 0.8877781780437648
R2: 0.9807261912170341
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:41<01:42,  1.42s/it]

SMAPE: 2.3221491879222964
R2: 0.964697621385352
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:42<01:32,  1.30s/it]

SMAPE: 1.9497307397640093
R2: 0.954499683269231
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:44<01:36,  1.37s/it]

SMAPE: 3.262110886983496
R2: 0.8821343376374882
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:45<01:25,  1.24s/it]

SMAPE: 2.286222426678163
R2: 0.9537938307836888
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [00:46<01:32,  1.36s/it]

SMAPE: 0.18562187252430876
R2: 0.8664244717404739
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [00:49<01:47,  1.60s/it]

SMAPE: 0.19492700143852346
R2: 0.7452709295693078
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [00:50<01:44,  1.58s/it]

SMAPE: 0.4069333175169637
R2: 0.838951580419725
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [00:52<01:41,  1.56s/it]

SMAPE: 0.18111919632754162
R2: 0.9143525264255867
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [00:53<01:41,  1.59s/it]

SMAPE: 0.3678864793522451
R2: 0.55170142899718
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [00:55<01:43,  1.64s/it]

SMAPE: 2.5653169322017284
R2: 0.9796371632028724
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [00:56<01:33,  1.51s/it]

SMAPE: 2.1611257163627924
R2: 0.9872733081225288
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [00:58<01:45,  1.73s/it]

SMAPE: 2.833936725392279
R2: 0.9714542022798921
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [00:59<01:29,  1.50s/it]

SMAPE: 6.205540735134859
R2: 0.8886869490351152
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:01<01:25,  1.45s/it]

SMAPE: 2.2851644200225913
R2: 0.9820318252619586
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:02<01:13,  1.27s/it]

SMAPE: 8.274269270613065
R2: 0.8370584924174883
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:04<01:33,  1.65s/it]

SMAPE: 2.7285613825864723
R2: 0.9954949495113152
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:05<01:22,  1.47s/it]

SMAPE: 2.0128495041002146
R2: 0.9959666560246389
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:07<01:24,  1.53s/it]

SMAPE: 1.1556823449263338
R2: 0.9682873546324493
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:08<01:21,  1.50s/it]

SMAPE: 2.1103256319582893
R2: 0.9537200425099766
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:09<01:13,  1.38s/it]

SMAPE: 2.183424388301227
R2: 0.933963695884009
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:11<01:15,  1.46s/it]

SMAPE: 0.9635361726167126
R2: 0.9872958847230113
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:14<01:36,  1.89s/it]

SMAPE: 1.3878553800953894
R2: 0.9435077218877712
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:15<01:23,  1.68s/it]

SMAPE: 1.1352957116676459
R2: 0.9542441233159059
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:22<02:33,  3.13s/it]

SMAPE: 1.6720243090897469
R2: 0.9425303753311524
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [01:24<02:16,  2.85s/it]

SMAPE: 2.029422786075006
R2: 0.955202437894411
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [01:25<01:48,  2.31s/it]

SMAPE: 5.418691768799057
R2: 0.9592015997990747
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [01:26<01:33,  2.02s/it]

SMAPE: 10.717394325203726
R2: 0.729673047953564
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [01:28<01:20,  1.79s/it]

SMAPE: 0.8438725449484686
R2: 0.9036370495906844
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [01:29<01:15,  1.71s/it]

SMAPE: 0.4494700932292596
R2: 0.9300431730445985
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [01:30<01:07,  1.56s/it]

SMAPE: 2.280541294700845
R2: 0.986512489034662
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [01:32<01:11,  1.69s/it]

SMAPE: 0.2231532094551641
R2: 0.9332565943360288
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [01:34<01:07,  1.64s/it]

SMAPE: 2.716717274982718
R2: 0.9823230600779824
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [01:35<00:58,  1.46s/it]

SMAPE: 2.8301305277752804
R2: 0.9525928926692715
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [01:36<00:54,  1.40s/it]

SMAPE: 2.217522669031493
R2: 0.945431302748056
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [01:38<00:59,  1.57s/it]

SMAPE: 2.6267121119783226
R2: 0.9295213255792831
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [01:39<00:52,  1.43s/it]

SMAPE: 3.119175823218651
R2: 0.9091582193462128
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [01:40<00:48,  1.35s/it]

SMAPE: 1.8126262818318826
R2: 0.9566443026254512
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [01:41<00:44,  1.28s/it]

SMAPE: 3.8942080695919925
R2: 0.8859604310625422
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [01:43<00:43,  1.28s/it]

SMAPE: 2.2314658496501774
R2: 0.8368733267893029
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [01:44<00:38,  1.17s/it]

SMAPE: 2.6466432039482766
R2: 0.8900485133477126
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [01:45<00:36,  1.14s/it]

SMAPE: 2.3423114989020446
R2: 0.9436158431874874
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [01:46<00:36,  1.16s/it]

SMAPE: 1.1916606092058235
R2: 0.9780297421467358
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [01:47<00:37,  1.26s/it]

SMAPE: 3.518972472970827
R2: 0.899156993488081
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [01:49<00:38,  1.33s/it]

SMAPE: 2.2935611681258927
R2: 0.8729146225976727
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [01:51<00:40,  1.45s/it]

SMAPE: 3.132578670834011
R2: 0.6931869283567429
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [01:52<00:36,  1.35s/it]

SMAPE: 1.6448922618236341
R2: 0.8215476849745222
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [01:54<00:46,  1.78s/it]

SMAPE: 3.091829198821974
R2: 0.9234302367764158
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [01:56<00:41,  1.64s/it]

SMAPE: 1.8271382699356329
R2: 0.9342804871260818
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [01:58<00:42,  1.76s/it]

SMAPE: 2.299507078023605
R2: 0.8133723398619432
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [01:59<00:37,  1.62s/it]

SMAPE: 1.1345072461189332
R2: 0.99401497907423
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:01<00:34,  1.56s/it]

SMAPE: 1.4418873861859003
R2: 0.9944477972580092
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [02:02<00:32,  1.53s/it]

SMAPE: 1.4466214044738257
R2: 0.9816634154542934
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [02:06<00:44,  2.23s/it]

SMAPE: 1.9760167512334421
R2: 0.9922143774479174
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [02:07<00:38,  2.01s/it]

SMAPE: 1.7646858407413388
R2: 0.9922195906969818
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [02:12<00:48,  2.72s/it]

SMAPE: 1.9888818168387685
R2: 0.9879553922374126
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [02:14<00:46,  2.71s/it]

SMAPE: 1.3944012311351168
R2: 0.994556179585811
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [02:16<00:37,  2.37s/it]

SMAPE: 1.3363913975891015
R2: 0.9926394219017667
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [02:19<00:38,  2.53s/it]

SMAPE: 1.8625679140677809
R2: 0.945239565956517
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [02:20<00:29,  2.12s/it]

SMAPE: 4.749316983745702
R2: 0.7956339411254415
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [02:21<00:24,  1.89s/it]

SMAPE: 5.609202664308991
R2: 0.8158968817766145
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [02:23<00:19,  1.65s/it]

SMAPE: 3.855480515976241
R2: 0.8521635191811439
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [02:24<00:16,  1.48s/it]

SMAPE: 4.142865825370621
R2: 0.8239519064064422
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [02:29<00:26,  2.68s/it]

SMAPE: 5.151294921599458
R2: 0.9046872170380107
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [02:31<00:22,  2.53s/it]

SMAPE: 6.464254457518306
R2: 0.9303448825494138
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [02:32<00:16,  2.11s/it]

SMAPE: 3.6917956015973634
R2: 0.8615719597936767
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [02:35<00:15,  2.21s/it]

SMAPE: 3.1400777522395535
R2: 0.8844389286474241
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [02:37<00:12,  2.15s/it]

SMAPE: 3.0649084158631332
R2: 0.9027884792613087
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [02:38<00:08,  1.78s/it]

SMAPE: 7.239464462398352
R2: 0.6414458667877924
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [02:39<00:06,  1.65s/it]

SMAPE: 1.444567119181047
R2: 0.968459567188151
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [02:42<00:06,  2.01s/it]

SMAPE: 2.7115380111705054
R2: 0.9245893832209086
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [02:43<00:03,  1.75s/it]

SMAPE: 6.409336428334088
R2: 0.8718903608935654
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [02:44<00:01,  1.60s/it]

SMAPE: 1.7027544934580474
R2: 0.9782284798595801
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [02:46<00:00,  1.66s/it]

SMAPE: 2.6883008831880297
R2: 0.9617914179577062





In [20]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.4496644441677238, 'R2': 0.9501395854081043},
 {'건물번호': 2, 'SMAPE': 5.315951214428996, 'R2': 0.8301183990220398},
 {'건물번호': 3, 'SMAPE': 5.271764315449672, 'R2': 0.842398546048641},
 {'건물번호': 4, 'SMAPE': 1.9815124791238239, 'R2': 0.9753420265350734},
 {'건물번호': 5, 'SMAPE': 3.507672159174944, 'R2': 0.9642913768247501},
 {'건물번호': 6, 'SMAPE': 2.2468971392181447, 'R2': 0.9689029577666786},
 {'건물번호': 7, 'SMAPE': 4.333272670925453, 'R2': 0.9060958145060787},
 {'건물번호': 8, 'SMAPE': 2.44325686603149, 'R2': 0.7351378479755595},
 {'건물번호': 9, 'SMAPE': 2.174352069842542, 'R2': 0.9717021895248862},
 {'건물번호': 10, 'SMAPE': 3.623798109635635, 'R2': 0.44647936617082273},
 {'건물번호': 11, 'SMAPE': 1.6545924034733546, 'R2': 0.9039160007710692},
 {'건물번호': 12, 'SMAPE': 2.5047585567034085, 'R2': 0.8607990873955604},
 {'건물번호': 13, 'SMAPE': 3.496462420044567, 'R2': 0.19377698841111735},
 {'건물번호': 14, 'SMAPE': 8.31562679276592, 'R2': 0.27420760662127597},
 {'건물번호': 15, 'SMAPE': 1.4217463752358

In [21]:
smape_val

2.7227749700016766

In [71]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = pred_test_whole
sub.to_csv('xgb_sub_15.csv', index=False)

### Improvement

In [22]:
train['적정온도차이'] = train['기온(C)'] - 26

In [23]:
test['적정온도차이'] = test['기온(C)'] - 26

In [34]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,holiday,sin_time,cos_time,DI,power,적정온도차이
0,1,18.6,0.0,0.9,42.0,6,2,0,0.0,1.0,234.154,1085.28,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,0,0.258819,0.965926,219.28,1047.36,-8.0
2,1,17.7,0.0,1.5,45.0,6,2,0,0.5,0.866025,205.672,974.88,-8.3
3,1,16.7,0.0,1.4,48.0,6,2,0,0.707107,0.707107,167.011,953.76,-9.3
4,1,18.4,0.0,2.8,43.0,6,2,0,0.866025,0.5,229.592,986.4,-7.6


In [24]:
def weighted_mse(alpha = 1):
    def weighted_mse_fixed(label, pred):
        residual = (label - pred).astype("float")
        grad = np.where(residual>0, -2*alpha*residual, -2*residual)
        hess = np.where(residual>0, 2*alpha, 2.0)
        return grad, hess
    return weighted_mse_fixed

In [25]:
def fit_and_pred_XGB_with_weight(train, test, number, weight, seed=0):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, number, 7*24)

    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, seed=seed,
                            gpu_id=0, tree_method='gpu_hist', predictor='gpu_predictor')
    xgb_reg.set_params(**{'objective':weighted_mse(weight)})
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)],
                early_stopping_rounds=300, verbose=False)
    pred = xgb_reg.predict(x_valid)
    pred = pd.Series(pred)
    
    x_test = test[test['건물번호']==number]
    y_test_pred = xgb_reg.predict(x_test.drop('건물번호', axis=1))

    smape_val = SMAPE(y_valid, pred)
    r2_val = sklearn.metrics.r2_score(y_valid, pred)
    print(f'SMAPE: {smape_val}')
    print(f'R2: {r2_val}')
    return smape_val, r2_val, y_valid, pred, y_test_pred

In [26]:
pred_val_ens= []
test_ens = []

summary_list = []
ans_val_list = []
pred_val_list = []
pred_test_list = []

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    summary_dict = {}

    smape_val, r2_val, ans_val, pred_val, pred_test = fit_and_pred_XGB_with_weight(train, test, i+1, 100, seed=0)

    summary_dict['건물번호'] = i+1
    summary_dict['SMAPE'] = smape_val
    summary_dict['R2'] = r2_val
    summary_list.append(summary_dict)
    ans_val_list.append(ans_val)
    pred_val_list.append(pred_val)
    pred_test_list.append(pred_test)

ans_val_whole = np.concatenate(ans_val_list)
pred_val_whole = np.concatenate(pred_val_list)
pred_test_whole = np.concatenate(pred_test_list)
smape_val =SMAPE(ans_val_whole, pred_val_whole)



===== < BUILDING NO.1 > =====


  1%|          | 1/100 [00:01<03:09,  1.92s/it]

SMAPE: 3.958089806818016
R2: 0.942688600343887
===== < BUILDING NO.2 > =====


  2%|▏         | 2/100 [00:03<02:30,  1.54s/it]

SMAPE: 5.423590043612395
R2: 0.8202690087147659
===== < BUILDING NO.3 > =====


  3%|▎         | 3/100 [00:04<02:12,  1.37s/it]

SMAPE: 5.4817390200429115
R2: 0.8344428371312368
===== < BUILDING NO.4 > =====


  4%|▍         | 4/100 [00:05<02:13,  1.39s/it]

SMAPE: 2.030205401726124
R2: 0.9739141290440244
===== < BUILDING NO.5 > =====


  5%|▌         | 5/100 [00:08<02:42,  1.71s/it]

SMAPE: 2.827683287140875
R2: 0.9722565011568478
===== < BUILDING NO.6 > =====


  6%|▌         | 6/100 [00:09<02:45,  1.76s/it]

SMAPE: 1.8545761225980184
R2: 0.9814595210574545
===== < BUILDING NO.7 > =====


  7%|▋         | 7/100 [00:11<02:31,  1.63s/it]

SMAPE: 5.067414580170736
R2: 0.8952315004993333
===== < BUILDING NO.8 > =====


  8%|▊         | 8/100 [00:12<02:28,  1.62s/it]

SMAPE: 2.5219085058843516
R2: 0.7352700104528325
===== < BUILDING NO.9 > =====


  9%|▉         | 9/100 [00:14<02:33,  1.69s/it]

SMAPE: 2.138218998898596
R2: 0.9743442644232198
===== < BUILDING NO.10 > =====


 10%|█         | 10/100 [00:16<02:21,  1.58s/it]

SMAPE: 3.8797821999979956
R2: 0.33503565082129705
===== < BUILDING NO.11 > =====


 11%|█         | 11/100 [00:17<02:24,  1.62s/it]

SMAPE: 1.6987581798917475
R2: 0.9046754964654331
===== < BUILDING NO.12 > =====


 12%|█▏        | 12/100 [00:21<03:08,  2.15s/it]

SMAPE: 1.9467450064152463
R2: 0.8963663764505699
===== < BUILDING NO.13 > =====


 13%|█▎        | 13/100 [00:22<02:55,  2.02s/it]

SMAPE: 2.9932188552135015
R2: 0.40865056641057806
===== < BUILDING NO.14 > =====


 14%|█▍        | 14/100 [00:24<02:33,  1.79s/it]

SMAPE: 8.26659468656458
R2: 0.2823081380570013
===== < BUILDING NO.15 > =====


 15%|█▌        | 15/100 [00:25<02:27,  1.74s/it]

SMAPE: 1.2563441410179228
R2: 0.9361501673771199
===== < BUILDING NO.16 > =====


 16%|█▌        | 16/100 [00:27<02:17,  1.64s/it]

SMAPE: 2.2470956034124576
R2: 0.984438739447256
===== < BUILDING NO.17 > =====


 17%|█▋        | 17/100 [00:29<02:26,  1.76s/it]

SMAPE: 2.938767363334106
R2: 0.9472182897600329
===== < BUILDING NO.18 > =====


 18%|█▊        | 18/100 [00:31<02:33,  1.87s/it]

SMAPE: 3.340749478564642
R2: 0.9344259513596916
===== < BUILDING NO.19 > =====


 19%|█▉        | 19/100 [00:33<02:47,  2.06s/it]

SMAPE: 4.259865603359473
R2: 0.9377093326699545
===== < BUILDING NO.20 > =====


 20%|██        | 20/100 [00:35<02:39,  2.00s/it]

SMAPE: 2.3293363509733234
R2: 0.9763751637377249
===== < BUILDING NO.21 > =====


 21%|██        | 21/100 [00:37<02:31,  1.91s/it]

SMAPE: 3.9604546517618617
R2: 0.9346681212737444
===== < BUILDING NO.22 > =====


 22%|██▏       | 22/100 [00:39<02:25,  1.86s/it]

SMAPE: 1.8407103321087341
R2: 0.9416497903244264
===== < BUILDING NO.23 > =====


 23%|██▎       | 23/100 [00:41<02:38,  2.05s/it]

SMAPE: 0.9207817579819024
R2: 0.9670461512892325
===== < BUILDING NO.24 > =====


 24%|██▍       | 24/100 [00:43<02:39,  2.09s/it]

SMAPE: 1.2691524328261534
R2: 0.9860559928149585
===== < BUILDING NO.25 > =====


 25%|██▌       | 25/100 [00:46<02:40,  2.14s/it]

SMAPE: 1.049760042455599
R2: 0.9888383987705014
===== < BUILDING NO.26 > =====


 26%|██▌       | 26/100 [00:47<02:27,  2.00s/it]

SMAPE: 0.9773173816754479
R2: 0.9862398667859699
===== < BUILDING NO.27 > =====


 27%|██▋       | 27/100 [00:49<02:19,  1.91s/it]

SMAPE: 0.867737563148512
R2: 0.981536154919862
===== < BUILDING NO.28 > =====


 28%|██▊       | 28/100 [00:50<02:04,  1.73s/it]

SMAPE: 2.704559082680852
R2: 0.9587023328344311
===== < BUILDING NO.29 > =====


 29%|██▉       | 29/100 [00:52<01:57,  1.66s/it]

SMAPE: 1.9643587507454325
R2: 0.9533188152654972
===== < BUILDING NO.30 > =====


 30%|███       | 30/100 [00:53<01:53,  1.62s/it]

SMAPE: 3.5481102890599323
R2: 0.88620846508242
===== < BUILDING NO.31 > =====


 31%|███       | 31/100 [00:55<01:46,  1.54s/it]

SMAPE: 2.567334174154841
R2: 0.94394935900536
===== < BUILDING NO.32 > =====


 32%|███▏      | 32/100 [00:57<01:57,  1.73s/it]

SMAPE: 0.19720914074014337
R2: 0.8552045268656359
===== < BUILDING NO.33 > =====


 33%|███▎      | 33/100 [01:00<02:28,  2.22s/it]

SMAPE: 0.22694591032733172
R2: 0.68446200136358
===== < BUILDING NO.34 > =====


 34%|███▍      | 34/100 [01:03<02:34,  2.35s/it]

SMAPE: 0.3387897476067479
R2: 0.871465093870818
===== < BUILDING NO.35 > =====


 35%|███▌      | 35/100 [01:05<02:34,  2.37s/it]

SMAPE: 0.16554927907406108
R2: 0.930644493744941
===== < BUILDING NO.36 > =====


 36%|███▌      | 36/100 [01:08<02:38,  2.48s/it]

SMAPE: 0.324572880762238
R2: 0.6041902251379345
===== < BUILDING NO.37 > =====


 37%|███▋      | 37/100 [01:10<02:24,  2.29s/it]

SMAPE: 2.1960751465586466
R2: 0.9806661021843802
===== < BUILDING NO.38 > =====


 38%|███▊      | 38/100 [01:12<02:13,  2.15s/it]

SMAPE: 1.9340681188414415
R2: 0.9973945579051627
===== < BUILDING NO.39 > =====


 39%|███▉      | 39/100 [01:13<02:01,  1.99s/it]

SMAPE: 2.6959262786661693
R2: 0.978623042488991
===== < BUILDING NO.40 > =====


 40%|████      | 40/100 [01:15<01:50,  1.84s/it]

SMAPE: 4.205767008048054
R2: 0.9647470373154696
===== < BUILDING NO.41 > =====


 41%|████      | 41/100 [01:16<01:46,  1.80s/it]

SMAPE: 2.4855378207707686
R2: 0.9900449752317494
===== < BUILDING NO.42 > =====


 42%|████▏     | 42/100 [01:18<01:34,  1.63s/it]

SMAPE: 8.886334639541975
R2: 0.8211249543964603
===== < BUILDING NO.43 > =====


 43%|████▎     | 43/100 [01:19<01:34,  1.65s/it]

SMAPE: 3.142299275684015
R2: 0.9935884405681805
===== < BUILDING NO.44 > =====


 44%|████▍     | 44/100 [01:21<01:30,  1.61s/it]

SMAPE: 2.017980540625075
R2: 0.9963475195671331
===== < BUILDING NO.45 > =====


 45%|████▌     | 45/100 [01:23<01:34,  1.72s/it]

SMAPE: 1.2202206810559018
R2: 0.9634003864073052
===== < BUILDING NO.46 > =====


 46%|████▌     | 46/100 [01:24<01:31,  1.69s/it]

SMAPE: 2.2453486787421926
R2: 0.9497410894929194
===== < BUILDING NO.47 > =====


 47%|████▋     | 47/100 [01:26<01:28,  1.66s/it]

SMAPE: 2.1547084290582728
R2: 0.9376816248035876
===== < BUILDING NO.48 > =====


 48%|████▊     | 48/100 [01:28<01:27,  1.69s/it]

SMAPE: 1.012755865008746
R2: 0.9866580578024471
===== < BUILDING NO.49 > =====


 49%|████▉     | 49/100 [01:30<01:35,  1.88s/it]

SMAPE: 1.261882648770264
R2: 0.9506944612100799
===== < BUILDING NO.50 > =====


 50%|█████     | 50/100 [01:32<01:30,  1.81s/it]

SMAPE: 1.1774256184351086
R2: 0.9382771255971869
===== < BUILDING NO.51 > =====


 51%|█████     | 51/100 [01:34<01:29,  1.82s/it]

SMAPE: 1.76382256015568
R2: 0.9380209906080006
===== < BUILDING NO.52 > =====


 52%|█████▏    | 52/100 [01:36<01:31,  1.90s/it]

SMAPE: 1.6804617829057673
R2: 0.9701209576445452
===== < BUILDING NO.53 > =====


 53%|█████▎    | 53/100 [01:37<01:22,  1.75s/it]

SMAPE: 5.845478155723059
R2: 0.9522922672614174
===== < BUILDING NO.54 > =====


 54%|█████▍    | 54/100 [01:39<01:19,  1.72s/it]

SMAPE: 10.349529164265277
R2: 0.8227356252535516
===== < BUILDING NO.55 > =====


 55%|█████▌    | 55/100 [01:41<01:17,  1.73s/it]

SMAPE: 0.7954576733665512
R2: 0.9207194335232134
===== < BUILDING NO.56 > =====


 56%|█████▌    | 56/100 [01:43<01:22,  1.87s/it]

SMAPE: 0.36454789539968774
R2: 0.956043892706144
===== < BUILDING NO.57 > =====


 57%|█████▋    | 57/100 [01:44<01:17,  1.80s/it]

SMAPE: 2.602961978510018
R2: 0.9852399391579665
===== < BUILDING NO.58 > =====


 58%|█████▊    | 58/100 [01:46<01:19,  1.88s/it]

SMAPE: 0.2140446799035083
R2: 0.9417370926698515
===== < BUILDING NO.59 > =====


 59%|█████▉    | 59/100 [01:48<01:13,  1.80s/it]

SMAPE: 2.57593600262091
R2: 0.9806958621949143
===== < BUILDING NO.60 > =====


 60%|██████    | 60/100 [01:50<01:08,  1.71s/it]

SMAPE: 3.1294851892009734
R2: 0.943293418791736
===== < BUILDING NO.61 > =====


 61%|██████    | 61/100 [01:51<01:04,  1.66s/it]

SMAPE: 2.4241400697475286
R2: 0.9329405959549734
===== < BUILDING NO.62 > =====


 62%|██████▏   | 62/100 [01:53<01:01,  1.61s/it]

SMAPE: 2.7296138619808215
R2: 0.9264455657548724
===== < BUILDING NO.63 > =====


 63%|██████▎   | 63/100 [01:54<00:58,  1.58s/it]

SMAPE: 3.137687105059982
R2: 0.9153071094077435
===== < BUILDING NO.64 > =====


 64%|██████▍   | 64/100 [01:56<00:55,  1.55s/it]

SMAPE: 1.9728113827839104
R2: 0.9465342574200263
===== < BUILDING NO.65 > =====


 65%|██████▌   | 65/100 [01:57<00:52,  1.50s/it]

SMAPE: 4.299223299773507
R2: 0.8762872841097383
===== < BUILDING NO.66 > =====


 66%|██████▌   | 66/100 [01:59<00:52,  1.53s/it]

SMAPE: 2.1522566600961635
R2: 0.8571766754142797
===== < BUILDING NO.67 > =====


 67%|██████▋   | 67/100 [02:00<00:48,  1.48s/it]

SMAPE: 2.74384280272047
R2: 0.8830061209811291
===== < BUILDING NO.68 > =====


 68%|██████▊   | 68/100 [02:01<00:47,  1.47s/it]

SMAPE: 2.4851979680719816
R2: 0.9311362553108149
===== < BUILDING NO.69 > =====


 69%|██████▉   | 69/100 [02:03<00:47,  1.52s/it]

SMAPE: 1.1998130765926869
R2: 0.977139652494653
===== < BUILDING NO.70 > =====


 70%|███████   | 70/100 [02:05<00:49,  1.66s/it]

SMAPE: 2.5906605631607684
R2: 0.9505012664788661
===== < BUILDING NO.71 > =====


 71%|███████   | 71/100 [02:07<00:48,  1.66s/it]

SMAPE: 2.2288104240237754
R2: 0.8905960554177875
===== < BUILDING NO.72 > =====


 72%|███████▏  | 72/100 [02:09<00:52,  1.86s/it]

SMAPE: 2.341936241720028
R2: 0.788323527296303
===== < BUILDING NO.73 > =====


 73%|███████▎  | 73/100 [02:11<00:48,  1.81s/it]

SMAPE: 1.6812495884969443
R2: 0.8191011477677099
===== < BUILDING NO.74 > =====


 74%|███████▍  | 74/100 [02:13<00:49,  1.89s/it]

SMAPE: 3.1590155267692923
R2: 0.9215653654800202
===== < BUILDING NO.75 > =====


 75%|███████▌  | 75/100 [02:14<00:44,  1.78s/it]

SMAPE: 1.7684096629510642
R2: 0.939415150609647
===== < BUILDING NO.76 > =====


 76%|███████▌  | 76/100 [02:16<00:41,  1.71s/it]

SMAPE: 2.4262653195951516
R2: 0.7935494272807322
===== < BUILDING NO.77 > =====


 77%|███████▋  | 77/100 [02:18<00:39,  1.71s/it]

SMAPE: 1.2476837868169748
R2: 0.9924322216808157
===== < BUILDING NO.78 > =====


 78%|███████▊  | 78/100 [02:19<00:38,  1.74s/it]

SMAPE: 1.5595962083409516
R2: 0.9933560836140741
===== < BUILDING NO.79 > =====


 79%|███████▉  | 79/100 [02:21<00:38,  1.84s/it]

SMAPE: 1.0350976727996473
R2: 0.9910165228236005
===== < BUILDING NO.80 > =====


 80%|████████  | 80/100 [02:23<00:38,  1.90s/it]

SMAPE: 1.6400788047320642
R2: 0.9945570325609523
===== < BUILDING NO.81 > =====


 81%|████████  | 81/100 [02:25<00:35,  1.89s/it]

SMAPE: 1.8059067018017305
R2: 0.9914125998742644
===== < BUILDING NO.82 > =====


 82%|████████▏ | 82/100 [02:27<00:33,  1.86s/it]

SMAPE: 2.100320803873537
R2: 0.9917765402321854
===== < BUILDING NO.83 > =====


 83%|████████▎ | 83/100 [02:29<00:31,  1.82s/it]

SMAPE: 1.4810436987775253
R2: 0.9933915973581001
===== < BUILDING NO.84 > =====


 84%|████████▍ | 84/100 [02:31<00:28,  1.79s/it]

SMAPE: 1.4418869799494027
R2: 0.9909055342089284
===== < BUILDING NO.85 > =====


 85%|████████▌ | 85/100 [02:33<00:28,  1.91s/it]

SMAPE: 1.7826386026152627
R2: 0.9535375682337994
===== < BUILDING NO.86 > =====


 86%|████████▌ | 86/100 [02:35<00:27,  1.95s/it]

SMAPE: 3.742093518187122
R2: 0.8742573123270703
===== < BUILDING NO.87 > =====


 87%|████████▋ | 87/100 [02:36<00:24,  1.86s/it]

SMAPE: 4.890166003373403
R2: 0.881452065133602
===== < BUILDING NO.88 > =====


 88%|████████▊ | 88/100 [02:38<00:21,  1.76s/it]

SMAPE: 2.9714834425101118
R2: 0.943396775921006
===== < BUILDING NO.89 > =====


 89%|████████▉ | 89/100 [02:40<00:19,  1.73s/it]

SMAPE: 2.86748248519345
R2: 0.9394340533397157
===== < BUILDING NO.90 > =====


 90%|█████████ | 90/100 [02:41<00:17,  1.76s/it]

SMAPE: 3.106183633377735
R2: 0.9678364868124525
===== < BUILDING NO.91 > =====


 91%|█████████ | 91/100 [02:43<00:15,  1.76s/it]

SMAPE: 6.369985354698693
R2: 0.9491040321602547
===== < BUILDING NO.92 > =====


 92%|█████████▏| 92/100 [02:45<00:14,  1.75s/it]

SMAPE: 2.8757107303313627
R2: 0.9392718993007843
===== < BUILDING NO.93 > =====


 93%|█████████▎| 93/100 [02:47<00:12,  1.74s/it]

SMAPE: 3.3389358731016823
R2: 0.8707269802560169
===== < BUILDING NO.94 > =====


 94%|█████████▍| 94/100 [02:49<00:10,  1.78s/it]

SMAPE: 2.9997516435458205
R2: 0.9164332268349072
===== < BUILDING NO.95 > =====


 95%|█████████▌| 95/100 [02:50<00:08,  1.61s/it]

SMAPE: 7.379615894826802
R2: 0.62629859743109
===== < BUILDING NO.96 > =====


 96%|█████████▌| 96/100 [02:51<00:06,  1.60s/it]

SMAPE: 1.4984665415299465
R2: 0.9649112666172949
===== < BUILDING NO.97 > =====


 97%|█████████▋| 97/100 [02:54<00:05,  1.84s/it]

SMAPE: 2.4520487961565443
R2: 0.9308615146938714
===== < BUILDING NO.98 > =====


 98%|█████████▊| 98/100 [02:55<00:03,  1.72s/it]

SMAPE: 6.996710563035973
R2: 0.8523448293077341
===== < BUILDING NO.99 > =====


 99%|█████████▉| 99/100 [02:57<00:01,  1.70s/it]

SMAPE: 1.694317434400067
R2: 0.9789202633621266
===== < BUILDING NO.100 > =====


100%|██████████| 100/100 [02:59<00:00,  1.79s/it]

SMAPE: 2.734539119499041
R2: 0.9661826648531509





In [27]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.958089806818016, 'R2': 0.942688600343887},
 {'건물번호': 2, 'SMAPE': 5.423590043612395, 'R2': 0.8202690087147659},
 {'건물번호': 3, 'SMAPE': 5.4817390200429115, 'R2': 0.8344428371312368},
 {'건물번호': 4, 'SMAPE': 2.030205401726124, 'R2': 0.9739141290440244},
 {'건물번호': 5, 'SMAPE': 2.827683287140875, 'R2': 0.9722565011568478},
 {'건물번호': 6, 'SMAPE': 1.8545761225980184, 'R2': 0.9814595210574545},
 {'건물번호': 7, 'SMAPE': 5.067414580170736, 'R2': 0.8952315004993333},
 {'건물번호': 8, 'SMAPE': 2.5219085058843516, 'R2': 0.7352700104528325},
 {'건물번호': 9, 'SMAPE': 2.138218998898596, 'R2': 0.9743442644232198},
 {'건물번호': 10, 'SMAPE': 3.8797821999979956, 'R2': 0.33503565082129705},
 {'건물번호': 11, 'SMAPE': 1.6987581798917475, 'R2': 0.9046754964654331},
 {'건물번호': 12, 'SMAPE': 1.9467450064152463, 'R2': 0.8963663764505699},
 {'건물번호': 13, 'SMAPE': 2.9932188552135015, 'R2': 0.40865056641057806},
 {'건물번호': 14, 'SMAPE': 8.26659468656458, 'R2': 0.2823081380570013},
 {'건물번호': 15, 'SMAPE': 1.25634414101

In [28]:
smape_val

2.646947603316238

In [29]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = pred_test_whole
sub.to_csv('xgb_sub_27.csv', index=False)

### Parameter Tuning - Grid Search CV

In [48]:
from sklearn.model_selection import GridSearchCV, PredefinedSplit

In [None]:
df = pd.DataFrame(columns = ['n_estimators', 'eta', 'min_child_weight', 'max_depth', 'colsample_bytree', 'subsample'])
preds = np.array([])
grid = {
    'n_estimators':     [30, 50, 70, 100],
    'eta':              [0.01],
    'min_child_weight': np.arange(1, 8, 1),
    'max_depth':        np.arange(3, 9, 1),
    'colsample_bytree': np.arange(0.8, 1.0, 0.1),
    'subsample':        np.arange(0.8, 1.0, 0.1)
}

for i in tqdm(range(100)):
    print(f"===== < BUILDING NO.{i+1} > =====")
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    gcv = GridSearchCV(estimator=XGBRegressor(seed=0, gpu_id=0,
                                              tree_method='gpu_hist', predictor='gpu_predictor'),
                       param_grid=grid,
                       scoring=make_scorer(SMAPE, greater_is_better=False),
                       cv=5,
                       refit=True,
                       verbose=True)
    gcv.fit(x_train, y_train)
    best = gcv.best_estimator_
    params = gcv.best_params_
    print(params)
    pred = best.predict(x_valid)
    print(f'SMAPE: {SMAPE(y_valid, pred)}')
    preds = np.append(preds, pred)
    df = pd.concat([df, pd.DataFrame(params, index=[0])], axis=0)

  0%|          | 0/100 [00:00<?, ?it/s]

===== < BUILDING NO.1 > =====
Fitting 5 folds for each of 672 candidates, totalling 3360 fits


In [133]:
df.to_csv('hyperparameter_xgb.csv', index=False)

In [None]:
xgb_params = df

In [None]:
scores = []
best_iter = []

for i in tqdm(range(100)):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    xgb_reg = XGBRegressor(n_estimators=10000, eta=0.01, min_child_weight=xgb_params.iloc[i, 2],
                            max_depth=xgb_params.iloc[i, 3], colsample_bytree=xgb_params.iloc[i, 4],
                            subsample=xgb_params.iloc[i, 5], seed=0)
    xgb_reg.set_params(**{'objective':weighted_mse(100)})
    xgb_reg.set_params(early_stopping_rounds=300)
    xgb_reg.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_valid, y_valid)], verbose=False)
    y_pred = xgb_reg.predict(x_valid)
    pred = pd.Series(y_pred)
    sm = SMAPE(y_valid, y_pred)
    scores.append(sm)
    best_iter.append(xgb_reg.best_iteration) # 실제 best iteration은 이 값에 +1

In [None]:
alpha_list = []
smape_list = []

for i in tqdm(range(100)):
    x_train, x_valid, y_train, y_valid = tr_ts_split(train, test, i+1, 7*24)
    xgb = XGBRegressor(seed=0,
                        n_estimators=best_iter[i], eta=0.01, min_child_weight=xgb_params.iloc[i, 2],
                        max_depth=xgb_params.iloc[i, 3], colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
    xgb.fit(x_train, y_train)
    pred0 = xgb.predict(x_valid)
    best_alpha = 0
    score0 = SMAPE(y_valid, pred0)

    for j in [1, 2, 5, 7, 10, 25, 50, 75, 100]:
        xgb = XGBRegressor(seed=0,
                        n_estimators=best_iter[i], eta=0.01, min_child_weight=xgb_params.iloc[i, 2],
                        max_depth=xgb_params.iloc[i, 3], colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
        xgb.set_params(**{'objective': weighted_mse(j)})
        xgb.fit(x_train, y_train)
        pred1 = xgb.predict(x_valid)
        score1 = SMAPE(y_valid, pred1)
        if score1 < score0:
            best_alpha = j
            score0 = score1

    alpha_list.append(best_alpha)
    smape_list.append(score0)
    print(f"building {i+1} || best score: {score0} || alpha: {best_alpha}")

In [None]:
xgb_params['alpha'] = alpha_list
xgb_params['best_iter'] = best_iter
xgb_params.head()

In [None]:
xgb_params.to_csv('hyperparameter_xgb_real_last_testing.csv', index=False)

In [None]:
best_iter = xgb_params['best_iter'].to_list()
best_iter[0]

In [212]:
train.head()

Unnamed: 0,건물번호,기온(C),강수량(mm),풍속(m/s),습도(%),month,day,week,holiday,sin_time,cos_time,DI,power,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW),건물유형_건물기타,건물유형_공공,건물유형_대학교,건물유형_데이터센터,건물유형_백화점및아울렛,건물유형_병원,건물유형_상용,건물유형_아파트,건물유형_연구소,건물유형_지식산업센터,건물유형_할인마트,건물유형_호텔및리조트,CDH
0,1,18.6,0.0,0.9,42.0,6,2,22,0,0.0,1.0,234.154,1085.28,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-7.4
1,1,18.0,0.0,1.1,45.0,6,2,22,0,0.258819,0.965926,219.28,1047.36,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-15.4
2,1,17.7,0.0,1.5,45.0,6,2,22,0,0.5,0.866025,205.672,974.88,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-23.7
3,1,16.7,0.0,1.4,48.0,6,2,22,0,0.707107,0.707107,167.011,953.76,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-33.0
4,1,18.4,0.0,2.8,43.0,6,2,22,0,0.866025,0.5,229.592,986.4,110634.0,39570.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,-40.6


In [None]:
preds = np.array([])
for i in tqdm(range(100)):
    pred_df = pd.DataFrame()
    for seed in range(0, 6):
        x_train = train.loc[train['건물번호']==i+1,].drop(['건물번호', 'power'], axis=1)
        y_train = train.loc[train['건물번호']==i+1, 'power']
        x_test = test.loc[test['건물번호']==i+1,].drop('건물번호', axis=1)

        xgb = XGBRegressor(seed=seed, n_estimators=best_iter[i], eta=0.01,
                            min_child_weight=xgb_params.iloc[i, 2], max_depth=xgb_params.iloc[i, 3],
                            colsample_bytree=xgb_params.iloc[i, 4], subsample=xgb_params.iloc[i, 5])
        if xgb_params.iloc[i, 6] != 0:
            xgb.set_params(**{'objective':weighted_mse(xgb_params.iloc[i, 6])})

        xgb.fit(x_train, y_train)
        y_pred = xgb.predict(x_test)
        pred_df.loc[:, seed] = y_pred

    pred = pred_df.mean(axis=1)
    preds = np.append(preds, pred)

In [None]:
sub = pd.read_csv('sample_submission.csv')
sub['answer'] = preds
sub.to_csv('xgb_sub_22.csv', index=False)

In [215]:
summary_list

[{'건물번호': 1, 'SMAPE': 3.5419388838477404, 'R2': 0.9563832166343148},
 {'건물번호': 2, 'SMAPE': 4.295780152455329, 'R2': 0.8651174662607124},
 {'건물번호': 3, 'SMAPE': 4.976127431146375, 'R2': 0.8282886548210623},
 {'건물번호': 4, 'SMAPE': 1.916974758959043, 'R2': 0.9749491993399805},
 {'건물번호': 5, 'SMAPE': 2.8188883844313564, 'R2': 0.972446435735248},
 {'건물번호': 6, 'SMAPE': 1.8247299710309055, 'R2': 0.9802984729616211},
 {'건물번호': 7, 'SMAPE': 4.181533402777831, 'R2': 0.906970899840581},
 {'건물번호': 8, 'SMAPE': 2.8739138223687473, 'R2': 0.6702282399188223},
 {'건물번호': 9, 'SMAPE': 1.8549493161639723, 'R2': 0.984736771457575},
 {'건물번호': 10, 'SMAPE': 3.5416225095446876, 'R2': 0.4322418016566194},
 {'건물번호': 11, 'SMAPE': 1.686647477272509, 'R2': 0.8955227858960689},
 {'건물번호': 12, 'SMAPE': 1.9311341180054387, 'R2': 0.8995117473179703},
 {'건물번호': 13, 'SMAPE': 3.004711373509035, 'R2': 0.3961679661426334},
 {'건물번호': 14, 'SMAPE': 7.909728524427743, 'R2': 0.34326299780211256},
 {'건물번호': 15, 'SMAPE': 1.8971932943063

In [57]:
train[train['건물번호']==54].head(24)

Unnamed: 0,num_date_time,건물번호,일시,기온(C),강수량(mm),풍속(m/s),습도(%),일조(hr),일사(MJ/m2),전력소비량(kWh)
108120,54_20220601 00,54,20220601 00,18.6,,2.6,62.0,,,578.16
108121,54_20220601 01,54,20220601 01,18.8,,0.4,62.0,,,408.24
108122,54_20220601 02,54,20220601 02,18.9,,0.0,52.0,,,398.16
108123,54_20220601 03,54,20220601 03,20.2,,2.4,38.0,,,387.36
108124,54_20220601 04,54,20220601 04,20.4,,2.8,40.0,,,387.36
108125,54_20220601 05,54,20220601 05,20.0,,1.7,41.0,,,419.76
108126,54_20220601 06,54,20220601 06,19.6,,0.0,47.0,0.0,0.04,612.72
108127,54_20220601 07,54,20220601 07,21.2,,2.0,38.0,0.0,0.29,661.68
108128,54_20220601 08,54,20220601 08,22.9,,1.3,36.0,0.0,0.69,1210.32
108129,54_20220601 09,54,20220601 09,25.6,,0.3,32.0,0.8,1.43,1901.16


In [52]:
bi = pd.read_csv('building_info.csv')

In [54]:
bi[bi['건물번호']==54]

Unnamed: 0,건물번호,건물유형,연면적(m2),냉방면적(m2),태양광용량(kW),ESS저장용량(kWh),PCS용량(kW)
53,54,상용,109400.2,65803.57,-,-,-


test와 train의 분포 차이 확인
random forest 모델 확인
과대 예측 잡기
54번 건물 주의(튀는 애 발견, outlier 제거 후 시도 예를 들면 뒤에서 1000개 빼서 수행)
feature selection 요망