In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import random
import numpy as np
import os
from tqdm import tqdm
import xgboost

In [54]:
train_df = pd.read_csv('./Data/train.csv')
building_info = pd.read_csv('./Data/building_info.csv')
test_df = pd.read_csv('./Data/test.csv')

In [55]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

In [56]:
train_df = train_df.fillna(0)

In [57]:
# 파생변수 THI, CDH
train_df['THI'] = 9/5*train_df['기온(C)'] - 0.55*(1-train_df['습도(%)']/100)*(9/5*train_df['습도(%)']-26)+32

def CDH(xs):
    ys = []
    for i in range(len(xs)):
        if i < 11:
            ys.append(np.sum(xs[:(i+1)]-26))
        else:
            ys.append(np.sum(xs[(i-11):(i+1)]-26))
    return np.array(ys)

cdhs = np.array([])

for num in range(1,101,1):
    temp = train_df[train_df['건물번호'] == num]
    cdh = CDH(temp['기온(C)'].values)
    cdhs = np.concatenate([cdhs, cdh])
    
train_df['CDH'] = cdhs

In [58]:
# Weekday 변수 추가
import datetime

def to_datetime(s):
    """
    Args:
        s: ex) '20220601 01'
    Returns:
        weekday: 0~6(int), 0: 월요일, 1: 화요일, ...
    """
    s = s.split()[0]  # 20220601
    date = datetime.datetime.strptime(s, '%Y%m%d')
    weekday = date.weekday()  
    return weekday

In [59]:
train_df['Weekday'] = train_df.apply(lambda x:to_datetime(x['일시']), axis=1)

In [60]:
# 전날과의 차이 변수 추가
for i in range(1, 101):
    df = train_df[train_df['건물번호'] == i]
    train_df.loc[df.index, '기온_gap'] = df['기온(C)'] - df.shift(1)['기온(C)']
    train_df.loc[df.index, '풍속_gap'] = df['풍속(m/s)'] - df.shift(1)['풍속(m/s)']
    train_df.loc[df.index, '습도_gap'] = df['습도(%)'] - df.shift(1)['습도(%)']

In [61]:
train_df['기온_gap'] = train_df['기온_gap'].fillna(0)
train_df['풍속_gap'] = train_df['풍속_gap'].fillna(0)
train_df['습도_gap'] = train_df['습도_gap'].fillna(0)

In [62]:
def train_test_split(df, th):
    train = df[df['일시'].str[:8].astype(int) < th].reset_index(drop=True)
    test = df[df['일시'].str[:8].astype(int) >= th].reset_index(drop=True)
    return train, test

In [63]:
def preprocess_x(df):
    to_remove_columns = ['num_date_time', '일시', '일조(hr)', '일사(MJ/m2)', '전력소비량(kWh)']
    df = df.fillna(0)
    #시계열 특성을 학습에 반영하기 위해 일시를 월, 일, 시간으로 나눕니다
    df['month'] = df['일시'].apply(lambda x : int(x[4:6]))
    df['day'] = df['일시'].apply(lambda x : int(x[6:8]))
    df['time'] = df['일시'].apply(lambda x : int(x[9:11]))

    df['holiday'] = df.apply(lambda x : 1 if x['day']==6 else 0, axis = 1)

    df['holiday'][(df['month']==6) & (df['day']==1)] = 1
    df['holiday'][(df['month']==6) & (df['day']==6)] = 1
    df['holiday'][(df['month']==8) & (df['day']==15)] = 1

    df['sin_time'] = np.sin(2*np.pi*df.time/24)
    df['cos_time'] = np.cos(2*np.pi*df.time/24)

    df = df.merge(building_info.iloc[:, :4])
    df['건물유형'] = df['건물유형'].astype('category').cat.codes

    # 요금 가중치
    df['fare_w'] = 0

    # 일반, 산업
    for i in [0,1,3,4,5,6,8,9,10,11] :
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 9) & (df['time'] <10))] = 1.144
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 12) & (df['time'] <13))] = 1.144
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 17) & (df['time'] <22))] = 1.144
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 10) & (df['time'] <12))] = 1.965
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 13) & (df['time'] <17))] = 1.965
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 8) & (df['time'] <9))] = 0.615
        df['fare_w'][(df['건물유형']==i) & ((df['time'] >= 22) | (df['time'] <8))] = 0.451
    # 교육
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 9) & (df['time'] <10))] = 0.944
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 12) & (df['time'] <13))] = 0.944
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 17) & (df['time'] <22))] = 0.944
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 10) & (df['time'] <12))] = 1.603
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 13) & (df['time'] <17))] = 1.603
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 8) & (df['time'] <9))] = 0.497
    df['fare_w'][(df['건물유형']==2) & ((df['time'] >= 22) | (df['time'] <8))] = 0.451

    # 토요일
    df['fare_w'][(df['Weekday']==5) & ((df['time'] >= 10) & (df['time'] <12))] = 1.144
    df['fare_w'][(df['Weekday']==5) & ((df['time'] >= 13) & (df['time'] <17))] = 1.144
    df['fare_w'][(df['Weekday']==5) & ((df['time'] >= 10) & (df['time'] <12)) & (df['건물유형']==2)] = 0.944
    # 공휴일
    df['fare_w'][(df['holiday']==1) & ((df['time'] >= 10) & (df['time'] <12))] = 0.615
    df['fare_w'][(df['holiday']==1) & ((df['time'] >= 13) & (df['time'] <17))] = 0.615
    df['fare_w'][(df['holiday']==1) & ((df['time'] >= 10) & (df['time'] <12)) & (df['건물유형']==2)] = 0.497

    # 주택
    df['fare_w'][(df['건물유형']==7)] = 0.782

    for c in to_remove_columns:
        if c in df.columns:
            df = df.drop(columns=[c])
    return df

In [64]:
date_th = 20220818
train, val = train_test_split(train_df, date_th)
train_x = preprocess_x(train)
train_y = train['전력소비량(kWh)']

valid_x = preprocess_x(val)
valid_y = val['전력소비량(kWh)']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['holiday'][(df['month']==6) & (df['day']==1)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['holiday'][(df['month']==6) & (df['day']==6)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['holiday'][(df['month']==8) & (df['day']==15)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['fare

In [65]:
def validate_multi(valid_x, valid_y, models):
    """
    Args:
        models: dict, {1: model1, 2: model2, ..., 100: model100}
    """
    preds = []
    
    for i in range(1, 101):
        _x = valid_x[valid_x['건물번호'] == i]
        _x = _x.drop(columns=['건물번호', '건물유형', '연면적(m2)', '냉방면적(m2)'])
        pred = models[i].predict(_x).tolist()
        preds.extend(pred)
        
    preds = np.array(preds)
    smape_score, mae_score = SMAPE(valid_y, preds), mae(valid_y, preds)
    
    return smape_score, mae_score

In [66]:
def train_multiple_models(train_x, train_y, n_estimators=100, lr=0.05, md=6, sub=0.75):
    models = {}
    
    for i in tqdm(range(1, 101)):
        _x = train_x[train_x['건물번호'] == i]
        _x = _x.drop(columns=['건물번호', '건물유형', '연면적(m2)', '냉방면적(m2)'])
        _y = train_y[_x.index]
        model_xgb = xgboost.XGBRegressor(n_estimators=n_estimators, learning_rate=lr, max_depth = md,
                                         subsample = sub, verbose=-1)
        model_xgb.fit(_x, _y)
        models[i] = model_xgb
        
    return models

In [67]:
def SMAPE(y, pred):
    smape = abs((y - pred))/((abs(y) + abs(pred)) / 2) * 100
    smape = np.mean(smape)
    return smape

def mae(y, pred):
    return np.mean(abs(y-pred))

def validate(valid_x, valid_y, model):
    pred = model.predict(valid_x)
    smape_score, mae_score = SMAPE(valid_y, pred), mae(valid_y, pred)
    return smape_score, mae_score

In [90]:
models = train_multiple_models(train_x, train_y, 110, 0.08, 7, 0.8)

  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

Parameters: { "verbose" } are not used.



  1%|▊                                                                                 | 1/100 [00:00<00:37,  2.61it/s]

Parameters: { "verbose" } are not used.



  2%|█▋                                                                                | 2/100 [00:00<00:49,  1.98it/s]

Parameters: { "verbose" } are not used.



  3%|██▍                                                                               | 3/100 [00:01<00:39,  2.43it/s]

Parameters: { "verbose" } are not used.



  4%|███▎                                                                              | 4/100 [00:01<00:39,  2.41it/s]

Parameters: { "verbose" } are not used.



  5%|████                                                                              | 5/100 [00:01<00:35,  2.68it/s]

Parameters: { "verbose" } are not used.



  6%|████▉                                                                             | 6/100 [00:02<00:32,  2.90it/s]

Parameters: { "verbose" } are not used.



  7%|█████▋                                                                            | 7/100 [00:02<00:30,  3.04it/s]

Parameters: { "verbose" } are not used.



  8%|██████▌                                                                           | 8/100 [00:02<00:28,  3.26it/s]

Parameters: { "verbose" } are not used.



  9%|███████▍                                                                          | 9/100 [00:03<00:28,  3.20it/s]

Parameters: { "verbose" } are not used.



 10%|████████                                                                         | 10/100 [00:03<00:27,  3.31it/s]

Parameters: { "verbose" } are not used.



 11%|████████▉                                                                        | 11/100 [00:03<00:25,  3.46it/s]

Parameters: { "verbose" } are not used.



 12%|█████████▋                                                                       | 12/100 [00:03<00:24,  3.56it/s]

Parameters: { "verbose" } are not used.



 13%|██████████▌                                                                      | 13/100 [00:04<00:23,  3.64it/s]

Parameters: { "verbose" } are not used.



 14%|███████████▎                                                                     | 14/100 [00:04<00:23,  3.60it/s]

Parameters: { "verbose" } are not used.



 15%|████████████▏                                                                    | 15/100 [00:04<00:23,  3.69it/s]

Parameters: { "verbose" } are not used.



 16%|████████████▉                                                                    | 16/100 [00:05<00:22,  3.69it/s]

Parameters: { "verbose" } are not used.



 17%|█████████████▊                                                                   | 17/100 [00:05<00:22,  3.63it/s]

Parameters: { "verbose" } are not used.



 18%|██████████████▌                                                                  | 18/100 [00:05<00:22,  3.71it/s]

Parameters: { "verbose" } are not used.



 19%|███████████████▍                                                                 | 19/100 [00:05<00:21,  3.72it/s]

Parameters: { "verbose" } are not used.



 20%|████████████████▏                                                                | 20/100 [00:06<00:21,  3.68it/s]

Parameters: { "verbose" } are not used.



 21%|█████████████████                                                                | 21/100 [00:06<00:21,  3.61it/s]

Parameters: { "verbose" } are not used.



 22%|█████████████████▊                                                               | 22/100 [00:06<00:21,  3.65it/s]

Parameters: { "verbose" } are not used.



 23%|██████████████████▋                                                              | 23/100 [00:07<00:23,  3.30it/s]

Parameters: { "verbose" } are not used.



 24%|███████████████████▍                                                             | 24/100 [00:07<00:26,  2.89it/s]

Parameters: { "verbose" } are not used.



 25%|████████████████████▎                                                            | 25/100 [00:07<00:27,  2.76it/s]

Parameters: { "verbose" } are not used.



 26%|█████████████████████                                                            | 26/100 [00:08<00:27,  2.71it/s]

Parameters: { "verbose" } are not used.



 27%|█████████████████████▊                                                           | 27/100 [00:08<00:28,  2.54it/s]

Parameters: { "verbose" } are not used.



 28%|██████████████████████▋                                                          | 28/100 [00:09<00:29,  2.43it/s]

Parameters: { "verbose" } are not used.



 29%|███████████████████████▍                                                         | 29/100 [00:09<00:28,  2.45it/s]

Parameters: { "verbose" } are not used.



 30%|████████████████████████▎                                                        | 30/100 [00:10<00:29,  2.40it/s]

Parameters: { "verbose" } are not used.



 31%|█████████████████████████                                                        | 31/100 [00:10<00:28,  2.46it/s]

Parameters: { "verbose" } are not used.



 32%|█████████████████████████▉                                                       | 32/100 [00:10<00:26,  2.60it/s]

Parameters: { "verbose" } are not used.



 33%|██████████████████████████▋                                                      | 33/100 [00:11<00:24,  2.74it/s]

Parameters: { "verbose" } are not used.



 34%|███████████████████████████▌                                                     | 34/100 [00:11<00:23,  2.80it/s]

Parameters: { "verbose" } are not used.



 35%|████████████████████████████▎                                                    | 35/100 [00:11<00:23,  2.74it/s]

Parameters: { "verbose" } are not used.



 36%|█████████████████████████████▏                                                   | 36/100 [00:12<00:22,  2.84it/s]

Parameters: { "verbose" } are not used.



 37%|█████████████████████████████▉                                                   | 37/100 [00:12<00:24,  2.62it/s]

Parameters: { "verbose" } are not used.



 38%|██████████████████████████████▊                                                  | 38/100 [00:12<00:24,  2.49it/s]

Parameters: { "verbose" } are not used.



 39%|███████████████████████████████▌                                                 | 39/100 [00:13<00:25,  2.44it/s]

Parameters: { "verbose" } are not used.



 40%|████████████████████████████████▍                                                | 40/100 [00:14<00:30,  1.99it/s]

Parameters: { "verbose" } are not used.



 41%|█████████████████████████████████▏                                               | 41/100 [00:14<00:28,  2.08it/s]

Parameters: { "verbose" } are not used.



 42%|██████████████████████████████████                                               | 42/100 [00:15<00:28,  2.05it/s]

Parameters: { "verbose" } are not used.



 43%|██████████████████████████████████▊                                              | 43/100 [00:15<00:26,  2.12it/s]

Parameters: { "verbose" } are not used.



 44%|███████████████████████████████████▋                                             | 44/100 [00:15<00:24,  2.26it/s]

Parameters: { "verbose" } are not used.



 45%|████████████████████████████████████▍                                            | 45/100 [00:16<00:23,  2.36it/s]

Parameters: { "verbose" } are not used.



 46%|█████████████████████████████████████▎                                           | 46/100 [00:16<00:22,  2.40it/s]

Parameters: { "verbose" } are not used.



 47%|██████████████████████████████████████                                           | 47/100 [00:17<00:21,  2.44it/s]

Parameters: { "verbose" } are not used.



 48%|██████████████████████████████████████▉                                          | 48/100 [00:17<00:20,  2.52it/s]

Parameters: { "verbose" } are not used.



 49%|███████████████████████████████████████▋                                         | 49/100 [00:17<00:19,  2.56it/s]

Parameters: { "verbose" } are not used.



 50%|████████████████████████████████████████▌                                        | 50/100 [00:18<00:19,  2.59it/s]

Parameters: { "verbose" } are not used.



 51%|█████████████████████████████████████████▎                                       | 51/100 [00:18<00:19,  2.45it/s]

Parameters: { "verbose" } are not used.



 52%|██████████████████████████████████████████                                       | 52/100 [00:19<00:20,  2.35it/s]

Parameters: { "verbose" } are not used.



 53%|██████████████████████████████████████████▉                                      | 53/100 [00:19<00:19,  2.36it/s]

Parameters: { "verbose" } are not used.



 54%|███████████████████████████████████████████▋                                     | 54/100 [00:19<00:19,  2.36it/s]

Parameters: { "verbose" } are not used.



 55%|████████████████████████████████████████████▌                                    | 55/100 [00:20<00:18,  2.49it/s]

Parameters: { "verbose" } are not used.



 56%|█████████████████████████████████████████████▎                                   | 56/100 [00:20<00:17,  2.55it/s]

Parameters: { "verbose" } are not used.



 57%|██████████████████████████████████████████████▏                                  | 57/100 [00:21<00:17,  2.47it/s]

Parameters: { "verbose" } are not used.



 58%|██████████████████████████████████████████████▉                                  | 58/100 [00:21<00:16,  2.58it/s]

Parameters: { "verbose" } are not used.



 59%|███████████████████████████████████████████████▊                                 | 59/100 [00:21<00:16,  2.55it/s]

Parameters: { "verbose" } are not used.



 60%|████████████████████████████████████████████████▌                                | 60/100 [00:22<00:17,  2.35it/s]

Parameters: { "verbose" } are not used.



 61%|█████████████████████████████████████████████████▍                               | 61/100 [00:22<00:16,  2.35it/s]

Parameters: { "verbose" } are not used.



 62%|██████████████████████████████████████████████████▏                              | 62/100 [00:23<00:16,  2.32it/s]

Parameters: { "verbose" } are not used.



 63%|███████████████████████████████████████████████████                              | 63/100 [00:23<00:15,  2.34it/s]

Parameters: { "verbose" } are not used.



 64%|███████████████████████████████████████████████████▊                             | 64/100 [00:24<00:14,  2.43it/s]

Parameters: { "verbose" } are not used.



 65%|████████████████████████████████████████████████████▋                            | 65/100 [00:24<00:14,  2.49it/s]

Parameters: { "verbose" } are not used.



 66%|█████████████████████████████████████████████████████▍                           | 66/100 [00:24<00:13,  2.52it/s]

Parameters: { "verbose" } are not used.



 67%|██████████████████████████████████████████████████████▎                          | 67/100 [00:25<00:12,  2.57it/s]

Parameters: { "verbose" } are not used.



 68%|███████████████████████████████████████████████████████                          | 68/100 [00:25<00:13,  2.43it/s]

Parameters: { "verbose" } are not used.



 69%|███████████████████████████████████████████████████████▉                         | 69/100 [00:25<00:12,  2.49it/s]

Parameters: { "verbose" } are not used.



 70%|████████████████████████████████████████████████████████▋                        | 70/100 [00:26<00:12,  2.45it/s]

Parameters: { "verbose" } are not used.



 71%|█████████████████████████████████████████████████████████▌                       | 71/100 [00:26<00:11,  2.50it/s]

Parameters: { "verbose" } are not used.



 72%|██████████████████████████████████████████████████████████▎                      | 72/100 [00:27<00:11,  2.44it/s]

Parameters: { "verbose" } are not used.



 73%|███████████████████████████████████████████████████████████▏                     | 73/100 [00:27<00:11,  2.35it/s]

Parameters: { "verbose" } are not used.



 74%|███████████████████████████████████████████████████████████▉                     | 74/100 [00:28<00:11,  2.28it/s]

Parameters: { "verbose" } are not used.



 75%|████████████████████████████████████████████████████████████▊                    | 75/100 [00:28<00:10,  2.36it/s]

Parameters: { "verbose" } are not used.



 76%|█████████████████████████████████████████████████████████████▌                   | 76/100 [00:29<00:10,  2.29it/s]

Parameters: { "verbose" } are not used.



 77%|██████████████████████████████████████████████████████████████▎                  | 77/100 [00:29<00:09,  2.35it/s]

Parameters: { "verbose" } are not used.



 78%|███████████████████████████████████████████████████████████████▏                 | 78/100 [00:29<00:09,  2.32it/s]

Parameters: { "verbose" } are not used.



 79%|███████████████████████████████████████████████████████████████▉                 | 79/100 [00:30<00:09,  2.30it/s]

Parameters: { "verbose" } are not used.



 80%|████████████████████████████████████████████████████████████████▊                | 80/100 [00:30<00:08,  2.24it/s]

Parameters: { "verbose" } are not used.



 81%|█████████████████████████████████████████████████████████████████▌               | 81/100 [00:31<00:08,  2.28it/s]

Parameters: { "verbose" } are not used.



 82%|██████████████████████████████████████████████████████████████████▍              | 82/100 [00:31<00:07,  2.33it/s]

Parameters: { "verbose" } are not used.



 83%|███████████████████████████████████████████████████████████████████▏             | 83/100 [00:32<00:08,  1.95it/s]

Parameters: { "verbose" } are not used.



 84%|████████████████████████████████████████████████████████████████████             | 84/100 [00:32<00:08,  1.99it/s]

Parameters: { "verbose" } are not used.



 85%|████████████████████████████████████████████████████████████████████▊            | 85/100 [00:33<00:07,  2.12it/s]

Parameters: { "verbose" } are not used.



 86%|█████████████████████████████████████████████████████████████████████▋           | 86/100 [00:33<00:06,  2.20it/s]

Parameters: { "verbose" } are not used.



 87%|██████████████████████████████████████████████████████████████████████▍          | 87/100 [00:34<00:05,  2.26it/s]

Parameters: { "verbose" } are not used.



 88%|███████████████████████████████████████████████████████████████████████▎         | 88/100 [00:34<00:05,  2.23it/s]

Parameters: { "verbose" } are not used.



 89%|████████████████████████████████████████████████████████████████████████         | 89/100 [00:34<00:04,  2.23it/s]

Parameters: { "verbose" } are not used.



 90%|████████████████████████████████████████████████████████████████████████▉        | 90/100 [00:35<00:04,  2.25it/s]

Parameters: { "verbose" } are not used.



 91%|█████████████████████████████████████████████████████████████████████████▋       | 91/100 [00:35<00:04,  2.22it/s]

Parameters: { "verbose" } are not used.



 92%|██████████████████████████████████████████████████████████████████████████▌      | 92/100 [00:36<00:03,  2.22it/s]

Parameters: { "verbose" } are not used.



 93%|███████████████████████████████████████████████████████████████████████████▎     | 93/100 [00:36<00:03,  2.28it/s]

Parameters: { "verbose" } are not used.



 94%|████████████████████████████████████████████████████████████████████████████▏    | 94/100 [00:37<00:02,  2.34it/s]

Parameters: { "verbose" } are not used.



 95%|████████████████████████████████████████████████████████████████████████████▉    | 95/100 [00:37<00:02,  2.34it/s]

Parameters: { "verbose" } are not used.



 96%|█████████████████████████████████████████████████████████████████████████████▊   | 96/100 [00:37<00:01,  2.40it/s]

Parameters: { "verbose" } are not used.



 97%|██████████████████████████████████████████████████████████████████████████████▌  | 97/100 [00:38<00:01,  2.39it/s]

Parameters: { "verbose" } are not used.



 98%|███████████████████████████████████████████████████████████████████████████████▍ | 98/100 [00:38<00:00,  2.39it/s]

Parameters: { "verbose" } are not used.



 99%|████████████████████████████████████████████████████████████████████████████████▏| 99/100 [00:39<00:00,  2.26it/s]

Parameters: { "verbose" } are not used.



100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:39<00:00,  2.52it/s]


In [91]:
smape_score, mae_score = validate_multi(valid_x, valid_y, models)
print(f'SMAPE: {smape_score}\nMAE: {mae_score}')

SMAPE: 5.265972766919434
MAE: 102.17343945627803


In [95]:
test_df['THI'] = 9/5*test_df['기온(C)'] - 0.55*(1-test_df['습도(%)']/100)*(9/5*test_df['습도(%)']-26)+32

cdhs = np.array([])

for num in range(1,101,1):
    
    temp = test_df[test_df['건물번호'] == num]
    cdh = CDH(temp['기온(C)'].values)
    cdhs = np.concatenate([cdhs, cdh])
    
test_df['CDH'] = cdhs

In [96]:
test_df['Weekday'] = test_df.apply(lambda x:to_datetime(x['일시']), axis=1)

In [97]:
for i in range(1, 101):
    df = test_df[test_df['건물번호'] == i]
    test_df.loc[df.index, '기온_gap'] = df['기온(C)'] - df.shift(1)['기온(C)']
    test_df.loc[df.index, '풍속_gap'] = df['풍속(m/s)'] - df.shift(1)['풍속(m/s)']
    test_df.loc[df.index, '습도_gap'] = df['습도(%)'] - df.shift(1)['습도(%)']

In [98]:
test_df['기온_gap'] = test_df['기온_gap'].fillna(0)
test_df['풍속_gap'] = test_df['풍속_gap'].fillna(0)
test_df['습도_gap'] = test_df['습도_gap'].fillna(0)

In [99]:
test_df = preprocess_x(test_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['holiday'][(df['month']==6) & (df['day']==1)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['holiday'][(df['month']==6) & (df['day']==6)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['holiday'][(df['month']==8) & (df['day']==15)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['fare

In [100]:
preds_real = []

for i in tqdm(range(1, 101)):
    _x = test_df[test_df['건물번호'] == i]
    _x = _x.drop(columns=['건물번호', '건물유형', '연면적(m2)', '냉방면적(m2)'])
    pred = models[i].predict(_x).tolist()
    preds_real.extend(pred)

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 157.21it/s]


In [None]:
submission = pd.read_csv('./sample_submission.csv')
submission['answer'] = preds_real
submission.to_csv('xgb.csv', index=False)