In [None]:
import pandas as pd
import numpy as np
import os
import glob
import random

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the training data and the sample submission template from the Dacon folder
# under /content/drive/MyDrive.
train = pd.read_csv('/content/drive/MyDrive/Dacon/train/train.csv')
# The same submission template is read again in the final RESULT cell before it is filled.
submission = pd.read_csv('/content/drive/MyDrive/Dacon/sample_submission.csv')

In [None]:
def create_lag_feats(data, lags, cols, horizon=48):
    """Add lagged copies of ``cols`` and the two forecast targets to a copy of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame (e.g. the contents of train.csv). It must contain every
        column named in ``cols`` as well as a ``TARGET`` column.
    lags : iterable of int
        Row offsets passed to ``Series.shift``; the notebook uses ``[48]``.
    cols : iterable of str
        Columns to lag, e.g. ``['TARGET']`` or ``['DHI', 'DNI', 'WS', 'RH', 'T']``.
    horizon : int, default 48
        Number of rows by which ``Target2`` is pulled forward relative to
        ``TARGET``. The default reproduces the previously hard-coded value.

    Returns
    -------
    tuple (pandas.DataFrame, list of str)
        The augmented copy of ``data`` and the names of the lag columns that
        were added, ordered by column and then by lag.

    Notes
    -----
    ``Target1`` is ``TARGET`` itself and ``Target2`` is ``TARGET`` shifted
    ``horizon`` rows towards the start, with the trailing gap forward-filled.
    Both are written exactly once, even when ``cols`` or ``lags`` is empty.
    """
    temp = data.copy()
    lag_cols = []

    for col in cols:
        for lag in lags:
            lag_name = '%s_lag_%s' % (col, lag)
            # Row t of the new column holds the value observed `lag` rows earlier.
            temp[lag_name] = temp[col].shift(lag)
            lag_cols.append(lag_name)

    # The targets do not depend on the loop variables, so build them once here.
    temp['Target1'] = temp['TARGET']
    # .ffill() replaces the deprecated fillna(method='ffill') spelling.
    temp['Target2'] = temp['TARGET'].shift(-horizon).ffill()

    return temp, lag_cols

def preprocess_data(data, target_lags=[48], weather_lags=[48], is_train=True):
    """Build the model input frame: ``Hour``, lag features and (for training) targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame containing ``Hour``, ``TARGET``, ``DHI``, ``DNI``, ``WS``,
        ``RH`` and ``T`` columns.
    target_lags : list of int, default [48]
        Lags applied to ``TARGET``. The default list is only read, never mutated.
    weather_lags : list of int, default [48]
        Lags applied to the weather columns. Also only read.
    is_train : bool, default True
        When truthy, the ``Target1`` and ``Target2`` columns are appended as the
        last two columns; otherwise only the features are returned.

    Returns
    -------
    pandas.DataFrame
        The selected columns with every row that contains a NaN removed.
    """
    # create_lag_feats works on its own copy, so `data` is left untouched.
    temp, target_lag_cols = create_lag_feats(data, target_lags, ['TARGET'])
    temp, weather_lag_cols = create_lag_feats(temp, weather_lags, ['DHI', 'DNI', 'WS', 'RH', 'T'])

    keep = ['Hour'] + target_lag_cols + weather_lag_cols
    if is_train:
        # Targets stay last: later cells address them positionally with iloc[:, -2] / iloc[:, -1].
        keep = keep + ['Target1', 'Target2']

    return temp[keep].dropna()


# Training frame: Hour, the 48-row lag features, then Target1 and Target2 as the last two columns.
df_train = preprocess_data(train, target_lags=[48], weather_lags=[48], is_train=True)

In [None]:
# Build the test feature matrix from the 81 test files (0.csv ... 80.csv).
df_test = []

for i in range(81):
    file_path = '/content/drive/MyDrive/Dacon//test/' + str(i) + '.csv'
    temp = pd.read_csv(file_path)
    # Keep only the last 48 rows of each preprocessed file.
    temp = preprocess_data(temp, target_lags=[48], weather_lags=[48], is_train=False).iloc[-48:]
    df_test.append(temp)

# Stack the per-file frames row-wise; the last expression displays the resulting shape.
X_test = pd.concat(df_test)
X_test.shape

(3888, 7)

In [None]:
from sklearn.model_selection import train_test_split

# Y_train_1 = Target1
# Y_train_2 = Target2 (TARGET pulled forward by one day)
# Both calls use the same features, test_size and random_state, so the two targets
# share one row partition.
# NOTE(review): train_test_split shuffles rows by default; with lagged time-series
# features this places neighbouring time steps on both sides of the split — confirm
# that a shuffled (rather than chronological) validation set is intended.
X_train_1, X_valid_1, Y_train_1, Y_valid_1 = train_test_split(df_train.iloc[:, :-2], df_train.iloc[:, -2], test_size=0.3, random_state=0)
X_train_2, X_valid_2, Y_train_2, Y_valid_2 = train_test_split(df_train.iloc[:, :-2], df_train.iloc[:, -1], test_size=0.3, random_state=0)

In [None]:
# Quantile levels fitted by every model family in this notebook.
# Stacking looks prediction columns up by str(q) of these values.
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# LGBM

In [None]:
from lightgbm import LGBMRegressor

def LGBM(q, X_train, Y_train, X_valid, Y_valid, X_test):
    """Fit one LightGBM quantile regressor and predict ``X_test``.

    Parameters
    ----------
    q : float
        Quantile level, passed to LightGBM as ``alpha``.
    X_train, Y_train
        Training features and target.
    X_valid, Y_valid
        Validation set monitored for early stopping.
    X_test
        Features to predict after fitting.

    Returns
    -------
    tuple (LGBMRegressor, pandas.Series)
        The fitted model and its ``X_test`` predictions rounded to 2 decimals.
    """
    # Imported locally so this function does not rely on a module-level `lightgbm` name.
    from lightgbm import early_stopping, log_evaluation

    # (a) Modeling
    # `bagging_fraction` and `subsample` are aliases of one parameter, so only
    # `subsample` is kept. LightGBM applies row bagging only when the bagging
    # frequency is non-zero, hence `subsample_freq=1`.
    model = LGBMRegressor(objective='quantile', alpha=q, seed=2021,
                          n_estimators=10000, learning_rate=0.027,
                          subsample=0.7, subsample_freq=1)

    # Early stopping and periodic logging are passed as callbacks; the
    # `early_stopping_rounds` / `verbose` keyword arguments of fit() were
    # removed in LightGBM 4.
    model.fit(X_train, Y_train, eval_metric=['quantile'],
              eval_set=[(X_valid, Y_valid)],
              callbacks=[early_stopping(stopping_rounds=300),
                         log_evaluation(period=500)])

    # (b) Predictions
    pred = pd.Series(model.predict(X_test).round(2))
    return model, pred

In [None]:
def train_data_LGBM(X_train, Y_train, X_valid, Y_valid, X_test):
    """Fit one LightGBM model per entry of ``quantiles`` and collect the test predictions.

    Parameters
    ----------
    X_train, Y_train, X_valid, Y_valid, X_test
        Forwarded unchanged to ``LGBM`` for every quantile.

    Returns
    -------
    tuple (list, pandas.DataFrame)
        The fitted models in ``quantiles`` order, and a frame with one column of
        ``X_test`` predictions per quantile (columns labelled with the quantile values).
    """
    LGBM_models = []
    per_quantile_preds = []

    for q in quantiles:
        print(q)
        model, pred = LGBM(q, X_train, Y_train, X_valid, Y_valid, X_test)
        LGBM_models.append(model)
        per_quantile_preds.append(pred)

    # Assemble the frame once instead of re-concatenating it on every iteration.
    LGBM_actual_pred = pd.concat(per_quantile_preds, axis=1) if per_quantile_preds else pd.DataFrame()
    LGBM_actual_pred.columns = quantiles

    return LGBM_models, LGBM_actual_pred

In [None]:
# Target1: fit the quantile models on the Target1 split and predict X_test.
LGBM_1, LGBM_results_1 = train_data_LGBM(X_train_1, Y_train_1, X_valid_1, Y_valid_1, X_test)
# Target2: same procedure on the Target2 split.
LGBM_2, LGBM_results_2 = train_data_LGBM(X_train_2, Y_train_2, X_valid_2, Y_valid_2, X_test)

0.1
Training until validation scores don't improve for 300 rounds.
[500]	valid_0's quantile: 1.37212
Early stopping, best iteration is:
[676]	valid_0's quantile: 1.37108
0.2
Training until validation scores don't improve for 300 rounds.
[500]	valid_0's quantile: 2.16051
[1000]	valid_0's quantile: 2.14876
[1500]	valid_0's quantile: 2.14286
[2000]	valid_0's quantile: 2.13669
[2500]	valid_0's quantile: 2.13719
Early stopping, best iteration is:
[2316]	valid_0's quantile: 2.13589
0.3
Training until validation scores don't improve for 300 rounds.
[500]	valid_0's quantile: 2.57354
[1000]	valid_0's quantile: 2.54514
[1500]	valid_0's quantile: 2.53945
[2000]	valid_0's quantile: 2.53633
[2500]	valid_0's quantile: 2.53497
[3000]	valid_0's quantile: 2.53294
[3500]	valid_0's quantile: 2.5275
Early stopping, best iteration is:
[3505]	valid_0's quantile: 2.5274
0.4
Training until validation scores don't improve for 300 rounds.
[500]	valid_0's quantile: 2.6792
[1000]	valid_0's quantile: 2.65925
[1500

# RESULT_1

In [None]:
# Full (unsplit) training features and targets; the following cells use them to
# obtain predictions of the fitted models on every training row.
x_train_1 = df_train.iloc[:, :-2]
y_train_1 = df_train.iloc[:, -2]  # Target1 (second-to-last column)
x_train_2 = df_train.iloc[:, :-2]
y_train_2 = df_train.iloc[:, -1]  # Target2 (last column)

In [None]:
# Predictions of each Target1 quantile model on the full training features,
# one column per quantile labelled '0.1' ... '0.9'.
# Columns are gathered in a dict and the frame is built once, instead of
# concatenating onto a growing DataFrame inside the loop.
quantile_preds = {}
for q, fitted in zip(quantiles, LGBM_1):
    # Print the exact quantile rather than i*0.1+0.1, which shows float rounding noise.
    print('☆☆☆☆☆☆☆☆☆☆ ', q, ' ☆☆☆☆☆☆☆☆☆☆')
    quantile_preds[str(q)] = fitted.predict(x_train_1)
LGBM_predict_1 = pd.DataFrame(quantile_preds)

In [None]:
# Predictions of each Target2 quantile model on the full training features,
# one column per quantile labelled '0.1' ... '0.9'.
# Columns are gathered in a dict and the frame is built once, instead of
# concatenating onto a growing DataFrame inside the loop.
quantile_preds = {}
for q, fitted in zip(quantiles, LGBM_2):
    # Print the exact quantile rather than i*0.1+0.1, which shows float rounding noise.
    print('☆☆☆☆☆☆☆☆☆☆ ', q, ' ☆☆☆☆☆☆☆☆☆☆')
    quantile_preds[str(q)] = fitted.predict(x_train_2)
LGBM_predict_2 = pd.DataFrame(quantile_preds)

In [None]:
# Predictions of each Target1 quantile model on X_test,
# one column per quantile labelled '0.1' ... '0.9'.
# Columns are gathered in a dict and the frame is built once, instead of
# concatenating onto a growing DataFrame inside the loop.
quantile_preds = {}
for q, fitted in zip(quantiles, LGBM_1):
    # Print the exact quantile rather than i*0.1+0.1, which shows float rounding noise.
    print('☆☆☆☆☆☆☆☆☆☆ ', q, ' ☆☆☆☆☆☆☆☆☆☆')
    quantile_preds[str(q)] = fitted.predict(X_test)
LGBM_test_result_1 = pd.DataFrame(quantile_preds)

In [None]:
# Predictions of each Target2 quantile model on X_test,
# one column per quantile labelled '0.1' ... '0.9'.
# Columns are gathered in a dict and the frame is built once, instead of
# concatenating onto a growing DataFrame inside the loop.
quantile_preds = {}
for q, fitted in zip(quantiles, LGBM_2):
    # Print the exact quantile rather than i*0.1+0.1, which shows float rounding noise.
    print('☆☆☆☆☆☆☆☆☆☆ ', q, ' ☆☆☆☆☆☆☆☆☆☆')
    quantile_preds[str(q)] = fitted.predict(X_test)
LGBM_test_result_2 = pd.DataFrame(quantile_preds)

In [None]:
# Persist the LightGBM predictions (training rows and test rows, both targets)
# so the MLP section can reload them without refitting. The row index is not written.
LGBM_predict_1.to_csv('/content/drive/MyDrive/Dacon/LGBM_predict_1.csv', index=False)
LGBM_predict_2.to_csv('/content/drive/MyDrive/Dacon/LGBM_predict_2.csv', index=False)
LGBM_test_result_1.to_csv('/content/drive/MyDrive/Dacon/LGBM_test_result_1.csv', index=False)
LGBM_test_result_2.to_csv('/content/drive/MyDrive/Dacon/LGBM_test_result_2.csv', index=False)

# XGB

In [None]:
def xgb_quantile_eval(quantile):
    """Return an evaluation callback computing the mean pinball loss for ``quantile``.

    The returned ``loss(preds, dmatrix)`` reads the labels through
    ``dmatrix.get_label()`` and returns ``(metric_name, value)``, where the value
    is the NaN-ignoring mean of the per-row pinball loss.
    """
    def loss(preds, dmatrix):
        labels = dmatrix.get_label()
        over = preds >= labels   # prediction at or above the label
        under = preds < labels   # prediction below the label
        # Over-predictions are weighted by (1 - quantile), under-predictions by quantile.
        pinball = (over * (1 - quantile) * (preds - labels) +
                   under * quantile * (labels - preds))
        metric_name = 'q{}_loss'.format(quantile)
        return (metric_name, np.nanmean(pinball))
    return loss

def xgb_quantile_obj(quantile):
    """Return a custom objective ``loss(labels, preds) -> (grad, hess)`` for ``quantile``.

    Parameters
    ----------
    quantile : float
        Quantile level in the closed interval [0, 1].

    Returns
    -------
    callable
        Function returning the pinball-loss gradient and a constant unit Hessian.

    Raises
    ------
    ValueError
        If ``quantile`` is outside [0, 1]. The check runs once, when the objective
        is created, and uses an explicit test rather than ``assert`` so it is not
        stripped when Python runs with ``-O``.
    """
    if not 0 <= quantile <= 1:
        raise ValueError("Quantile value must be float between 0 and 1.")

    def loss(labels, preds):
        errors = preds - labels

        left_mask = errors < 0    # prediction below the label
        right_mask = errors > 0   # prediction above the label

        # Gradient is -quantile below the label, (1 - quantile) above it, and 0 where they are equal.
        grad = -quantile * left_mask + (1 - quantile) * right_mask
        # The pinball loss has zero curvature; a constant Hessian of ones is used instead.
        hess = np.ones_like(preds)

        return grad, hess
    return loss

# bst = xgb.train(hyperparams, train, num_rounds,
#                 obj=xgb_quantile_obj, feval=xgb_quantile_eval)

In [None]:
from xgboost import XGBRegressor
import xgboost as xgb

def XGB(q, X_train, Y_train, X_valid, Y_valid, X_test):
    """Fit one XGBoost regressor with the custom quantile objective and predict ``X_test``.

    Parameters
    ----------
    q : float
        Quantile level used by both the custom objective and the custom evaluation metric.
    X_train, Y_train
        Training features and target.
    X_valid, Y_valid
        Validation set monitored for early stopping.
    X_test
        Features to predict after fitting.

    Returns
    -------
    tuple (XGBRegressor, pandas.Series)
        The fitted model and its ``X_test`` predictions rounded to 2 decimals.
    """
    # (a) Modeling
    # `bagging_fraction` is a LightGBM parameter name that XGBoost does not
    # recognise; row subsampling is already requested through `subsample`.
    model = XGBRegressor(objective=xgb_quantile_obj(quantile=q), seed=2021,
                         n_estimators=10000, learning_rate=0.1, subsample=0.7)

    # NOTE(review): newer XGBoost releases expect `eval_metric` and
    # `early_stopping_rounds` on the estimator constructor and use a different
    # callable-metric signature; the call below matches the form that
    # xgb_quantile_eval is written for — confirm against the installed version.
    model.fit(X_train, Y_train, eval_metric=xgb_quantile_eval(quantile=q),
              eval_set=[(X_valid, Y_valid)], early_stopping_rounds=300, verbose=500)

    # (b) Predictions
    pred = pd.Series(model.predict(X_test).round(2))
    return model, pred

In [None]:
def train_data_XGB(X_train, Y_train, X_valid, Y_valid, X_test):
    """Fit one XGBoost model per entry of ``quantiles`` and collect the test predictions.

    Parameters
    ----------
    X_train, Y_train, X_valid, Y_valid, X_test
        Forwarded unchanged to ``XGB`` for every quantile.

    Returns
    -------
    tuple (list, pandas.DataFrame)
        The fitted models in ``quantiles`` order, and a frame with one column of
        ``X_test`` predictions per quantile (columns labelled with the quantile values).
    """
    XGB_models = []
    per_quantile_preds = []

    for q in quantiles:
        print(q)
        model, pred = XGB(q, X_train, Y_train, X_valid, Y_valid, X_test)
        XGB_models.append(model)
        per_quantile_preds.append(pred)

    # Assemble the frame once instead of re-concatenating it on every iteration.
    XGB_actual_pred = pd.concat(per_quantile_preds, axis=1) if per_quantile_preds else pd.DataFrame()
    XGB_actual_pred.columns = quantiles

    return XGB_models, XGB_actual_pred

In [None]:
# Target1: fit the quantile models on the Target1 split and predict X_test.
XGB_1, XGB_results_1 = train_data_XGB(X_train_1, Y_train_1, X_valid_1, Y_valid_1, X_test)
# Target2: same procedure on the Target2 split.
XGB_2, XGB_results_2 = train_data_XGB(X_train_2, Y_train_2, X_valid_2, Y_valid_2, X_test)

# MLP

In [None]:
import tensorflow as tf
from tensorflow.keras.backend import mean, maximum

def quantile_loss(q, y, pred):
  """Pinball loss for quantile level ``q``, averaged over the last axis."""
  residual = y - pred
  # For a positive residual (under-prediction) the larger term is q * residual;
  # for a negative residual (over-prediction) it is (q - 1) * residual.
  under_cost = q * residual
  over_cost = (q - 1) * residual
  return mean(maximum(under_cost, over_cost), axis=-1)

In [None]:
from keras.callbacks import EarlyStopping

# Stop training once val_loss has not improved for 20 epochs and roll the model
# back to the weights of its best epoch. This single instance is passed to every
# fit() call inside Stacking.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, restore_best_weights=True, patience=20)

In [None]:
# Reload the LightGBM predictions from the same paths the to_csv cell wrote them to,
# so this section can run without refitting the LightGBM models.
LGBM_predict_1 = pd.read_csv('/content/drive/MyDrive/Dacon/LGBM_predict_1.csv')
LGBM_predict_2 = pd.read_csv('/content/drive/MyDrive/Dacon/LGBM_predict_2.csv')
LGBM_test_result_1 = pd.read_csv('/content/drive/MyDrive/Dacon/LGBM_test_result_1.csv')
LGBM_test_result_2 = pd.read_csv('/content/drive/MyDrive/Dacon/LGBM_test_result_2.csv')

In [None]:
# Round every LightGBM prediction to 2 decimals before it is fed to the stacking model.
LGBM_predict_1 = LGBM_predict_1.round(2)
LGBM_predict_2 = LGBM_predict_2.round(2)
LGBM_test_result_1 = LGBM_test_result_1.round(2)
LGBM_test_result_2 = LGBM_test_result_2.round(2)

In [None]:
# Load XGBoost test predictions from xgb_2.csv.
# NOTE(review): no cell in this notebook writes xgb_2.csv — presumably it was
# produced by a separate run; confirm it matches the XGB models defined above.
XGB_test_result = pd.read_csv('/content/drive/MyDrive/Dacon/xgb_2.csv')

In [None]:
# Split the XGBoost test predictions into the Day7 and Day8 rows by their id.
# A vectorised, literal (non-regex) substring match replaces the per-row lambda
# and mirrors the `.str.contains` filter used when the submission is filled.
XGB_test_result_1 = XGB_test_result[XGB_test_result['id'].str.contains('Day7', regex=False)]
XGB_test_result_2 = XGB_test_result[XGB_test_result['id'].str.contains('Day8', regex=False)]

In [None]:
import pickle

# Load the XGBoost training-row predictions for both targets; the pickle is
# unpacked as a 2-item sequence.
# NOTE(review): pickle.load executes arbitrary code embedded in the file — only
# load this file if it was produced by a trusted run.
with open("/content/drive/MyDrive/Dacon/preds_xgb_2.pickle","rb") as fr:
    XGB_predict_1, XGB_predict_2 = pickle.load(fr)

In [None]:
# Drop the first column by position, keeping the remaining prediction columns.
# Presumably the dropped column is 'id' (used for the Day7/Day8 filter) — confirm
# against the layout of xgb_2.csv.
XGB_test_result_1 = XGB_test_result_1.iloc[:, 1:]
XGB_test_result_2 = XGB_test_result_2.iloc[:, 1:]

In [None]:
# Renumber the rows from 0 and discard the old index (no helper column is kept).
XGB_test_result_1 = XGB_test_result_1.reset_index(drop=True)
XGB_test_result_2 = XGB_test_result_2.reset_index(drop=True)

In [None]:
# Label the prediction columns with the quantile strings that Stacking looks up via str(q).
XGB_test_result_1.columns = ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
XGB_test_result_2.columns = ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']

# NOTE(review): the training-row predictions are labelled in REVERSED order
# ('0.9' first), unlike the test predictions above. This is only correct if the
# pickled frames store their columns from the 0.9 quantile down to 0.1 — confirm
# against the code that wrote preds_xgb_2.pickle.
XGB_predict_1.columns = ['0.9', '0.8', '0.7', '0.6', '0.5', '0.4', '0.3', '0.2', '0.1']
XGB_predict_2.columns = ['0.9', '0.8', '0.7', '0.6', '0.5', '0.4', '0.3', '0.2', '0.1']

In [None]:
# Round to 2 decimals, then floor negative predictions at 0.
# clip(lower=0) is the vectorised form of the per-element `0 if x <= 0 else x`
# and covers every column without hard-coding the column count.
XGB_predict_1 = XGB_predict_1.round(2).clip(lower=0)
XGB_predict_2 = XGB_predict_2.round(2).clip(lower=0)

In [None]:
def Stacking(LGBM_predict, LGBM_test_result, XGB_predict, XGB_test_result, Y_train, X_test):
  """Train one stacking MLP per quantile on LGBM + XGB predictions and predict the test days.

  Parameters
  ----------
  LGBM_predict, XGB_predict : pandas.DataFrame
      Base-model predictions on the training rows, one column per quantile
      labelled ``str(q)``. Both must have the same number of rows, a multiple of 48.
  LGBM_test_result, XGB_test_result : pandas.DataFrame
      Base-model predictions on the test rows, same column labels.
  Y_train : array-like
      Flat target values, one per training row, in the same row order as the predictions.
  X_test
      Unused; kept so existing calls keep working.

  Returns
  -------
  tuple (list, pandas.DataFrame)
      The fitted Keras models in ``quantiles`` order and a frame with one column
      of flattened test predictions per quantile.

  Raises
  ------
  ValueError
      If the number of target days differs from the number of input days, or
      if a row count is not a multiple of 48.
  """
  STEPS_PER_DAY = 48  # rows per day; also the width of the model output

  # The network consumes one (48, 2) block per day and emits 48 values per day,
  # so the flat per-row targets are regrouped into one row of 48 values per day.
  # Passing the flat series instead gives far more label rows than input samples.
  day_targets = np.asarray(Y_train).reshape(-1, STEPS_PER_DAY)

  ST_models = []
  per_quantile_preds = []

  for q in quantiles:
    print("★★★★★★★★★★  ", q, "  ★★★★★★★★★★")

    # Pair the two base predictions per row -> (rows, 2), then group rows into days.
    MLP_train_x = np.column_stack([LGBM_predict[str(q)], XGB_predict[str(q)]])
    MLP_train_x = MLP_train_x.reshape(-1, STEPS_PER_DAY, 2)

    if len(MLP_train_x) != len(day_targets):
      raise ValueError(
          "Stacking inputs cover %d days but Y_train covers %d days."
          % (len(MLP_train_x), len(day_targets)))

    # Dense layers without an `activation` argument are linear; only the output layer uses ReLU.
    MLP_model = tf.keras.Sequential([
        tf.keras.layers.Dense(units=48, input_shape=(STEPS_PER_DAY, 2)),
        tf.keras.layers.Dense(units=24),
        tf.keras.layers.Dense(units=12),
        tf.keras.layers.Dense(units=6),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=100),
        tf.keras.layers.Dense(units=STEPS_PER_DAY, activation='relu'),
    ])

    # `q=q` freezes the current quantile in the loss; a plain closure would make
    # every stored model see the last loop value once the loop has finished.
    MLP_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0007),
                      loss=lambda y, pred, q=q: quantile_loss(q, y, pred))

    # Full-batch training: the batch size follows the number of training days.
    MLP_model.fit(MLP_train_x, day_targets, epochs=1000, batch_size=len(MLP_train_x),
                  validation_split=0.25, callbacks=[es])

    # Same pairing and day grouping for the test rows.
    MLP_test_x = np.column_stack([LGBM_test_result[str(q)], XGB_test_result[str(q)]])
    MLP_test_x = MLP_test_x.reshape(-1, STEPS_PER_DAY, 2)

    # (days, 48) -> flat, one value per test row.
    pred = pd.Series(MLP_model.predict(MLP_test_x).flatten())

    per_quantile_preds.append(pred)
    ST_models.append(MLP_model)

  # Assemble the frame once instead of re-concatenating it on every iteration.
  ST_pred = pd.concat(per_quantile_preds, axis=1) if per_quantile_preds else pd.DataFrame()
  ST_pred.columns = quantiles

  return ST_models, ST_pred

In [None]:
# Stack the Target1 models. The [192:52320] slices keep 52128 rows, i.e. 1086 * 48,
# which is the size Stacking reshapes its training input to.
# NOTE(review): XGB_predict_1[:-39] drops the last 39 rows, presumably to reach the
# same row count — confirm that the LGBM and XGB frames then cover the same time steps.
MLP_models_1, MLP_result_1 = Stacking(LGBM_predict_1[192:52320], LGBM_test_result_1, XGB_predict_1[:-39], XGB_test_result_1, y_train_1[192:52320], X_test)

★★★★★★★★★★   0.1   ★★★★★★★★★★
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Restoring model weights from the end of the best epoch.
Epoch 00060: early stopping
★★★★★★★★★★   0.2   ★★★★★★★★★★
Epoch 1/1000
Epoch 2/1000

In [None]:
# Stack the Target2 models. 240-48 equals 192, so the slices keep 52128 rows
# (1086 * 48), which is the size Stacking reshapes its training input to.
# NOTE(review): XGB_predict_2[:-39] drops the last 39 rows, presumably to reach the
# same row count — confirm that the LGBM and XGB frames then cover the same time steps.
MLP_models_2, MLP_result_2 = Stacking(LGBM_predict_2[240-48:52320], LGBM_test_result_2, XGB_predict_2[:-39], XGB_test_result_2, y_train_2[192:52320], X_test)

★★★★★★★★★★   0.1   ★★★★★★★★★★
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000

# RESULT

In [None]:
# Reload the template and fill the quantile columns (from "q_0.1" to the last column)
# of the Day7 rows with the Target1 stack and of the Day8 rows with the Target2 stack.
# Values are assigned positionally via .values after sorting the predictions by index.
# NOTE(review): the displayed output contains rows where a higher quantile is below a
# lower one (e.g. q_0.9 = 0.0 next to q_0.8 = 13.36) and non-zero values at 0h00m —
# check the stacked predictions before submitting.
submission = pd.read_csv('/content/drive/MyDrive/Dacon/sample_submission.csv')
submission.loc[submission.id.str.contains("Day7"), "q_0.1":] = MLP_result_1.sort_index().values
submission.loc[submission.id.str.contains("Day8"), "q_0.1":] = MLP_result_2.sort_index().values
submission

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
0,0.csv_Day7_0h00m,0.0,0.0,0.0,0.0,0.0,0.000000,3.729900,13.514359,27.080038
1,0.csv_Day7_0h30m,0.0,0.0,0.0,0.0,0.0,0.000000,3.794032,13.358665,0.000000
2,0.csv_Day7_1h00m,0.0,0.0,0.0,0.0,0.0,0.000000,4.880234,13.181517,27.436176
3,0.csv_Day7_1h30m,0.0,0.0,0.0,0.0,0.0,0.000000,5.274422,13.634405,22.294998
4,0.csv_Day7_2h00m,0.0,0.0,0.0,0.0,0.0,0.000000,2.656944,12.665893,0.000000
...,...,...,...,...,...,...,...,...,...,...
7771,80.csv_Day8_21h30m,0.0,0.0,0.0,0.0,0.0,0.000000,14.875137,27.642363,43.029587
7772,80.csv_Day8_22h00m,0.0,0.0,0.0,0.0,0.0,0.000000,11.120885,25.983728,0.000000
7773,80.csv_Day8_22h30m,0.0,0.0,0.0,0.0,0.0,0.000000,12.158747,34.509766,40.329529
7774,80.csv_Day8_23h00m,0.0,0.0,0.0,0.0,0.0,2.414246,0.000000,26.027014,0.000000


In [None]:
# Inspect the first 48 rows of the filled submission.
submission[0:48]

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
0,0.csv_Day7_0h00m,0.0,0.0,0.0,0.0,0.0,0.0,3.7299,13.514359,27.080038
1,0.csv_Day7_0h30m,0.0,0.0,0.0,0.0,0.0,0.0,3.794032,13.358665,0.0
2,0.csv_Day7_1h00m,0.0,0.0,0.0,0.0,0.0,0.0,4.880234,13.181517,27.436176
3,0.csv_Day7_1h30m,0.0,0.0,0.0,0.0,0.0,0.0,5.274422,13.634405,22.294998
4,0.csv_Day7_2h00m,0.0,0.0,0.0,0.0,0.0,0.0,2.656944,12.665893,0.0
5,0.csv_Day7_2h30m,0.0,0.0,0.0,0.0,0.0,0.507586,6.913562,0.0,23.632666
6,0.csv_Day7_3h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.csv_Day7_3h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.631079,27.345919
8,0.csv_Day7_4h00m,0.0,0.0,0.0,0.0,0.0,0.0,6.364333,13.665729,0.0
9,0.csv_Day7_4h30m,0.0,0.0,0.0,0.0,0.0,0.0,5.016449,0.0,0.0


In [None]:
# Write the filled submission to Stack3.csv without the row index.
submission.to_csv('/content/drive/MyDrive/Dacon/Stack3.csv', index=False)