In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import sklearn
from sklearn import tree
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import  mean_squared_error ,mean_absolute_percentage_error
from sklearn.ensemble import RandomForestRegressor, VotingRegressor 

from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingRegressor

In [2]:
df = pd.read_csv('./data/df_power_combined.csv')

In [3]:
aws = pd.read_csv('./data/2020_2022_aws.csv')
aws_1 = aws[aws['지점명']=='기상청']
aws_1['일시'] = pd.to_datetime(aws_1['일시'])
aws_1['연'] = aws_1['일시'].dt.year
aws_1['월'] = aws_1['일시'].dt.month
aws_1['일'] = aws_1['일시'].dt.day

In [8]:
aws_2 = aws_1.drop(['일시','지점명'], axis=1).groupby(['연','월','일']).mean()
aws3 = aws_2.reset_index()
aws4 = aws3[['연', '월', '일', '기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)','습도(%)']]

df['기준일시'] = pd.to_datetime(df['기준일시'])
df['연'] = df['기준일시'].dt.year
df['월'] = df['기준일시'].dt.month
df['일'] = df['기준일시'].dt.day

df_e = df.drop(['기준일시'], axis=1).groupby(['연','월','일']).sum()
df_e_1 = df_e.reset_index()
df_e_2 = df_e_1[['연','월','일','현재수요(MW)']]
total = pd.merge(aws4,df_e_2,on=['연','월','일'])

In [51]:
def DT_anaytics(X_train, X_test,y_train, y_test):    
    DT = DecisionTreeRegressor()
    params = {'max_depth': [5], 'min_samples_split': [100, 1000, 10000, 100000]}
    DT_search = GridSearchCV(DT, params, cv=5, scoring='neg_mean_squared_error')
    DT_search.fit(X_train,y_train)
    best_clf = DT_search.best_estimator_
    y_pred = DT_search.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)
# Calculate MAPE
    mape= mean_absolute_percentage_error(y_test, y_pred)
    print(f'''
rmse for Decision Tree :{rmse}
mse for Decision Tree :{mse}
mape for Decision Tree :{mape}
''')

def RF_anaytics(X_train, X_test,y_train, y_test):    
    rfc = RandomForestRegressor(max_depth=20, random_state=24)
    rfc.fit(X_train,y_train)
    y_pred = rfc.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)
    mape= mean_absolute_percentage_error(y_test, y_pred)
    print(f'''
rmse for RF_anaytics :{rmse}
mse for RF_anaytics :{mse}
mape for Decision Tree :{mape}
''')


def MLP_anaytics(X_train, X_test,y_train, y_test): 
# Create the MLPRegressor with 3 hidden layers, 1500 iterations, and 30% dropout
    regr = MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1500, random_state=42, alpha=0.03)
    regr.fit(X_train, y_train)
    regr.predict(X_test)
    y_pred = regr.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)
    mape= mean_absolute_percentage_error(y_test, y_pred)
    print(f'''
rmse for MLP_anaytics :{rmse}
mse for MLP_anaytics :{mse}
mape for MLP_anaytics :{mape}
''')
    
    
def ensemble(X_train, X_test, y_train, y_test):
    # Individual regressors
    rf_regressor = RandomForestRegressor(max_depth=20, random_state=24)
    mlp_regressor = MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1500, random_state=42, alpha=0.03)

    # Ensemble regressor
    ensemble_regressor = VotingRegressor(estimators=[
        ('rf', rf_regressor),
        ('mlp', mlp_regressor)
    ])

    ensemble_regressor.fit(X_train, y_train)
    y_pred = ensemble_regressor.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    print(f'''
rmse for Ensemble :{rmse}
mse for Ensemble :{mse}
mape for Ensemble :{mape}
''')

In [54]:
total_1 = total[['기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)','현재수요(MW)']].dropna()

X_train, X_test, y_train, y_test = train_test_split(total_1.drop('현재수요(MW)', axis=1),total_1['현재수요(MW)'], random_state= 24)
# Create a StandardScaler object
scaler = StandardScaler()
# Fit the scaler on your data and transform it
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

DT_anaytics(X_train, X_test,y_train, y_test)
RF_anaytics(X_train, X_test,y_train, y_test)
MLP_anaytics(X_train, X_test,y_train, y_test)
ensemble(X_train, X_test,y_train, y_test)


rmse for Decision Tree :1694332.0303341905
mse for Decision Tree :2870761029016.38
mape for Decision Tree :0.07479417837955243


rmse for RF_anaytics :1622366.0676180415
mse for RF_anaytics :2632071657358.4277
mape for Decision Tree :0.07030907073297123






rmse for MLP_anaytics :1621178.919023986
mse for MLP_anaytics :2628221087487.7793
mape for MLP_anaytics :0.07188321964978714






rmse for Ensemble :1566156.4016793799
mse for Ensemble :2452845874521.303
mape for Ensemble :0.06883718497499088






rmse for Stacking Ensemble :1551713.906707376
mse for Stacking Ensemble :2407816048269.0674
mape for Stacking Ensemble :0.0674778548400263

