In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import platform
# Pick a font family that contains Hangul glyphs so Korean text in figures
# renders on this OS. Platforms not listed keep matplotlib's default font.
_KOREAN_FONT_BY_OS = {
    'Darwin': 'AppleGothic',     # macOS
    'Windows': 'Malgun Gothic',  # Windows
}
_korean_font = _KOREAN_FONT_BY_OS.get(platform.system())
if _korean_font is not None:
    plt.rc('font', family=_korean_font)

# Axes defaults: do not use the Unicode minus sign for negative tick labels,
# and draw x/y axis labels at font size 30.
plt.rc('axes', unicode_minus=False, labelsize=30)


# Render inline figures in 'retina' format so that text is drawn sharply.
%config InlineBackend.figure_format = 'retina'

In [None]:
import os
import random
import time

from itertools import combinations

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow_addons as tfa

import seaborn as sns

In [None]:
# Location of the final merged dataset. The default is the original author's
# local checkout; set the SOLAR_DATA_CSV environment variable to point at the
# file on any other machine instead of editing this cell.
DATA_CSV_PATH = os.environ.get(
    "SOLAR_DATA_CSV",
    "C:/Users/wjddu/git/Domestic-Solar-Power-Output-Prediction/최종 data.csv",
)
dataframe = pd.read_csv(DATA_CSV_PATH)

In [None]:
# Show the loaded frame as the cell output for a quick visual sanity check.
dataframe

In [None]:
# Hide every CUDA device from this process; intended to force TensorFlow onto
# the CPU (the GPU count printed later in this cell confirms the effect).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

def set_random_seed(seed=0):
    """Seed Python's `random`, NumPy and TensorFlow with the same value.

    Args:
        seed: Integer seed passed unchanged to each library's seeding call.
    """
    # Same order as before: stdlib RNG, NumPy global RNG, TensorFlow global seed.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)


# Seed all RNGs once up front, then report how many GPUs TensorFlow can see
# and whether this TensorFlow build was compiled with CUDA support.
set_random_seed(0)
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(visible_gpus), tf.test.is_built_with_cuda())

In [None]:
# Model inputs: two calendar columns followed by five weather columns
# (temperature, humidity, dew-point temperature, solar irradiance, cloud amount).
X_features = [
    'month', 'hour',
    '기온', '습도', '이슬점 온도', '일사량', '구름양',
]
# Prediction target: generated power. Kept as a one-element list so that the
# selection below yields a single-column DataFrame rather than a Series.
y_feature = ['발전량']

X, y = dataframe[X_features], dataframe[y_feature]

X.shape, y.shape

In [None]:
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

# NOTE(review): both scalers are fit on the full dataset, i.e. before the
# train/test split performed later in the notebook, so min/max statistics of
# the test period leak into training. Left as-is to keep results comparable;
# consider fitting on the learning portion only.

# The first two columns (month, hour) are deliberately left unscaled, exactly
# as before; every remaining input column is min-max scaled.
scaled_cols = X.columns[2:]

X_norm, y_norm = X.copy(), y.copy()
# Assign by column label instead of `iloc[:, 2:] = ...`: label assignment
# replaces the columns outright, so float results can be stored even when the
# source columns are integer-typed (positional in-place assignment of floats
# into int columns is deprecated upcasting in pandas >= 2.1).
X_norm[scaled_cols] = x_scaler.fit_transform(X[scaled_cols])
y_norm[y.columns] = y_scaler.fit_transform(y)

X_norm.shape, y_norm.shape

In [None]:
# Summary statistics of the raw (unscaled) dataset.
dataframe.describe()

In [None]:
def split_sequences(feature, label, timestep=22):
    """Cut aligned feature/label series into sliding windows for forecasting.

    For every position ``end`` from ``timestep`` up to ``len(feature) - 1`` a
    sample is produced whose input is the ``timestep`` rows immediately before
    ``end`` and whose target is the one-element slice of ``label`` at ``end``.

    Args:
        feature: Sliceable sequence of input rows (e.g. a 2-D array).
        label: Sliceable sequence of targets aligned with ``feature``.
        timestep: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected windows and
        targets. Both are empty when ``len(feature) <= timestep``.
    """
    window_ends = range(timestep, len(feature))
    windows = [feature[end - timestep:end] for end in window_ends]
    # Slice (not index) so each target keeps a length-1 axis.
    targets = [label[end:end + 1] for end in window_ends]
    return np.array(windows), np.array(targets)

# Window the scaled inputs and the flattened scaled target with the default
# timestep of split_sequences.
X_total, y_total = split_sequences(
    X_norm.to_numpy(),
    y_norm.to_numpy().ravel(),
)

In [None]:
# Inspect the scaled input frame.
X_norm

In [None]:
# Inspect the scaled target frame.
y_norm

In [None]:
# Inspect the windowed input array produced by split_sequences.
X_total

In [None]:
# Inspect the windowed target array produced by split_sequences.
y_total

In [None]:
# Hold out the last 20% of the windows as the test set. shuffle=False keeps the
# original order, so the test windows are the final ones in the series.
X_learn, X_test, y_learn, y_test = train_test_split(
    X_total,
    y_total,
    test_size=0.2,
    shuffle=False,
    random_state=0,
)

# Test targets mapped back from the min-max scale to the original target scale.
y_test_inv = y_scaler.inverse_transform(y_test)

# y_true_df = pd.DataFrame({'y_true': y_test_inv.ravel()})
# y_true_df.to_csv('./data_0310/total_result.csv', index=False)

X_learn.shape, X_test.shape, y_learn.shape, y_test.shape

In [None]:
def create_train_valid_test(X, y, shuffle=True, val_size=None):
    """Split learning data into training and validation parts.

    Args:
        X: Input samples to split.
        y: Targets aligned with ``X``.
        shuffle: Forwarded to ``train_test_split``; pass ``False`` to keep the
            original order (the validation part is then taken from the end).
        val_size: Size of the validation part, forwarded as ``test_size``
            (an int is a sample count, a float a fraction). When ``None``
            (default) the number of samples in the module-level ``X_test`` is
            used, which is the previous hard-wired behaviour.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)``.
    """
    if val_size is None:
        # Backward-compatible default: validation set as large as the test
        # set. This reads the notebook-level X_test, so the hold-out split
        # cell must have been executed first.
        val_size = X_test.shape[0]
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=val_size, random_state=0, shuffle=shuffle
    )
    return X_train, X_val, y_train, y_val

In [None]:
def generate_model_lstm(trainX, trainy, valX, valy, units=60, name='lstm', ndx=0, checkpoint_dir=None):
    """Build, compile and train a single-layer LSTM regressor.

    The network is ``LSTM(units) -> Dense(1)``, trained with RMSprop
    (learning rate 0.001) on mean squared error for up to 1000 epochs. Training
    stops early once ``val_loss`` has not improved for 30 epochs, and the
    weights of the best epoch are restored before returning.

    Args:
        trainX: Training inputs passed to ``model.fit``.
        trainy: Training targets.
        valX: Validation inputs.
        valy: Validation targets.
        units: Number of LSTM units.
        name: Model name used in the checkpoint file name.
        ndx: Integer index used in the checkpoint file name.
        checkpoint_dir: Optional directory for a best-model checkpoint named
            ``'<name>_<ndx>_best.hdf5'``. ``None`` (default) writes nothing to
            disk, which matches the previous behaviour, where the checkpoint
            callback was constructed but never passed to ``fit``. Passing
            ``'./data/0222'`` yields the path the original code had hard-coded.

    Returns:
        The trained ``tf.keras`` model.
    """
    print(units)

    start_time = time.time()
    # Start from a clean Keras state and a fixed seed so repeated calls are comparable.
    tf.keras.backend.clear_session()
    set_random_seed(seed=0)

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(units))
    model.add(tf.keras.layers.Dense(1))

    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30, verbose=0, mode='auto', restore_best_weights=True)
    tqdm_callback = tfa.callbacks.TQDMProgressBar(show_epoch_progress=False)
    callbacks = [early_stop, tqdm_callback]

    if checkpoint_dir is not None:
        # Opt-in on-disk checkpoint of the best model (by val_loss).
        os.makedirs(checkpoint_dir, exist_ok=True)
        filepath = os.path.join(checkpoint_dir, '%s_%d_best.hdf5' % (name, ndx))
        callbacks.append(
            tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto')
        )

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mean_squared_error'])

    # shuffle=False: batches are drawn in the given sample order every epoch.
    model.fit(trainX, trainy, epochs=1000, validation_data=(valX, valy), verbose=0, shuffle=False, callbacks=callbacks)
    print("--- %s seconds ---" % (time.time() - start_time))
    return model

def evaluate(y_true, y_pred):
    """Score predictions with R^2, MAE and RMSE.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, same shape as ``y_true``.

    Returns:
        Tuple ``(r2_percent, mae, rmse)``, each rounded to one decimal place;
        ``r2_percent`` is the R^2 score multiplied by 100.
    """
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    # `mean_squared_error(..., squared=False)` was deprecated in scikit-learn
    # 1.4 and removed in 1.6. Taking the square root of the per-output MSE and
    # averaging reproduces the same value on every scikit-learn version.
    per_output_mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    rmse = np.mean(np.sqrt(per_output_mse))
    return np.round(r2*100, 1), np.round(mae, 1), np.round(rmse, 1)

def save_result(colname, data, path='./data_0310/total_result.csv'):
    """Add (or overwrite) one column in the shared results CSV.

    Args:
        colname: Name of the column to write.
        data: Array-like of values; it is flattened to 1-D before being stored.
        path: CSV file to update. Defaults to the location previously
            hard-coded in this function.

    Raises:
        ValueError: If the file already exists and the number of values does
            not match its number of rows (raised by pandas on assignment).
    """
    values = np.asarray(data).ravel()

    if os.path.exists(path):
        result_df = pd.read_csv(path)
    else:
        # First call: start a fresh table instead of failing on a missing
        # file, creating the parent directory when necessary.
        parent_dir = os.path.dirname(path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        result_df = pd.DataFrame()

    result_df[colname] = values
    result_df.to_csv(path, index=False)

In [None]:
# Split the learning data into training and validation parts; shuffle=False
# keeps the sample order, so the validation part comes from the end.
X_train, X_val, y_train, y_val = create_train_valid_test(X_learn, y_learn, shuffle=False)

X_train.shape, X_val.shape, y_train.shape, y_val.shape

In [None]:
# Train an LSTM with 100 units on the training windows, validating on X_val/y_val.
model = generate_model_lstm(X_train, y_train, X_val, y_val, units=100)

# Predict the held-out windows, undo the target scaling, and score against
# the inverse-transformed ground truth.
y_forecast = model.predict(X_test)
y_forecast_inv = y_scaler.inverse_transform(y_forecast)
r2, mae, rmse = evaluate(y_test_inv, y_forecast_inv)


# Cell output: R^2 (as a percentage) and RMSE; mae is computed but not shown.
r2, rmse

In [None]:
# Compare predictions with ground truth on a fixed window of the test set.
# Both series are still in min-max scaled units (y_test / y_forecast, not the
# inverse-transformed arrays).
plot_start = 100
# Clamp the upper bound so a test set shorter than 230 samples does not raise
# an IndexError; the window is simply shorter (or empty) in that case.
plot_stop = min(230, len(y_test))
x_range = range(plot_start, plot_stop)

# Kept from the original cell so that any later figures still pick up this size.
plt.rcParams["figure.figsize"] = (20,8)

fig, ax = plt.subplots()
ax.plot(x_range, y_test[plot_start:plot_stop], color='red', label="true")
ax.plot(x_range, y_forecast[plot_start:plot_stop], color='blue', label="pred")
ax.set_xlabel("test sample index")
ax.set_ylabel("scaled power output")
ax.set_title("LSTM forecast vs. ground truth (test set)")
ax.legend()
ax.grid()
plt.show()