DATA

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math
from sklearn.metrics import mean_absolute_error

In [40]:
data1 = pd.read_csv("temp+elec2.csv")
data1

Unnamed: 0,year,month,date,ATemp,kWH,humidity,Htemp,LTemp,precipitation
0,2013,6,2013-06-01,24.4,229.0,60,32.6,16.3,12.0
1,2013,7,2013-07-01,25.5,244.0,79,31.3,20.5,165.0
2,2013,8,2013-08-01,27.7,289.0,69,33.9,21.1,45.0
3,2013,9,2013-09-01,21.8,260.0,63,31.1,10.9,50.0
4,2013,10,2013-10-01,15.8,217.0,59,28.1,4.3,6.5
...,...,...,...,...,...,...,...,...,...
117,2023,3,2023-03-01,9.6,206.0,51,25.1,-1.9,2.6
118,2023,4,2023-04-01,13.8,205.0,59,28.4,3.1,6.9
119,2023,5,2023-05-01,19.4,200.0,62,31.2,7.9,14.1
120,2023,6,2023-06-01,23.4,,73,27.9,19.7,195.6


LSTM

In [16]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error
from matplotlib import pyplot as plt
from keras.layers import Dropout

In [52]:
pip install keras-tuner --upgrade


Collecting keras-tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
   ---------------------------------------- 0.0/128.9 kB ? eta -:--:--
   ---------------------------------- ----- 112.6/128.9 kB 6.8 MB/s eta 0:00:01
   ---------------------------------------- 128.9/128.9 kB 1.9 MB/s eta 0:00:00
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.


In [106]:
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from kerastuner.tuners import Hyperband
import numpy as np

def augment_data(X, Y, shift_fraction=0.1, noise_factor=0.01, num_augmentations=10):
    augmented_X = []
    augmented_Y = []

    for _ in range(num_augmentations):
        num_samples, num_features = X.shape

        # 타임 시프트 (Shift)
        shift = int(shift_fraction * num_samples)
        X_shifted = np.roll(X, shift, axis=0)

        # 노이즈 추가
        noise = np.random.normal(loc=0, scale=noise_factor, size=(num_samples, num_features))
        X_noisy = X + noise

        # 조합
        X_combined = np.concatenate([X, X_shifted, X_noisy], axis=0)
        Y_combined = np.concatenate([Y] * 3, axis=0)

        augmented_X.append(X_combined)
        augmented_Y.append(Y_combined)

    return np.vstack(augmented_X), np.vstack(augmented_Y)

# 입력 데이터 선택 (시계열 특성 제외)
X = data2[['month', 'ATemp', 'precipitation', 'humidity','Htemp','LTemp']].values
Y = data2['kWH'].values.reshape(-1, 1)

# Min-Max 스케일링 시계열 특성이 아닌 것들 시계열 특성과 유사하게 만듬
scaler_X = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler_X.fit_transform(X)
scaler_Y = MinMaxScaler(feature_range=(0, 1))
Y_scaled = scaler_Y.fit_transform(Y)

# 데이터셋 증식 (10배)
X_augmented, Y_augmented = augment_data(X_scaled, Y_scaled, num_augmentations=10)

# 데이터셋 분할
X_train_aug, X_test_aug, Y_train_aug, Y_test_aug = train_test_split(X_augmented, Y_augmented, test_size=0.2, random_state=42)

# LSTM 모델 구성
def build_model(hp):
    model = Sequential()
    model.add(LSTM(units=hp.Int('units', min_value=32, max_value=256, step=32), 
                   return_sequences=True, 
                   input_shape=(X_train.shape[1], 1)))
    model.add(Dropout(rate=hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(LSTM(units=hp.Int('units', min_value=32, max_value=256, step=32), 
                   return_sequences=False))
    model.add(Dropout(rate=hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(Dense(1))
    
    optimizer = Adam(lr=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG', default=1e-3))
    
    model.compile(loss='mean_absolute_error', optimizer=optimizer)
    return model

# Hyperband 튜너 설정
tuner = Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=10,
    factor=3,
    directory='my_tuner_directory',
    project_name='my_lstm_tuner',
    overwrite=True  # 기존 프로젝트를 덮어쓰도록 설정
)


# 탐색 수행 (훈련 데이터, 검증 데이터는 X_train, Y_train을 사용)
tuner.search(X_train, Y_train, epochs=10, validation_data=(X_test, Y_test))

# 최적의 모델 및 하이퍼파라미터 출력
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hyperparameters}")
best_units = best_hyperparameters.get('units')
best_dropout_1 = best_hyperparameters.get('dropout_1')
best_dropout_2 = best_hyperparameters.get('dropout_2')
best_learning_rate = best_hyperparameters.get('learning_rate')

print(f"Best Hyperparameters - Units: {best_units}, Dropout_1: {best_dropout_1}, Dropout_2: {best_dropout_2}, Learning Rate: {best_learning_rate}")




Trial 30 Complete [00h 00m 06s]
val_loss: 0.1407807618379593

Best val_loss So Far: 0.1334385871887207
Total elapsed time: 00h 02m 34s
Best Hyperparameters: <keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x000002380DBB9B20>
Best Hyperparameters - Units: 128, Dropout_1: 0.4, Dropout_2: 0.2, Learning Rate: 0.00868418788171632


In [110]:
from keras.callbacks import EarlyStopping
import numpy as np

# 데이터 증식 함수
def augment_data(X, Y, shift_fraction=0.1, noise_factor=0.01, num_augmentations=10):
    augmented_X = []
    augmented_Y = []

    for _ in range(num_augmentations):
        num_samples, num_features = X.shape

        # 타임 시프트 (Shift)
        shift = int(shift_fraction * num_samples)
        X_shifted = np.roll(X, shift, axis=0)

        # 노이즈 추가
        noise = np.random.normal(loc=0, scale=noise_factor, size=(num_samples, num_features))
        X_noisy = X + noise

        # 조합
        X_combined = np.concatenate([X, X_shifted, X_noisy], axis=0)
        Y_combined = np.concatenate([Y] * 3, axis=0)

        augmented_X.append(X_combined)
        augmented_Y.append(Y_combined)

    return np.vstack(augmented_X), np.vstack(augmented_Y)

# 입력 데이터 선택 (시계열 특성 제외)
X = data2[['month', 'ATemp', 'precipitation', 'humidity','Htemp','LTemp']].values
Y = data2['kWH'].values.reshape(-1, 1)

# Min-Max 스케일링 시계열 특성이 아닌 것들 시계열 특성과 유사하게 만듬
scaler_X = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler_X.fit_transform(X)
scaler_Y = MinMaxScaler(feature_range=(0, 1))
Y_scaled = scaler_Y.fit_transform(Y)

# 데이터셋 증식 (10배)
X_augmented, Y_augmented = augment_data(X_scaled, Y_scaled, num_augmentations=10)

# 데이터셋 분할
X_train_aug, X_test_aug, Y_train_aug, Y_test_aug = train_test_split(X_augmented, Y_augmented, test_size=0.2, random_state=42)

# LSTM 모델 구성
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(X_train_aug.shape[1], 1)))
model.add(Dropout(0.4))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))

# 옵티마이저 설정
optimizer = Adam(lr=0.00868418788171632)
model.compile(loss='mean_absolute_error', optimizer=optimizer)

early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# 훈련
model.fit(X_train_aug, Y_train_aug, epochs=1000, verbose=1, batch_size=32, validation_data=(X_test_aug, Y_test_aug), callbacks=[early_stopping])

# 예측 및 역전환
Y_train_pred = model.predict(X_train_aug)
Y_test_pred = model.predict(X_test_aug)

Y_train_pred = scaler_Y.inverse_transform(Y_train_pred)
Y_test_pred = scaler_Y.inverse_transform(Y_test_pred)
Y_train_aug = scaler_Y.inverse_transform(Y_train_aug)
Y_test_aug = scaler_Y.inverse_transform(Y_test_aug)

# 평가 및 시각화
train_mae = mean_absolute_error(Y_train_aug, Y_train_pred)
test_mae = mean_absolute_error(Y_test_aug, Y_test_pred)

print('Train MAE: %.9f' % (train_mae))
print('Test MAE: %.9f' % (test_mae))



Epoch 1/1000


  super().__init__(name, **kwargs)


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000


In [105]:
X_train_aug.shape

(2880, 4)

MLP

In [None]:
Train MAE: 5.368099112
Test MAE: 6.535448201