In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import pandas as pd
import numpy as np
import random
import pickle
import re
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
import seaborn as sns
# from ucimlrepo import fetch_ucirepo 

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, random_split

from data.data_loader import PowerConsumptionDataset, PowerWeatherDataset
from models.lstm import LSTMModel
from models.lstm_attention import LSTMWithAttention, BiLSTMWithAttention
from models.gru import GRUModel
from models.utils import create_model, train_and_evaluate, load_model, evaluate_r2_score
from explainers.lime import LimeExplainer
from explainers.shap import ShapExplainer
from explainers.attention import AttentionExplainer
from explainers.grad_cam import GradCAMExplainer
from explainers.lrp import LRPExplainer

In [2]:
# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [43]:
# 'LSTM' or 'GRU' for LIME and SHAP, 'LSTM_Attention' for 'Attention' 
model_name = 'LSTM'  
model_name_attention = 'LSTM_Attention'  
hidden_size = 50
num_layers = 2
output_size = 1
dropout = 0.2
num_epochs = 50
batch_size = 64
learning_rate = 0.001
sequence_length = 60  # e.g., past 60 minutes
feature_idx = [0,1,2,3,4,5,6]
# feature_idx = []
if len(feature_idx) == 0: num_of_features = 7
else: num_of_features = len(feature_idx)
input_size = num_of_features  # Number of total features in the dataset

In [44]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [45]:
class PowerConsumptionDataset:
    def __init__(self, file_path, sequence_length=24*30, prediction_length=24, target_features=[]):
        self.file_path = file_path
        self.sequence_length = sequence_length
        self.prediction_length = prediction_length
        self.scaler = MinMaxScaler()

        all_features = ['Global_active_power', 'Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
        
        
        if len(target_features) == 0:
            # self.selected_features = ['Global_active_power', 'Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
            self.selected_features = ['Global_active_power', 'Global_intensity', 
                                      'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3', 
                                      'Temp_Avg', 'Humidity_Avg', 'sin_hour', 'cos_hour', 
                                      'sin_day', 'cos_day', 'sin_month', 'cos_month']
        else:
            self.selected_features = target_features
    
        # self.selected_features = all_features[0:num_of_features]
        
    def load_data(self):
        # Load and preprocess data
        data = pd.read_csv(self.file_path)
        data.drop(columns=['datetime'], errors='ignore', inplace=True)
        # Fill missing values
        data.fillna(data.mean(), inplace=True)

        # Select all features
        data_selected = data[self.selected_features]

        # Normalize the data
        data_scaled = self.scaler.fit_transform(data_selected.values)

        # Create sequences
        sequences, targets = self.create_sequences(data_scaled)

        # Split the data into train and eval sets
        train_sequences, eval_sequences, train_targets, eval_targets = train_test_split(sequences, targets, test_size=0.2, random_state=42)

        return train_sequences, eval_sequences, train_targets, eval_targets

    # def create_sequences(self, data):
    #     sequences = []
    #     targets = []
    #     for i in range(len(data) - self.sequence_length):
    #         sequences.append(data[i:i + self.sequence_length])
    #         targets.append(data[i + self.sequence_length, 0])  # Predicting 'Global_active_power'
    #     return np.array(sequences), np.array(targets)
    def create_sequences(self, data):
        sequences = []
        targets = []
        for i in range(len(data) - self.sequence_length - self.prediction_length):
            sequences.append(data[i:i + self.sequence_length])
            # Next 'prediction_length' values for 'Global_active_power' (first column)
            targets.append(data[i + self.sequence_length : i + self.sequence_length + self.prediction_length, 0])
        return np.array(sequences), np.array(targets)



class PowerWeatherDataset:
    def __init__(self, file_path, sequence_length=24*30, prediction_length=24, target_features=[]):
        self.file_path = file_path
        self.sequence_length = sequence_length  
        self.prediction_length = prediction_length  
        self.scaler = MinMaxScaler()

        if len(target_features) == 0:
            self.selected_features = ['Global_active_power', 'Global_intensity', 
                                      'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3', 
                                      'Temp_Avg', 'Humidity_Avg', 'sin_hour', 'cos_hour', 
                                      'sin_day', 'cos_day', 'sin_month', 'cos_month']
        else:
            self.selected_features = target_features

    def load_data(self):
        # Load and preprocess data
        data = pd.read_csv(self.file_path)
        data.drop(columns=['datetime'], errors='ignore', inplace=True)

        
        data.fillna(method='ffill', inplace=True)  # Use forward fill or interpolation for time series data
        data.interpolate(method='linear', inplace=True)

        # Scale selected features
        data_scaled = self.scaler.fit_transform(data[self.selected_features].values)

        # Train-test split before creating sequences
        train_size = int(len(data_scaled) * 0.8)
        train_data, eval_data = data_scaled[:train_size], data_scaled[train_size:]

        # Create sequences for both train and eval
        train_sequences, train_targets = self.create_sequences(train_data)
        eval_sequences, eval_targets = self.create_sequences(eval_data)

        return train_sequences, eval_sequences, train_targets, eval_targets

    def create_sequences(self, data):
        sequences = []
        targets = []

        for i in range(len(data) - self.sequence_length - self.prediction_length):
            sequences.append(data[i:i + self.sequence_length, :])
            targets.append(data[i + self.sequence_length: i + self.sequence_length + self.prediction_length, 0])

        return np.array(sequences), np.array(targets)

target_features = ['Global_active_power', 'Global_active_power', 'Voltage', 'Global_intensity', 
                   'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
24/5

In [46]:
# target_features = ['Global_active_power', 'Global_active_power', 'Voltage', 'Global_intensity', 
#                    'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
# file_path = 'data/hourly_household_power_consumption.csv'

# target_features = ['Global_active_power', 'Global_intensity', 
#                    'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3', 
#                    'Temp_Avg', 'Humidity_Avg']
# file_path = 'data/final_hourly_power_weather_avg.csv'

target_features = ['Global_active_power', 'Global_intensity', 
                   'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3', 
                   'Temp_Avg', 'Humidity_Avg', 'sin_hour', 'cos_hour', 
                   'sin_day', 'cos_day', 'sin_month', 'cos_month']
file_path = 'data/final_hourly_power_weather_avg_with_time.csv'


sequence_length = 24*30
prediction_length = 24

dataset = PowerConsumptionDataset(file_path=file_path, 
                              sequence_length=sequence_length, prediction_length=prediction_length, 
                              target_features = target_features)

train_sequences, eval_sequences, train_targets, eval_targets = dataset.load_data()

print('train_sequences shape: ', train_sequences.shape)
print('train_targets shape: ', train_targets.shape)
print('eval_sequences shape: ', eval_sequences.shape)
print('eval_targets shape: ', eval_targets.shape)

train_sequences shape:  (16896, 720, 13)
train_targets shape:  (16896, 24)
eval_sequences shape:  (4225, 720, 13)
eval_targets shape:  (4225, 24)


In [47]:
# 'LSTM' or 'GRU' for LIME and SHAP, 'LSTM_Attention' for 'Attention' 
model_name = 'LSTM'  
input_size = len(dataset.selected_features)  # Number of features
hidden_size = 128
num_layers = 2  
output_size = prediction_length
num_epochs = 100
dropout = 0.3
batch_size = 64
learning_rate = 0.001
patience = 10
num_of_features = input_size

In [48]:
model = create_model(model_name, input_size, 
                     hidden_size, num_layers, 
                     output_size, dropout)

model_path = './tmp/{}_ftnum_{}_days_{}_out_{}.pth'.format(model_name, 
                                                           num_of_features,
                                                           int(sequence_length/24),
                                                           output_size)
print(model_path)

./tmp/LSTM_ftnum_13_days_30_out_24.pth


In [49]:
model

LSTMModel(
  (lstm): LSTM(13, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=24, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [50]:
def train_and_evaluate2(model, model_name, train_sequences, train_targets, 
                       eval_sequences, eval_targets, model_path, num_epochs=100, 
                       batch_size=64, learning_rate=0.001, patience=5):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Data loader
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(train_sequences, dtype=torch.float32), torch.tensor(train_targets, dtype=torch.float32))
    eval_dataset = torch.utils.data.TensorDataset(torch.tensor(eval_sequences, dtype=torch.float32), torch.tensor(eval_targets, dtype=torch.float32))
    
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset, batch_size=batch_size, shuffle=False)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #, weight_decay=1e-5)

    best_val_loss = np.inf
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        # tqdm을 사용하여 학습 진행도 표시
        with tqdm(train_loader, unit="batch") as tepoch:
            for sequences_batch, targets_batch in tepoch:
                tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")
                
                # Move batch to GPU
                sequences_batch, targets_batch = sequences_batch.to(device), targets_batch.to(device)
                
                # Forward pass
                if 'Attention' in model_name:
                    outputs, _ = model(sequences_batch) 
                else:
                    outputs = model(sequences_batch)

                loss = criterion(outputs.squeeze(), targets_batch)

                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # 현재 배치 손실을 누적
                running_loss += loss.item()
                tepoch.set_postfix(loss=loss.item())

        # Epoch 단위로 평균 손실 계산
        train_loss = running_loss / len(train_loader)

        # Evaluation on the validation set
        model.eval()
        eval_loss = 0.0
        with torch.no_grad():
            for sequences_batch, targets_batch in eval_loader:
                # Move batch to GPU
                sequences_batch, targets_batch = sequences_batch.to(device), targets_batch.to(device)

                if 'Attention' in model_name:
                    val_outputs, _ = model(sequences_batch) 
                else:
                    val_outputs = model(sequences_batch)
                loss = criterion(val_outputs.squeeze(), targets_batch)
                eval_loss += loss.item()

        eval_loss /= len(eval_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {eval_loss:.4f}')

        # Check if the validation loss improved
        if eval_loss < best_val_loss:
            best_val_loss = eval_loss
            torch.save(model.state_dict(), model_path)
            print(f"Validation loss improved. Model saved at epoch {epoch+1}")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Early stopping
        if epochs_no_improve >= patience:
            print(f"Early stopping applied at epoch {epoch+1}")
            break

In [51]:
if os.path.exists(model_path):
    print(f"Loading the pre-trained {model_name} model...")
    model = load_model(model, model_path)
    # model = nn.DataParallel(model)
    model.to(device)
else:
    print(f"Training a new {model_name} model...")
    train_and_evaluate2(model, model_name, train_sequences, train_targets, 
                       eval_sequences, eval_targets, model_path,
                       num_epochs, batch_size, learning_rate, patience)

Loading the pre-trained LSTM model...


In [52]:
r2 = evaluate_r2_score(model, eval_sequences, eval_targets, model_name)
print(r2)

R² Score: 0.9370
0.9370015263557434


In [53]:
"""
hidden_size = 128, num_layers = 3 -> 0.5266
hidden_size = 256, num_layers = 3 -> 0.9083 / 0.0869

"""
input_size = len(dataset.selected_features)  # Number of features
hidden_size = 256
num_layers = 3  
output_size = prediction_length
num_epochs = 100
dropout = 0.1
batch_size = 64
learning_rate = 0.001
patience = 10
num_of_features = input_size

model_name = 'LSTM_Attention'  

model = create_model(model_name, input_size, 
                     hidden_size, num_layers, 
                     output_size, dropout)

model_path = './tmp/{}_ftnum_{}_days_{}_out_{}.pth'.format(model_name, 
                                                           num_of_features,
                                                           int(sequence_length/24),
                                                           output_size)
print(model_path)
print(model)

./tmp/LSTM_Attention_ftnum_13_days_30_out_24.pth
LSTMWithAttention(
  (lstm): LSTM(13, 256, num_layers=3, batch_first=True, dropout=0.1)
  (attention): Attention(
    (attention_layer): Linear(in_features=256, out_features=256, bias=True)
  )
  (fc): Linear(in_features=256, out_features=24, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)


In [54]:
if os.path.exists(model_path):
    print(f"Loading the pre-trained {model_name} model...")
    model = load_model(model, model_path)
    # model = nn.DataParallel(model)
    model.to(device)
else:
    print(f"Training a new {model_name} model...")
    train_and_evaluate2(model, model_name, train_sequences, train_targets, 
                       eval_sequences, eval_targets, model_path,
                       num_epochs, batch_size, learning_rate, patience)

Loading the pre-trained LSTM_Attention model...


In [55]:
r2 = evaluate_r2_score(model, eval_sequences, eval_targets, model_name)
print(r2)

R² Score: 0.7514
0.751369059085846


'LSTM'