In [15]:
#!pip install pandas numpy scikit-learn torch tqdm
#!pip install torchsummary
#GPU를 사용하여 학습시킬 경우 (필수X)
#!pip install torch==<12.3> torchvision torchaudio -f https://download.pytorch.org/whl/cu<12.3>/torch_stable.html

In [16]:
import pandas as pd
import numpy as np
import random
import os

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader 
from torchsummary import summary

from tqdm.auto import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

In [17]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and configures
    cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run, which
    # defeats the deterministic flag above, so it has to stay disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [18]:
# Data load: read the training table from the CSV next to this notebook
new_data = pd.read_csv(filepath_or_buffer='./train_sun.csv')

In [19]:
# Preview the first five rows of the loaded table
new_data.head(n=5)

Unnamed: 0,date,TA,WS,WA,HM,PA,SI,CA,PM10,plant_volume,wattage
0,20190101 00,-0.7,1.2,20.0,46.0,1026.1,0.0,9.0,8.0,2636.54,8.93544
1,20190101 01,-1.5,1.3,320.0,53.0,1026.2,0.0,0.0,12.0,2636.54,5.475148
2,20190101 02,-1.8,1.0,20.0,55.0,1026.2,0.0,0.0,23.0,2636.54,0.518472
3,20190101 03,-2.0,0.2,0.0,55.0,1026.3,0.0,7.0,19.0,2636.54,0.180008
4,20190101 04,-1.5,0.4,0.0,54.0,1026.1,0.0,9.0,20.0,2636.54,0.055592


In [20]:
# Feature and target selection
features = ['TA', 'WS', 'WA', 'HM', 'PA', 'SI', 'CA', 'PM10', 'plant_volume']
target = 'wattage'

# Data pre-processing: parse the 'YYYYMMDD HH' timestamps (e.g. '20190101 00')
# with an explicit format instead of relying on format inference, then index
# the frame by them. The guard keeps the cell re-runnable: once 'date' has
# become the index it is no longer a column, and parsing it again would raise
# a KeyError.
if 'date' in new_data.columns:
    new_data['date'] = pd.to_datetime(new_data['date'], format='%Y%m%d %H')
    new_data = new_data.set_index('date')

In [21]:
# Hyperparameter setting

# -- model architecture --
input_size = len(features)  # one input channel per feature column (9)
hidden_size = 64
num_layers = 2
output_size = 1

# -- data windowing / batching --
window_size = 48
batch_size = 64

# -- optimisation --
learning_rate = 0.001
num_epochs = 4

In [22]:
# Dataset & DataLoader
# Scale exactly the modelled columns, with the target placed last. The dataset
# below slices inputs as [:, :-1] and the label as [:, -1], so the column
# order must be fixed explicitly rather than inherited from the CSV layout.
scaler = MinMaxScaler()
new_data_scaled = scaler.fit_transform(new_data[features + [target]])

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a 2-D array whose last column is the target.

    Sample ``i`` pairs the rows ``i .. i + window_size - 1`` of every column
    except the last (the inputs) with the last-column value of row
    ``i + window_size`` (the label).

    Args:
        df: 2-D array-like supporting ``len()`` and NumPy-style
            ``[rows, cols]`` indexing (a NumPy array in this notebook, despite
            the parameter name).
        window_size: Number of consecutive rows in each input window.
    """

    def __init__(self, df, window_size):
        self.df = df
        self.window_size = window_size

    def __len__(self):
        # Clamp at zero: data shorter than one window yields an empty dataset
        # instead of a negative length, which len() rejects with ValueError.
        return max(0, len(self.df) - self.window_size)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float tensors for window ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Support Python-style negative indexing; raw negative offsets
            # would otherwise slice the wrong (often empty) window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError('TimeSeriesDataset index out of range')

        stop = idx + self.window_size
        x = torch.tensor(self.df[idx:stop, :-1], dtype=torch.float)
        y = torch.tensor(self.df[stop, -1], dtype=torch.float)
        return x, y

def create_data_loader(df, window_size, batch_size, shuffle=False):
    """Wrap ``df`` in a ``TimeSeriesDataset`` and return a ``DataLoader`` over it.

    Args:
        df: 2-D array passed straight to ``TimeSeriesDataset``.
        window_size: Number of rows in each input window.
        batch_size: Number of windows per batch.
        shuffle: Whether the loader reshuffles the windows every epoch.
            Defaults to ``False`` (chronological order), matching the
            previous hard-coded behaviour.

    Returns:
        A ``DataLoader`` yielding ``(inputs, labels)`` batches.
    """
    dataset = TimeSeriesDataset(df, window_size)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Batched, sequentially ordered windows over the scaled table
new_data_loader = create_data_loader(
    df=new_data_scaled,
    window_size=window_size,
    batch_size=batch_size,
)

In [23]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network on a batch of sequences.

        Args:
            x: Tensor whose first dimension is the batch and whose last
                dimension is the feature axis; any dimensions in between are
                flattened into the sequence axis.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # Normalise x to (batch_size, sequence_length, input_size). reshape,
        # unlike view, also accepts non-contiguous inputs (e.g. permuted ones).
        x = x.reshape(x.size(0), -1, x.size(-1))

        # Zero initial states allocated directly with x's device and dtype,
        # avoiding a CPU allocation followed by a copy.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        out, _ = self.lstm(x, (h0, c0))
        # Only the final time step feeds the regression head.
        out = self.fc(out[:, -1, :])

        return out

In [24]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"current device: {device}")

# Build the network and move its parameters to the selected device
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)
model = model.to(device)

# Mean-squared-error objective optimised with Adam
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

current device: cpu


In [25]:
# Train

# Mean of the per-batch losses for each epoch, appended by the loop below
train_losses = []

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"current device: {device}")

# Model, loss and optimizer are rebuilt here so that re-running this cell
# always starts training from freshly initialised weights.
model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Make the training mode explicit rather than relying on the constructor default
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0

    for i, (inputs, labels) in enumerate(new_data_loader):
        inputs = inputs.to(device)
        # (batch,) -> (batch, 1) so the labels line up with the model output
        labels = labels.unsqueeze(1).to(device)

        # Forward
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        if (i+1) % 300 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.10f}' 
                   .format(epoch+1, num_epochs, i+1, len(new_data_loader), loss.item()))

    # max(1, ...) guards against an empty loader
    train_losses.append(epoch_loss / max(1, len(new_data_loader)))

current device: cpu
Epoch [1/10], Step [300/548], Loss: 0.0078756362
Epoch [2/10], Step [300/548], Loss: 0.0040824418
Epoch [3/10], Step [300/548], Loss: 0.0035921382
Epoch [4/10], Step [300/548], Loss: 0.0037460134
Epoch [5/10], Step [300/548], Loss: 0.0038389687
Epoch [6/10], Step [300/548], Loss: 0.0037864386
Epoch [7/10], Step [300/548], Loss: 0.0038980665
Epoch [8/10], Step [300/548], Loss: 0.0041963318
Epoch [9/10], Step [300/548], Loss: 0.0048571788
Epoch [10/10], Step [300/548], Loss: 0.0047805575


In [26]:
# Inference
model.eval()

# Scaled prediction for every row from index `window_size` onwards, in order
test_predictions = []

# Predictions go into a copy of the scaled table. Writing them into
# `new_data_scaled` itself would overwrite the ground-truth target column,
# so re-running the training cell afterwards would fit on predictions.
new_data_pred_scaled = new_data_scaled.copy()

# Nothing to predict unless at least one full window plus one target row exists
if len(new_data_scaled) > window_size:
    # Batched, order-preserving pass over the same sliding windows
    inference_loader = DataLoader(
        TimeSeriesDataset(new_data_scaled, window_size),
        batch_size=batch_size,
        shuffle=False,
    )

    with torch.no_grad():
        for inputs, _ in inference_loader:
            outputs = model(inputs.to(device))
            # (batch, 1) -> flat list of Python floats
            test_predictions.extend(outputs.squeeze(1).cpu().tolist())

    new_data_pred_scaled[window_size:, -1] = test_predictions

In [27]:
# Save the model: persist only the learned parameters (state dict)
torch.save(obj=model.state_dict(), f='model_new.pth')

In [28]:
# Load the model
loaded_model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
loaded_model.load_state_dict(torch.load('model_new.pth', map_location=device))
loaded_model.eval()

LSTM(
  (lstm): LSTM(9, 64, num_layers=2, batch_first=True)
  (lstm1): LSTM(9, 32, num_layers=2, batch_first=True)
  (lstm2): LSTM(9, 16, num_layers=2, batch_first=True)
  (lstm3): LSTM(9, 1, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [1]:
#summary(loaded_model, input_size=(window_size, input_size), device=str(device))