## **1. 필요한 모듈 선언**
---

In [None]:
import torch
from torch import nn, optim, cuda
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler

## **2. Device 및 Seed 설정**
---

In [None]:
# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [None]:
# Pin the PyTorch and NumPy random generators to the same fixed seed.
torch.manual_seed(42)
np.random.seed(42)
# The CUDA generators are only seeded when the GPU was selected as the device.
if device == 'cuda':
    cuda.manual_seed_all(42)

## **3. 데이터셋 로드 & 전처리**
---



In [None]:
class stock_dataset(Dataset):
    """Sliding-window dataset over a scaled, row-ordered 2-D table.

    Sample ``idx`` pairs the ``seq_length`` consecutive rows starting at row
    ``idx`` (all columns) with the last-column value of the row that directly
    follows that window.

    Args:
        dataset: 2-D table passed to ``scaler.transform``.
        scaler: Object exposing ``transform``. It is only applied here, never
            fitted, so it must already be fitted by the caller.
        seq_length: Number of rows in each input window.
    """

    def __init__(self, dataset, scaler, seq_length):
        self.dataset = scaler.transform(dataset)
        self.seq_length = seq_length

    def __len__(self):
        # Every sample needs seq_length input rows plus one target row.
        # Clamp at zero: a table shorter than the window holds no samples,
        # and a negative value would make len() raise ValueError.
        return max(0, len(self.dataset) - self.seq_length)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float tensors for window ``idx``.

        ``x`` has one row per time step of the window; ``y`` is a
        one-element tensor holding the last column of the following row.
        """
        window_end = idx + self.seq_length
        x = torch.FloatTensor(self.dataset[idx:window_end, :])
        # Indexing with [-1] (a list) keeps y one-dimensional with length 1.
        y = torch.FloatTensor(self.dataset[window_end, [-1]])
        return x, y

In [None]:
dataset_path = '/content/stock_daily.csv'
# Skip the first line of the file, then flip the row order.
# NOTE(review): presumably the file is stored newest-first and the reversal
# puts it in chronological order; confirm against the CSV.
dataset = pd.read_csv(dataset_path, sep=',', skiprows=[0]).iloc[::-1]

In [None]:
# Bare expression: in a notebook cell this renders the loaded frame as output.
dataset

Unnamed: 0,# Open,High,Low,Volume,Close
731,568.002570,568.002570,552.922516,13100,558.462551
730,561.202549,566.432590,558.672539,41200,559.992565
729,566.892592,567.002574,556.932537,10800,556.972503
728,558.712504,568.452595,558.712504,7900,567.162558
727,599.992707,604.832763,562.192568,147100,567.002574
...,...,...,...,...,...
4,819.000000,823.000000,816.000000,1053600,820.450012
3,819.359985,823.000000,818.469971,1304000,818.979980
2,819.929993,824.400024,818.979980,1281700,824.159973
1,823.020020,828.070007,821.655029,1597800,828.070007


In [None]:
""" train과 test로 데이터 분리 """
# The first 70% of the rows (in their current order) become the training
# set; the remaining rows become the test set.
len_data = len(dataset)
train_data = dataset.iloc[:int(len_data * 0.7)]
test_data = dataset.iloc[int(len_data * 0.7):]

print('train_data :', train_data.shape)
print('test_data  :', test_data.shape)

train_data : (512, 5)
test_data  : (220, 5)


In [None]:
"""데이터 정규화"""
# Alternative scalers, kept for experimentation:
# scaler = StandardScaler()   # standard scaling using mean and standard deviation
# scaler = MaxAbsScaler()     # scales so the largest absolute value maps to 1 and 0 stays 0
# scaler = RobustScaler()     # uses the median and IQR (interquartile range); limits the influence of outliers

# MinMaxScaler maps each column's minimum to 0 and maximum to 1.
# It is fitted on the training rows only; the test rows are not seen here.
scaler = MinMaxScaler().fit(train_data)
print(scaler)

MinMaxScaler(copy=True, feature_range=(0, 1))


In [None]:
N = 7              # window length: rows fed to the model per sample
batch_size = 128   # training mini-batch size

# Both splits are transformed with the same scaler.
train_dataset = stock_dataset(train_data, scaler, N)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

test_dataset = stock_dataset(test_data, scaler, N)
# The whole test set is served as a single batch.
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

## **4. 모델 선언**
---



In [None]:
"""hyper parameters"""
# --- model shape ---
input_size = 5      # features per time step
hidden_size = 10    # width of the recurrent hidden state
num_layers = 2      # stacked recurrent layers
output_size = 1     # values predicted per sample

# --- optimisation ---
learning_rate = 0.01
nb_epochs = 200

In [None]:
class Model(nn.Module):
    """Recurrent regressor: a GRU followed by a linear read-out.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        output_size: Number of values produced by the linear layer.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size=5, hidden_size=10, output_size=1, num_layers=1):
        super().__init__()
        # Alternative recurrent cores with the same constructor arguments:
        # self.net = nn.RNN(input_size, hidden_size, batch_first=True, num_layers=num_layers)
        # self.net = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=num_layers)
        self.net = nn.GRU(input_size, hidden_size, batch_first=True, num_layers=num_layers)

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence."""
        # The first return value holds the hidden state at every time step;
        # the second (the final hidden state) is not needed here.
        hidden_seq, _ = self.net(x)
        # batch_first=True, so index -1 on the second axis is the last step.
        last_step = hidden_seq[:, -1]
        return self.fc(last_step)

In [None]:
# Build the network from the hyperparameters declared earlier in the file.
# num_layers must be passed explicitly: without it the constructor falls back
# to its default of a single layer and the configured value is ignored.
model = Model(input_size=input_size,
              hidden_size=hidden_size,
              output_size=output_size,
              num_layers=num_layers)
model = model.to(device)

## **5. 학습**
---



In [None]:
# Optimizer (Adam over all model parameters) and loss (mean squared error).
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [None]:
def model_train(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        criterion: Callable ``criterion(prediction, target)`` returning a
            scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        The sum of the per-batch loss values as a detached tensor, or ``0``
        when the loader yields no batches.
    """
    model.train()

    # Send each batch to the device the model's parameters live on, so the
    # function does not depend on a module-level device variable.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    train_loss = 0
    for x_train, y_train in train_loader:
        if target is not None:
            x_train, y_train = x_train.to(target), y_train.to(target)

        loss = criterion(model(x_train), y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy: adding the live loss would keep every
        # batch's autograd graph referenced until the total is discarded.
        train_loss += loss.detach()

    return train_loss


def model_test(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        criterion: Callable ``criterion(prediction, target)`` returning a
            scalar loss tensor.

    Returns:
        The unweighted average of the per-batch loss values, as a tensor.
        With a single-batch loader this is exactly that batch's loss.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()

    # Send each batch to the device the model's parameters live on, so the
    # function does not depend on a module-level device variable.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    total = 0
    n_batches = 0
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for x_test, y_test in test_loader:
            if target is not None:
                x_test, y_test = x_test.to(target), y_test.to(target)
            total += criterion(model(x_test), y_test)
            n_batches += 1

    if n_batches == 0:
        raise ValueError("test_loader yielded no batches")

    # Average over all batches instead of reporting only the last one.
    return total / n_batches

In [None]:
# Training loop.
# The model is trained on every iteration; evaluation and logging happen on
# every 10th one. Nesting the training call under the logging condition
# would train the model on only one iteration in ten.
# range(nb_epochs + 1) keeps the logged index running from 0 to nb_epochs.

for epoch_idx in range(nb_epochs + 1):
    train_loss = model_train(model, train_loader, criterion, optimizer)

    if epoch_idx % 10 == 0:
        test_loss = model_test(model, test_loader, criterion)

        print('[ Epoch: {:4d}/{} ]'.format(epoch_idx, nb_epochs), end='\t')
        print('train_loss : {:.4f} \t test_loss : {:.4f}'.format(train_loss, test_loss))

[ Epoch:    0/200 ]	train_loss : 0.0691 	 test_loss : 0.0045
[ Epoch:   10/200 ]	train_loss : 0.0699 	 test_loss : 0.0065
[ Epoch:   20/200 ]	train_loss : 0.0521 	 test_loss : 0.0552
[ Epoch:   30/200 ]	train_loss : 0.0266 	 test_loss : 0.0196
[ Epoch:   40/200 ]	train_loss : 0.0092 	 test_loss : 0.0026
[ Epoch:   50/200 ]	train_loss : 0.0182 	 test_loss : 0.0023
[ Epoch:   60/200 ]	train_loss : 0.0100 	 test_loss : 0.0054
[ Epoch:   70/200 ]	train_loss : 0.0117 	 test_loss : 0.0063
[ Epoch:   80/200 ]	train_loss : 0.0099 	 test_loss : 0.0025
[ Epoch:   90/200 ]	train_loss : 0.0096 	 test_loss : 0.0021
[ Epoch:  100/200 ]	train_loss : 0.0086 	 test_loss : 0.0034
[ Epoch:  110/200 ]	train_loss : 0.0085 	 test_loss : 0.0058
[ Epoch:  120/200 ]	train_loss : 0.0080 	 test_loss : 0.0044
[ Epoch:  130/200 ]	train_loss : 0.0076 	 test_loss : 0.0033
[ Epoch:  140/200 ]	train_loss : 0.0079 	 test_loss : 0.0038
[ Epoch:  150/200 ]	train_loss : 0.0077 	 test_loss : 0.0044
[ Epoch:  160/200 ]	trai