In [1]:
#  라이브러리 로드
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader # 데이터로더
from torch.utils.data import TensorDataset # 텐서 데이터셋
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler 
import datetime
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # KMP 오류 방지(터널 오류 방지)

In [2]:
df = pd.read_csv('data/005930.KS01.csv')
print(df.head())
print('--------------------------------------------------------------')
print(df.info())

         Date     Adj Close    Close     High      Low     Open    Volume
0  2010-01-04  12051.839844  16180.0  16180.0  16000.0  16060.0  11950800
1  2010-01-05  12245.499023  16440.0  16580.0  16300.0  16520.0  27925850
2  2010-01-06  12528.544922  16820.0  16820.0  16520.0  16580.0  22948850
3  2010-01-07  12111.428711  16260.0  16820.0  16260.0  16820.0  22107950
4  2010-01-08  12230.602539  16420.0  16420.0  16120.0  16400.0  14777550
--------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3811 entries, 0 to 3810
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3811 non-null   object 
 1   Adj Close  3811 non-null   float64
 2   Close      3811 non-null   float64
 3   High       3811 non-null   float64
 4   Low        3811 non-null   float64
 5   Open       3811 non-null   float64
 6   Volume     3811 non-null   int64  
dtypes: float64(5), int64(1

In [3]:
df1 = df.set_index(pd.to_datetime(df['Date']))
print(df1.head())
df1 = df1[['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']]
df1.head()

                  Date     Adj Close    Close     High      Low     Open  \
Date                                                                       
2010-01-04  2010-01-04  12051.839844  16180.0  16180.0  16000.0  16060.0   
2010-01-05  2010-01-05  12245.499023  16440.0  16580.0  16300.0  16520.0   
2010-01-06  2010-01-06  12528.544922  16820.0  16820.0  16520.0  16580.0   
2010-01-07  2010-01-07  12111.428711  16260.0  16820.0  16260.0  16820.0   
2010-01-08  2010-01-08  12230.602539  16420.0  16420.0  16120.0  16400.0   

              Volume  
Date                  
2010-01-04  11950800  
2010-01-05  27925850  
2010-01-06  22948850  
2010-01-07  22107950  
2010-01-08  14777550  


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,16060.0,16180.0,16000.0,16180.0,11950800,12051.839844
2010-01-05,16520.0,16580.0,16300.0,16440.0,27925850,12245.499023
2010-01-06,16580.0,16820.0,16520.0,16820.0,22948850,12528.544922
2010-01-07,16820.0,16820.0,16260.0,16260.0,22107950,12111.428711
2010-01-08,16400.0,16420.0,16120.0,16420.0,14777550,12230.602539


In [4]:
seq_length=7
batch_size=100
train_size=int(len(df1) * 0.9)
test_size=len(df1) - train_size
train_set = df1[:train_size]
test_set = df1[train_size-seq_length:]
test_set.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-11-20,72100.0,73000.0,72100.0,72700.0,10610157,70408.835938
2023-11-21,73100.0,73400.0,72700.0,72800.0,9712881,70505.679688
2023-11-22,72200.0,73000.0,71900.0,72800.0,11105143,70505.679688
2023-11-23,73000.0,73200.0,72200.0,72400.0,6775614,70118.289062
2023-11-24,72400.0,72600.0,71700.0,71700.0,6676685,69440.351562


In [5]:
scaler_x = MinMaxScaler()
scaler_x.fit(df1.iloc[:,:-1])
scaler_y = MinMaxScaler()
scaler_y.fit(df1.iloc[:,[-1]])

train_set.iloc[:,:-1] = scaler_x.transform(train_set.iloc[:,:-1])
print(train_set.head())

train_set.iloc[:,[-1]] = scaler_y.transform(train_set.iloc[:,[-1]])
print(train_set.head())

test_set.iloc[:,:-1] = scaler_x.transform(test_set.iloc[:,:-1])
test_set.iloc[:,[-1]] = scaler_y.transform(test_set.iloc[:,[-1]])
print(test_set.head())


                Open      High       Low     Close    Volume     Adj Close
Date                                                                      
2010-01-04  0.031062  0.027034  0.033658  0.033333  0.132336  12051.839844
2010-01-05  0.037066  0.031861  0.037602  0.036693  0.309235  12245.499023
2010-01-06  0.037849  0.034757  0.040494  0.041602  0.254123  12528.544922
2010-01-07  0.040981  0.034757  0.037076  0.034367  0.244811  12111.428711
2010-01-08  0.035500  0.029930  0.035235  0.036434  0.163638  12230.602539
                Open      High       Low     Close    Volume  Adj Close
Date                                                                   
2010-01-04  0.031062  0.027034  0.033658  0.033333  0.132336   0.023659
2010-01-05  0.037066  0.031861  0.037602  0.036693  0.309235   0.026208
2010-01-06  0.037849  0.034757  0.040494  0.041602  0.254123   0.029935
2010-01-07  0.040981  0.034757  0.037076  0.034367  0.244811   0.024443
2010-01-08  0.035500  0.029930  0.035235  0

  train_set.iloc[:,:-1] = scaler_x.transform(train_set.iloc[:,:-1])
 0.1470894  0.10280507 0.17478    0.10930907 0.11327317 0.1343173
 0.08995051 0.09813301 0.12025161 0.10920582 0.15235554 0.14524772
 0.30526808 0.17075039 0.10730773 0.09863813 0.18681065 0.1492563
 0.16073771 0.14578083 0.22867807 0.19707994 0.18983028 0.24088766
 0.16969425 0.12517766 0.12279032 0.28812258 0.22434267 0.63884075
 0.1443859  0.03275429 0.16344856 0.25118614 0.19769851 0.2587135
 0.21785193 0.16373436 0.14241175 0.12997723 0.12358027 0.15476816
 0.13558782 0.17389242 0.22015142 0.16561305 0.2106835  0.16122102
 0.18344753 0.23044612 0.24324742 0.13769761 0.15636361 0.14887997
 0.14092507 0.16257445 0.12738326 0.16841521 0.1796684  0.16244018
 0.14619134 0.13062073 0.23449562 0.25701978 0.21598882 0.23860943
 0.16075271 0.21340012 0.10786088 0.14408377 0.16879392 0.24965667
 0.25004441 0.12756988 0.17026594 0.55484906 0.49354098 0.29593503
 0.20711757 0.33831012 0.1929502  0.27777515 0.30038218 0.222758