# LSTM

In [3]:
import tensorflow as tf
from pykrx import stock

In [4]:
stock_name = stock.get_market_ticker_name("005930") # 삼섬전자
stock_name

'삼성전자'

In [5]:
# 삼성전자 주가데이터
raw_df = stock.get_market_ohlcv_by_date(fromdate="20220101", todate="20221101", ticker="005930")
raw_df.head()

Unnamed: 0_level_0,시가,고가,저가,종가,거래량
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-03,79400,79800,78200,78600,13502112
2022-01-04,78800,79200,78300,78700,12427416
2022-01-05,78800,79000,76400,77400,25470640
2022-01-06,76700,77600,76600,76900,12931954
2022-01-07,78100,78400,77400,78300,15163757


In [6]:
df=raw_df[['종가']]
df.reset_index(inplace=True)
df

Unnamed: 0,날짜,종가
0,2022-01-03,78600
1,2022-01-04,78700
2,2022-01-05,77400
3,2022-01-06,76900
4,2022-01-07,78300
...,...,...
199,2022-10-26,59400
200,2022-10-27,59500
201,2022-10-28,57300
202,2022-10-31,59400


In [7]:
# *-- 데이터 전처리 --*
# sklearn을 활용한 data scaling
from sklearn.preprocessing import RobustScaler
# RobustScaler : Scale features using statistics that are robust to outliers.
rb = RobustScaler()

In [8]:
df_scaled = rb.fit_transform(df[['종가']])
df['종가'] = df_scaled
df.head() # 스케일링 결과 확인(-1~1 사이의 값으로 스케일링)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['종가'] = df_scaled


Unnamed: 0,날짜,종가
0,2022-01-03,1.186667
1,2022-01-04,1.195556
2,2022-01-05,1.08
3,2022-01-06,1.035556
4,2022-01-07,1.16


In [9]:
# train, test set 분리
test_size = 100 # data split size
train_data = df[:-test_size]
test_data = df[-test_size:]

In [10]:
train_data

Unnamed: 0,날짜,종가
0,2022-01-03,1.186667
1,2022-01-04,1.195556
2,2022-01-05,1.080000
3,2022-01-06,1.035556
4,2022-01-07,1.160000
...,...,...
99,2022-05-30,0.217778
100,2022-05-31,0.191111
101,2022-06-02,0.128889
102,2022-06-03,0.137778


In [11]:
# 당일 데이터 예측에 +n일의 과거 데이터를 반영한다.
import pandas as pd
window_size = 15 # 예측에 반영할 과거 데이터 일수

for i in range(1, 15):
    train_data[f'종가_{i}'] = train_data['종가'].shift(i)
    test_data[f'종가_{i}'] = test_data['종가'].shift(i)
        # train, test 데이터를 하루 씩 옮기면서 과거 데이터를 형성

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data[f'종가_{i}'] = train_data['종가'].shift(i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[f'종가_{i}'] = test_data['종가'].shift(i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data[f'종가_{i}'] = train_data['종가'].shift(i)
A value is trying to be set on a copy of a slice from a D

In [14]:
# 데이터 확인
train_data.head(10)

Unnamed: 0,날짜,종가,종가_1,종가_2,종가_3,종가_4,종가_5,종가_6,종가_7,종가_8,종가_9,종가_10,종가_11,종가_12,종가_13,종가_14
0,2022-01-03,1.186667,,,,,,,,,,,,,,
1,2022-01-04,1.195556,1.186667,,,,,,,,,,,,,
2,2022-01-05,1.08,1.195556,1.186667,,,,,,,,,,,,
3,2022-01-06,1.035556,1.08,1.195556,1.186667,,,,,,,,,,,
4,2022-01-07,1.16,1.035556,1.08,1.195556,1.186667,,,,,,,,,,
5,2022-01-10,1.133333,1.16,1.035556,1.08,1.195556,1.186667,,,,,,,,,
6,2022-01-11,1.213333,1.133333,1.16,1.035556,1.08,1.195556,1.186667,,,,,,,,
7,2022-01-12,1.213333,1.213333,1.133333,1.16,1.035556,1.08,1.195556,1.186667,,,,,,,
8,2022-01-13,1.124444,1.213333,1.213333,1.133333,1.16,1.035556,1.08,1.195556,1.186667,,,,,,
9,2022-01-14,1.071111,1.124444,1.213333,1.213333,1.133333,1.16,1.035556,1.08,1.195556,1.186667,,,,,


In [16]:
# 과거 데이터가 채워지지 않으면 drop함
train_data.dropna(inplace=True)
X_train = train_data.drop('종가', axis=1)
y_train = train_data[['종가']]

test_data.dropna(inplace=True)
X_test = test_data.drop('종가', axis=1)
y_test = test_data[['종가']]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data.dropna(inplace=True)


In [17]:
# train, test 사이즈를 확인하고, 신경망 학습을 위해 reshape한다
X_train= X_train.values
X_test= X_test.values
y_train = y_train.values
y_test = y_test.values

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(90, 15) (90, 1) (86, 15) (86, 1)


In [18]:
X_train = X_train.reshape((X_train.shape[0], 15, 1 ))
X_test= X_test.reshape((X_test.shape[0], 15, 1 ))

In [19]:
X_train = X_train.reshape((X_train.shape[0], 15, 1 ))
X_test= X_test.reshape((X_test.shape[0], 15, 1 ))

In [21]:
# 신경망 생성
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import backend as K
K.clear_session()

In [22]:
# 모델 구성하기
model = Sequential()
model.add(LSTM(14, return_sequences = True, input_shape = (14, 1)))
model.add(LSTM(28, return_sequences=False))
model.add(Dense(1, activation='linear'))

In [23]:
# 모델 엮기
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 14, 14)            896       
                                                                 
 lstm_1 (LSTM)               (None, 28)                4816      
                                                                 
 dense (Dense)               (None, 1)                 29        
                                                                 
Total params: 5,741
Trainable params: 5,741
Non-trainable params: 0
_________________________________________________________________


In [25]:
# 모델 학습
# 과적합 방지를 위해 EarlyStopping 사용
es = EarlyStopping(monitor='loss', patience=5, verbose= 1)
model.fit(X_train, y_train, epochs = 50, batch_size=16, verbose=1, callbacks=[es]) #???

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type Timestamp).

In [26]:
# monitor : 학습 조기종료를 위해 관찰하는 항목 (default : val_loss)
# patience : 개선이 안된다고 바로 종료시키지 않고, 개선을 위해 몇번의 에포크를 기다릴지 설정
# verbose=1 로 지정하면, 언제 keras 에서 training 을 멈추었는지를 화면에 출력할 수 있다.