In [1]:
import numpy as np
import pandas as pd
import pymysql
import yfinance as yf
from pykrx import stock
import warnings
%matplotlib inline
warnings.filterwarnings('ignore')

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM

def make_dataset(data, label, window_size=20):
    feature_list = []
    label_list = []
    for i in range(len(data) - window_size):
        feature_list.append(np.array(data.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i+window_size]))
    return np.array(feature_list), np.array(label_list)

df_pred = pd.DataFrame(columns = ['date','s_ticker','s_name','predict'])

names = ['교보증권', 'DGB금융지주', 'BNK금융지주', '효성', '부국증권', '현대차증권', '롯데하이마트', '광주신세계', '현대홈쇼핑',
         'GS리테일', '화승인더', 'HDC', '신라교역', '대한제당', '하이트진로홀딩스', '대상', '조흥', '삼양사', '동아타이어', '태영건설',
         '한국가스공사', '금호석유', 'DL이앤씨', 'POSCO홀딩스', '한화투자증권', 'SK텔레콤', '지투알', 'KT', 'LG유플러스', '이노션',
         '롯데하이마트', 'SK텔레콤', '휴비스', '한국주철관', 'KT&G', 'KT', 'HD현대', '세아베스틸지주', 'DGB금융지주', 'BNK금융지주',
         '화성산업', '효성']
today = input('(ex:20221026) >> ')

(ex:20221026) >> 20221026


In [2]:
s_name = names[0]
conn = pymysql.connect (host='localhost',user ='ant',password='roal',db='antdb',charset='utf8')
cur = conn.cursor()
sql = f"select * from stock_db where s_name='{s_name}'" 
cur.execute(sql)
df = pd.DataFrame(cur.fetchall(), columns=['ticker','name','date','open','high','low','close','volume','kospi','kospi200','kospi100','kospi50','IXIC','SnP500','HSI'])
df['date'] = pd.to_datetime(df['date'])
s_ticker = df['ticker'][0]


scale_cols = ['open','high','low','volume','kospi','kospi200','IXIC','SnP500','HSI']

scaler_x = MinMaxScaler().fit(df[scale_cols])
scaled_x = scaler_x.transform(df[scale_cols])
df_scaled_x = pd.DataFrame(scaled_x)

scaler_y = MinMaxScaler().fit(df['close'].values.reshape(-1,1))
scaled_y = scaler_y.transform(df['close'].values.reshape(-1,1))
df_scaled_x.columns = scale_cols

train = df_scaled_x.copy()
train['close'] = scaled_y

feature_cols = ['open','high','low','volume','kospi','kospi200','IXIC','SnP500','HSI']
label_cols = ['close']

train_feature = train[feature_cols]
train_label = train[label_cols]

# train dataset
train_feature, train_label = make_dataset(train_feature, train_label, 20)

# train, validation set 생성
from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(train_feature, train_label, test_size=0.2)

model = Sequential()
model.add(LSTM(16, 
               input_shape=(train_feature.shape[1], train_feature.shape[2]), 
               activation='relu', 
               return_sequences=False)
          )
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
early_stop = EarlyStopping(monitor='val_loss', patience=5)
checkpoint = ModelCheckpoint('tmp_checkpoint.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

model.fit(x_train, y_train, 
                epochs=200, 
                batch_size=16,
                validation_data=(x_valid, y_valid),
                callbacks=[early_stop, checkpoint])


real = stock.get_market_ohlcv_by_date(fromdate="20220101", todate=today, ticker=f'{s_ticker}')[-20:].drop('종가',axis=1)
kospi = stock.get_index_fundamental("20220101", today, "1001")[['종가']][-20:]
kospi200 = stock.get_index_fundamental("20220101", today, "1028")[['종가']][-20:]
# kospi100 = stock.get_index_fundamental("20220101", today, "1034")[['종가']][-20:]
# kospi50 = stock.get_index_fundamental("20220101", today, "1035")[['종가']][-20:]
IXIC = yf.download('^IXIC',start = '2022-01-01', end=f'{today[:4]}-{today[4:6]}-{today[6:]}')[['Close']][-20:]
SnP500 = yf.download('^GSPC',start = '2022-01-01', end=f'{today[:4]}-{today[4:6]}-{today[6:]}')[['Close']][-20:]
HSI = yf.download('^GSPC',start = '2022-01-01', end=f'{today[:4]}-{today[4:6]}-{today[6:]}')[['Close']][-20:]

real['kospi'] = list(kospi['종가'])
real['kospi200'] = list(kospi200['종가'])
# real['kospi100'] = list(kospi100['종가'])
# real['kospi50'] = list(kospi50['종가'])
real['IXIC'] = list(IXIC['Close'])
real['SnP500'] = list(SnP500['Close'])
real['HSI'] = list(HSI['Close'])

real = scaler_x.transform(real)
model_inputs = np.array([real])

pred = model.predict(model_inputs)
pred = scaler_y.inverse_transform(pred)

ex = pd.DataFrame({'date':[pd.to_datetime(today)],'s_ticker':[s_ticker],'s_name':[s_name],'predict':[pred[0][0]]})
df_pred = df_pred.append(ex,ignore_index=True)
print(df_pred)

Epoch 1/200
Epoch 1: val_loss improved from inf to 0.00035, saving model to tmp_checkpoint.h5
Epoch 2/200
Epoch 2: val_loss improved from 0.00035 to 0.00035, saving model to tmp_checkpoint.h5
Epoch 3/200
Epoch 3: val_loss improved from 0.00035 to 0.00030, saving model to tmp_checkpoint.h5
Epoch 4/200
Epoch 4: val_loss improved from 0.00030 to 0.00029, saving model to tmp_checkpoint.h5
Epoch 5/200
Epoch 5: val_loss improved from 0.00029 to 0.00028, saving model to tmp_checkpoint.h5
Epoch 6/200
Epoch 6: val_loss improved from 0.00028 to 0.00022, saving model to tmp_checkpoint.h5
Epoch 7/200
Epoch 7: val_loss did not improve from 0.00022
Epoch 8/200
Epoch 8: val_loss did not improve from 0.00022
Epoch 9/200
Epoch 9: val_loss improved from 0.00022 to 0.00020, saving model to tmp_checkpoint.h5
Epoch 10/200
Epoch 10: val_loss improved from 0.00020 to 0.00020, saving model to tmp_checkpoint.h5
Epoch 11/200
Epoch 11: val_loss did not improve from 0.00020
Epoch 12/200
Epoch 12: val_loss did not