In [1]:
import FinanceDataReader as fdr
import datetime

import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

2021-12-03 17:32:20.651446: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-03 17:32:20.651468: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

%matplotlib inline
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] = 'NanumGothic'

In [3]:
def windowed_dataset(series, window_size, batch_size, shuffle):
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    if shuffle:
        ds = ds.shuffle(1000)
    ds = ds.map(lambda w: (w[:-1], w[-1]))
    return ds.batch(batch_size).prefetch(1)

In [4]:
def applyModel(companyName, predictDate):
    symbol = str(df_kospi.loc[df_kospi['Name'] == companyName]['Symbol'].values[0])
    companytable = fdr.DataReader(symbol)
    companytable['Year'] = companytable.index.year
    companytable['Month'] = companytable.index.month
    companytable['Day'] = companytable.index.day
    
    scaler = MinMaxScaler()
    scale_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    scaled = scaler.fit_transform(companytable[scale_cols])
    df = pd.DataFrame(scaled, columns=scale_cols)
    
    x_train, x_test, y_train, y_test = train_test_split(df.drop('Close', 1), df['Close'], test_size=0.4, random_state=0, shuffle=False)
    x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, random_state=0, shuffle=False)
    
    WINDOW_SIZE=20
    BATCH_SIZE=32
    
    train_data = windowed_dataset(y_train, WINDOW_SIZE, BATCH_SIZE, True)
    validate_data = windowed_dataset(y_val, WINDOW_SIZE, BATCH_SIZE, True)
    test_data = windowed_dataset(y_test, WINDOW_SIZE, BATCH_SIZE, False)
    
    model = Sequential([
        # 1차원 feature map 생성
        Conv1D(filters=32, kernel_size=5,
               padding="causal",
               activation="relu",
               input_shape=[WINDOW_SIZE, 1]),
        # LSTM
        LSTM(16, activation='tanh'),
        Dense(16, activation="relu"),
        Dense(1),
    ])
    
    loss = Huber()
    optimizer = Adam(0.0005)
    model.compile(loss=Huber(), optimizer=optimizer, metrics=['mse'])
    
    earlystopping = EarlyStopping(monitor='val_loss', patience=10)
    filename = os.path.join('tmp', 'ckeckpointer.ckpt')
    checkpoint = ModelCheckpoint(filename, 
                                 save_weights_only=True, 
                                 save_best_only=True, 
                                 monitor='val_loss', 
                                 verbose=1)
    
    history = model.fit(train_data, 
                        validation_data=(validate_data), 
                        epochs=50, 
                        callbacks=[checkpoint, earlystopping])
    
    model.load_weights(filename)
    
    #주어진 데이터 적용
    forpredict = fdr.DataReader(symbol, predictDate - datetime.timedelta(days=40), predictDate- datetime.timedelta(days=1))
    forpredict = forpredict[-20:]
    scaler = MinMaxScaler()
    scale_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    scaled = scaler.fit_transform(forpredict[scale_cols])
    df = pd.DataFrame(scaled, columns=scale_cols)
    my_test = df['Close']
    my_test.loc['20'] = 0
    my_test_data = windowed_dataset(my_test, WINDOW_SIZE, BATCH_SIZE, False)
    my_pred = model.predict(my_test_data)
    
    myzeros = np.zeros((my_pred.shape[0],5))
    newpred = my_pred + myzeros
    price_pred = scaler.inverse_transform(newpred)
    price_pred = price_pred[:,3:4]
    
    return price_pred

In [5]:
df_kospi = fdr.StockListing('KOSPI')

predict_Date = datetime.date(2021 , 12, 3) 
#주가 예측을 원하는 날짜
#현재는 하루 이후만 가능함, 과거 날짜는 모두 괜찮음(아직 안나온 기준 하루)
predict_Company = '호텔신라' 
#주가 예측을 원하는 기업

predicted_price = applyModel(predict_Company, predict_Date)

2021-12-03 17:32:39.444863: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-03 17:32:39.445311: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-03 17:32:39.445386: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2021-12-03 17:32:39.445449: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2021-12-03 17:32:39.447244: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

Epoch 1/50
    111/Unknown - 2s 5ms/step - loss: 5.1778e-05 - mse: 1.0356e-04
Epoch 00001: val_loss improved from inf to 0.00142, saving model to tmp/ckeckpointer.ckpt
Epoch 2/50
Epoch 00002: val_loss did not improve from 0.00142
Epoch 3/50
Epoch 00003: val_loss did not improve from 0.00142
Epoch 4/50
Epoch 00004: val_loss improved from 0.00142 to 0.00103, saving model to tmp/ckeckpointer.ckpt
Epoch 5/50
Epoch 00005: val_loss did not improve from 0.00103
Epoch 6/50
Epoch 00006: val_loss did not improve from 0.00103
Epoch 7/50
Epoch 00007: val_loss improved from 0.00103 to 0.00088, saving model to tmp/ckeckpointer.ckpt
Epoch 8/50
Epoch 00008: val_loss did not improve from 0.00088
Epoch 9/50
Epoch 00009: val_loss improved from 0.00088 to 0.00073, saving model to tmp/ckeckpointer.ckpt
Epoch 10/50
Epoch 00010: val_loss improved from 0.00073 to 0.00070, saving model to tmp/ckeckpointer.ckpt
Epoch 11/50
Epoch 00011: val_loss did not improve from 0.00070
Epoch 12/50
Epoch 00012: val_loss did 

In [6]:
print("예측 가격은", predicted_price[0][0],"원")

예측 가격은 72997.45427668095 원
