# 利用RNN從前十日收盤價預測股價漲跌平

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [7]:
%env KERAS_BACKEND = tensorflow

from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.utils import np_utils

env: KERAS_BACKEND=tensorflow


Using TensorFlow backend.


## 資料處理

In [2]:
# Load the index end-of-day spreadsheet; header=0 takes column names from the first row.
df = pd.read_excel(
    '大盤指數盤後數據.xlsx',
    header=0,
)

In [3]:
# Build one sample per predictable day: the WINDOW preceding closing prices are
# the features and the up/down/flat label of the following day is the target.
WINDOW = 10  # look-back length in trading days

# Extract each column once instead of slicing the DataFrame on every iteration.
close_prices = df['收盤價'].tolist()
labels = df['漲跌平'].tolist()

# Sample i covers rows i .. i+WINDOW-1; its label comes from row i+WINDOW.
X = [close_prices[i:i + WINDOW] for i in range(len(close_prices) - WINDOW)]
Y = labels[WINDOW:]

In [4]:
# Sanity check: True if any closing price is NaN.
np.isnan(df['收盤價']).any()

False

In [5]:
# Convert to a float array so the later scaling step cannot truncate to integers,
# then add a trailing feature axis: (samples, time steps, 1).
# The sample count and window length are taken from the data, not hard-coded.
X = np.array(X, dtype=float)
X = X.reshape(len(X), -1, 1)

In [6]:
# Min-max scale every window independently to the range [0, 1].
# Computed for all windows at once along the time axis (axis 1).
window_min = X.min(axis=1, keepdims=True)
window_span = X.max(axis=1, keepdims=True) - window_min
# A flat window has zero span; divide by 1 there so it becomes all zeros, not NaN.
X = (X - window_min) / np.where(window_span == 0, 1, window_span)

In [8]:
# One-hot encode the labels into 3 classes.
Y = np_utils.to_categorical(Y, num_classes=3)

In [9]:
# Display the shape of the encoded label array.
Y.shape

(13144, 3)

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Split into training and test sets: 30% held out, fixed seed for a repeatable split.
# NOTE(review): consecutive windows share 9 of their 10 days, so a shuffled split
# puts near-duplicate windows on both sides -- consider a chronological split.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=9487,
)

## 模型建立

In [10]:
# Two stacked LSTM layers, each followed by dropout, feeding a 3-way softmax.
x = Input(shape=(10, 1))

# The first LSTM returns the whole sequence so the second LSTM gets one vector per time step.
h_1 = LSTM(20, return_sequences=True)(x)
z_1 = Dropout(0.3)(h_1)

# The second LSTM returns only its final output.
h_2 = LSTM(20)(z_1)
z_2 = Dropout(0.3)(h_2)

# Probability for each of the 3 classes.
y = Dense(3, activation='softmax')(z_2)

model = Model(x, y)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 10, 1)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 10, 20)            1760      
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 20)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 20)                3280      
_________________________________________________________________
dropout_2 (Dropout)          (None, 20)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 63        
Total params: 5,103
Trainable params: 5,103
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Multi-class cross-entropy loss, RMSprop optimizer, accuracy reported during training.
model.compile(
    loss="categorical_crossentropy",
    optimizer='RMSprop',
    metrics=['accuracy'],
)

In [14]:
# Train for 50 epochs on the training split.
model.fit(x=x_train, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0xa94d2081d0>

## 預測結果

In [15]:
# Loss and accuracy on the held-out test split.
model.evaluate(x=x_test, y=y_test)



[1.0869121604710514, 0.3879310344827586]

In [16]:
# Per-class probabilities predicted for every test sample.
model.predict(x=x_test)

array([[0.31143504, 0.3288193 , 0.35974565],
       [0.38853648, 0.42987132, 0.18159223],
       [0.3494157 , 0.33770794, 0.3128763 ],
       ...,
       [0.32886934, 0.3155806 , 0.35555008],
       [0.33916864, 0.33810973, 0.32272157],
       [0.32770425, 0.31914443, 0.35315138]], dtype=float32)

In [17]:
# Keep the predicted probabilities, then show the most likely class index per sample.
predict = model.predict(x_test)
predict.argmax(axis=1)

array([2, 1, 0, ..., 2, 0, 2], dtype=int64)

In [18]:
# Display the one-hot encoded test labels for comparison with the predictions.
y_test

array([[0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       ...,
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)

In [19]:
from sklearn.metrics import confusion_matrix

In [20]:
# Confusion matrix of true class (rows) against the model's predicted class (columns).
confusion_matrix(
    y_true=y_test.argmax(axis=1),
    y_pred=predict.argmax(axis=1),
)

array([[964, 215, 333],
       [750, 190, 311],
       [638, 167, 376]], dtype=int64)

## 與隨機預測比較

In [23]:
# Number of test samples, read from the decoded class-index vector.
y_test.argmax(axis=1).shape

(3944,)

In [26]:
# Baseline: a uniformly random class (0, 1 or 2) for every test sample.
# The length follows y_test and the generator is seeded, so the baseline is
# reproducible and stays in sync if the split size changes.
randompredict = np.random.RandomState(9487).randint(3, size=len(y_test))

In [27]:
# Confusion matrix of true class (rows) against the random baseline (columns).
confusion_matrix(
    y_true=y_test.argmax(axis=1),
    y_pred=randompredict,
)

array([[442, 505, 565],
       [373, 434, 444],
       [402, 383, 396]], dtype=int64)

In [28]:
# Accuracy of the random baseline: the fraction of test samples whose random guess
# matches the true class. Computed from the arrays so it is never stale after a
# re-run, unlike numbers copied by hand from the confusion matrix.
np.mean(np.argmax(y_test, axis=1) == randompredict)

0.3225152129817444