In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from sklearn.metrics import r2_score

Using TensorFlow backend.


In [2]:
train_data_frame = pd.read_csv('./data_01_train.csv')
print(train_data_frame.columns.values.tolist())
train_data = train_data_frame['close']

test_data_frame = pd.read_csv('./data_01_test.csv')
print(test_data_frame.columns.values.tolist())
test_data = test_data_frame['close']

['date', 'open', 'high', 'low', 'close', 'volume']
['date', 'open', 'high', 'low', 'close', 'volume']


In [3]:
# 数据归一化
class MinMaxScaler:
    def __init__(self):
        self.min_num = None
        self.max_num = None

    def fit(self, data):
        self.max_num = np.max(data)
        self.min_num = np.min(data)

    def transform(self, data):
        return (data - self.min_num) / (self.max_num - self.min_num)

    def inverse_transform(self, data):
        return data * (self.max_num - self.min_num) + self.min_num
    
train_scaler = MinMaxScaler()
train_scaler.fit(train_data)
train_data_normalize = train_scaler.transform(train_data)

test_scaler = MinMaxScaler()
test_scaler.fit(test_data)
test_data_normalize = test_scaler.transform(test_data)

print(train_data[:10])
print(train_data_normalize[:10])
print(test_data[:10])
print(test_data_normalize[:10])

0    28.78
1    29.23
2    29.26
3    28.50
4    28.67
5    26.76
6    26.94
7    26.72
8    26.72
9    25.93
Name: close, dtype: float64
0    0.099027
1    0.107782
2    0.108366
3    0.093580
4    0.096887
5    0.059728
6    0.063230
7    0.058949
8    0.058949
9    0.043580
Name: close, dtype: float64
0    53.33
1    53.83
2    54.74
3    54.45
4    53.95
5    55.10
6    55.65
7    56.22
8    55.27
9    56.74
Name: close, dtype: float64
0    0.000000
1    0.012817
2    0.036145
3    0.028711
4    0.015893
5    0.045373
6    0.059472
7    0.074084
8    0.049731
9    0.087413
Name: close, dtype: float64


In [4]:
# 每次使用前8个数据，预测第9个数据
def exchange_data(data, count=8):
    X = []
    y = []
    for index in range(len(data) - count):
        X.append(data[index:index + count])
        y.append(data[index + count])
    X = np.array(X)
    X = X.reshape(X.shape[0], X.shape[1], -1)
    y = np.array(y)
    return X, y

In [5]:
# 数据变型
X_train, y_train = exchange_data(data=train_data_normalize, count=8)
X_test, y_test = exchange_data(data=test_data_normalize, count=8)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(723, 8, 1) (723,) (174, 8, 1) (174,)


In [6]:
# 建立RNN模型
rnn_model = Sequential()
# 输入层
rnn_model.add(
    SimpleRNN(
        units=5,
        input_shape=(8, 1),
        activation='relu'
    )
)
# 输出层
rnn_model.add(
    Dense(
        units=1,
        activation='linear'
    )
)
# 设置损失函数和最优化方法
rnn_model.compile(
    loss='mean_squared_error',
    optimizer='adam'
)
# 查看模型结构
rnn_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 5)                 35        
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 6         
Total params: 41
Trainable params: 41
Non-trainable params: 0
_________________________________________________________________


In [7]:
# 模型训练
rnn_model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=200,
    verbose=1
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/

Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x1df08c6a9b0>

In [8]:
# train score
y_train_src = train_scaler.inverse_transform(y_train)
y_train_predict = rnn_model.predict(X_train)
y_train_predict_src = train_scaler.inverse_transform(y_train_predict)
train_score = r2_score(y_true=y_train_src, y_pred=y_train_predict_src)
print(train_score)

0.9961652373654785


In [9]:
# test score
y_test_src = test_scaler.inverse_transform(y_test)
y_test_predict = rnn_model.predict(X_test)
y_test_predict_src = test_scaler.inverse_transform(y_test_predict)
test_score = r2_score(y_true=y_test_src, y_pred=y_test_predict_src)
print(test_score)

0.9740601975711113


In [None]:
# train
import matplotlib.pyplot as plt
plt.plot(train_data_frame['date'], train_data_frame['close'], color='b')

# y_train_predict_src = np.vstack([np.zeros(shape=(8, 1)), y_train_predict_src])
plt.plot(train_data_frame['date'], y_train_predict_src, color='r')
plt.show()

ValueError: x and y must have same first dimension, but have shapes (731,) and (739, 1)

In [None]:
# test
plt.plot(test_data_frame['date'], test_data_frame['close'], color='b')

y_test_predict_src = np.vstack([np.zeros(shape=(8, 1)), y_test_predict_src])
plt.plot(test_data_frame['date'], y_test_predict_src, color='r')
plt.show()