In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM
from tensorflow.keras.layers import SpatialDropout1D, SimpleRNN, GRU, Flatten

In [9]:
# Read the raw stock table from disk.
df = pd.read_csv("./stock_dataset_2.csv")

# Keep columns 2..9 (eight columns) as a plain NumPy array; this is the array
# the windowing helper below reads from.
data = df.iloc[:, 2:10].to_numpy()

# Fit a min-max scaler to the [0, 1] range on the whole array, then apply it.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(data)
scaled = scaler.transform(data)
scaled.shape

(6109, 8)

In [10]:
def get_train_data(time_step=20, train_begin=0, train_end=5800, source=None):
    """Build z-scored sliding-window samples from a row slice of the stock table.

    Rows ``train_begin:train_end`` are taken from ``source`` (or from the
    notebook-level ``data`` array when ``source`` is None).  Every sample uses
    ``time_step`` consecutive rows of columns 0-6 as input and column 7 of the
    row immediately after that window as its target.

    Both inputs and targets are standardised with statistics computed from the
    selected slice itself (inputs: per window position and feature; targets:
    over all samples).  A zero standard deviation is replaced by 1 for the
    division so constant columns become 0 instead of NaN.

    Parameters
    ----------
    time_step : int
        Number of consecutive rows in one input window.
    train_begin, train_end : int
        Row slice ``[train_begin, train_end)`` of the table to use.
    source : array-like, optional
        2-D table with at least 8 columns.  Defaults to the global ``data``.

    Returns
    -------
    train_x : ndarray, shape (n, time_step, 7)
        Standardised input windows.
    train_y : ndarray, shape (n, 1)
        Standardised targets.
    mean, std : ndarray, shape (1,)
        Mean and standard deviation of the *raw* targets, i.e. what is needed
        to map predictions back with ``pred * std + mean``.
    y_ : ndarray, shape (n,)
        Raw (un-normalised) target values.

    Raises
    ------
    ValueError
        If the slice has no more than ``time_step`` rows, so that not a single
        (window, target) pair can be formed.
    """
    table = data if source is None else np.asarray(source)
    window = table[train_begin:train_end]

    n_samples = len(window) - time_step
    if n_samples <= 0:
        raise ValueError(
            f"rows [{train_begin}:{train_end}] contain {len(window)} row(s); "
            f"more than time_step={time_step} are required"
        )

    # Raw targets: everything after the first full window (index is relative
    # to the slice, so no absolute end index is needed).
    y_ = window[time_step:, 7]

    values = np.asarray(window, dtype=float)
    # Stack the overlapping windows directly as arrays (no list round-trip).
    train_x = np.stack([values[i:i + time_step, :7] for i in range(n_samples)])
    train_y = values[time_step:, 7].reshape(-1, 1)

    # Target statistics are returned untouched so callers can de-normalise.
    mean, std = train_y.mean(axis=0), train_y.std(axis=0)

    x_mean, x_std = train_x.mean(axis=0), train_x.std(axis=0)
    # Guard only the divisors: a constant column has std 0 and would yield NaN.
    safe_x_std = np.where(x_std == 0, 1.0, x_std)
    safe_y_std = np.where(std == 0, 1.0, std)

    train_x = (train_x - x_mean) / safe_x_std
    train_y = (train_y - mean) / safe_y_std

    return train_x, train_y, mean, std, y_

print(data.shape)

# Training samples: rows 2000-5799, each window is 20 time steps long
# (time_step is the sequence length); only the inputs and targets are kept.
train_x, train_y, _, _, _ = get_train_data(time_step=20, train_begin=2000, train_end=5800)
# Validation samples: rows 5800 to the end of the table, together with the
# target mean/std and the raw target values of that slice.
test_x, test_y, test_mean, test_std, test_y_ = get_train_data(
    time_step=20, train_begin=5800, train_end=len(data)
)

# Report the shape of every array, in the order they were produced.
for arr in (train_x, train_y, test_x, test_y, test_mean, test_std, test_y_):
    print(arr.shape)

(6109, 8)
(3780, 20, 7)
(3780, 1)
(289, 20, 7)
(289, 1)
(1,)
(1,)
(289,)


In [12]:
## Initialise the sequential model
model = Sequential()
## Two stacked LSTM layers over windows of 20 time steps x 7 features.
## dropout applies to the input transformation and recurrent_dropout to the
## recurrent transformation; both use a rate of 0.2.
## The first layer returns the full sequence so the second LSTM can consume it;
## the second returns only its final state.
model.add(LSTM(128, input_shape=(20, 7), dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=False))
## Variants tried earlier and left disabled: Dropout(0.5) between layers,
## a third LSTM(1) layer, Flatten, and a ReLU activation on the output.
## Single linear output unit for the regression target.
model.add(Dense(1))

## Loss is MSE, optimiser is Adam, and MSE is also tracked as the metric
model.compile(loss='mse', optimizer='Adam', metrics=['mse'])
## Show the model overview. summary() prints the table itself and returns None,
## so wrapping it in print() only adds a stray "None" line to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 20, 128)           69632     
                                                                 
 lstm_3 (LSTM)               (None, 64)                49408     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 119,105
Trainable params: 119,105
Non-trainable params: 0
_________________________________________________________________
None


In [13]:
# Train the model. Author's note: these hyper-parameters need tuning by the
# reader; the results obtained with the values below were not good.
history = model.fit(
    train_x, train_y,                    # training inputs and targets
    batch_size=60,                       # 60 samples per batch
    epochs=400,                          # 400 passes over the training data
    validation_data=(test_x, test_y),    # held-out windows used for validation
)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

In [None]:
# Build evaluation windows from rows 5800-5999 (20 time steps per window).
# NOTE(review): despite its name, x_mean is the mean of the raw *targets* of
# this slice and y_std their standard deviation (that is what get_train_data
# returns), so the de-normalisation below uses statistics of the very values
# being predicted -- confirm this is intended.
x, y, x_mean, y_std, y_label = get_train_data(time_step=20, train_begin=5800, train_end=6000)
print(x.shape)
print(x)
for stat in (x_mean, y_std, y_label):
    print(stat.shape)

# Predict one sample at a time.
result = model.predict(x, batch_size=1)
print(result.shape)
print(result)

# Map the standardised predictions back to the original value scale.
result = y_std * np.asarray(result) + x_mean
print(result.shape)

# Drop the trailing length-1 axis so the predictions form a flat series.
result = result.squeeze()
print(y_std, x_mean)
print(y_label)
print(result)

In [None]:
# Line chart: raw target values (red) against de-normalised predictions (blue).
# The number of points is derived from the two series instead of a hard-coded
# 180, so changing the prediction window no longer breaks the plot with a
# length mismatch.
n_points = min(len(y_label), len(result))
data_list = [str(i) for i in range(n_points)]  # name kept for any later cell that reads it
x_axis = list(range(n_points))

plt.figure()
plt.plot(x_axis, y_label[:n_points], color='r', label='actual')
plt.plot(x_axis, result[:n_points], color='b', label='predicted')
plt.xlabel('date')
plt.ylabel('stock index')
# Without a legend the two curves cannot be told apart by a reader.
plt.legend()
plt.show()