In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import json
import requests
response = requests.get(
    "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/MoneyFlow.ssl_qsfx_lscjfb?page=1&num=1000&sort=opendate&asc=0&daima=sz002057",
)

# 通过json.loads()方法将json字符串转换为python对象

a = json.loads(response.text)
# 将a转换为DataFrame

data = pd.DataFrame(a)
data.drop(['r0', 'r1', 'r2', 'r3'], axis=1, inplace=True)

# 修改列名
data.columns = ["日期", "收盘价", "涨跌幅", "换手率", "净流入额",
                "净流入占比", "超大单净流入", "大单净流入", "小单净流入", "散单净流入"]
#
# 修改数据类型
data['日期'] = pd.to_datetime(data['日期'])
data['收盘价'] = data['收盘价'].astype(float)
data['涨跌幅'] = data['涨跌幅'].astype(float)
data['换手率'] = data['换手率'].astype(float)
data['净流入额'] = data['净流入额'].astype(float)
data['净流入占比'] = data['净流入占比'].astype(float)
data['超大单净流入'] = data['超大单净流入'].astype(float)
data['大单净流入'] = data['大单净流入'].astype(float)
data['小单净流入'] = data['小单净流入'].astype(float)
data['散单净流入'] = data['散单净流入'].astype(float)
data = data.sort_values(by='日期', ascending=True)
# 将日期设置为索引
data.set_index('日期', inplace=True)
data.head()


# 使用GRU对涨幅单步预测

# 数据预处理

n_samples = 1000
n_features = 9
seq_len = 30
# 使用滑动窗口将数据转换为时间序列数据


def sliding_window(DataSet, X_width, y_width, gap=1, multi_vector=None, X_data=True):
    '''
    DataSet has to be as a DataFrame
    '''
    if X_data:
        if multi_vector:
            a, b = DataSet.shape
        else:
            a = DataSet.shape[0]
            b = 1
        c = (a-X_width-y_width-a % gap)/gap
        X = np.reshape(DataSet.iloc[0:X_width, :].values, (1, X_width, b))
        for i in range(len(DataSet) - X_width - y_width):
            i += 1
            if i > c:
                break
            j = i * gap
            tmp = DataSet.iloc[j:j + X_width, :].values
            tmp = np.reshape(tmp, (1, X_width, b))
            X = np.concatenate([X, tmp], 0)
        return X
    else:
        if multi_vector:
            print('y_data-error：expect 1D ,given %dD' % DataSet.shape[1])
            return
        else:
            a = DataSet.shape[0]
        c = (a-X_width-y_width-a % gap)/gap
        y = np.reshape(
            DataSet.iloc[X_width:X_width + y_width].values, (1, y_width))
        for i in range(len(DataSet) - X_width - y_width):
            i += 1
            if i > c:
                break
            j = i * gap + X_width
            tmp = DataSet.iloc[j:j + y_width].values
            tmp = np.reshape(tmp, (1, y_width))
            y = np.concatenate([y, tmp])
        return y


X = sliding_window(data, seq_len, 1, 1, True, True)

y = sliding_window(data["收盘价"], seq_len, 1, 1, multi_vector=False, X_data=None)

# 标准化


def normalize(x):
    x = (x - x.mean()) / x.std()
    return x


train_rate = 0.8
train_num = int(n_samples * train_rate)


X_train = np.array(X[:train_num])
X_test = np.array(X[train_num:])

y_train = y[:train_num]
y_test = y[train_num:]
# 标准化训练集与测试集
X_train = np.apply_along_axis(normalize, axis=1, arr=X_train)
X_test = np.apply_along_axis(normalize, axis=1, arr=X_test)
# 定义模型结构
model = tf.keras.Sequential([
    tf.keras.layers.GRU(256, input_shape=(seq_len, n_features),
                        return_sequences=True, dropout=0.2),
    tf.keras.layers.GRU(256, return_sequences=True, dropout=0.4),
    tf.keras.layers.GRU(128, return_sequences=True, dropout=0.2),
    tf.keras.layers.GRU(256, dropout=0.2),
    tf.keras.layers.Dense(1),

])


# 编译模型
model.compile(optimizer='adam', loss='mse',
              metrics=['mse'])

# 训练模型
model.fit(X_train, y_train, epochs=10, batch_size=16,
          validation_data=(X_test, y_test))

# 模型预测
preds = model.predict(X_test)

# 模型评估
loss = model.evaluate(X_test, y_test)
print('Test loss:', loss)


preds.shape

# 画图并显示图例

plt.plot(preds, label='predit',)
plt.plot(y_test, label='real',)
plt.legend()


Epoch 1/10
Epoch 2/10
Epoch 3/10
 6/50 [==>...........................] - ETA: 3s - loss: 3.8575 - mse: 3.8575

KeyboardInterrupt: 