In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, MultiHeadAttention
from tensorflow.keras.models import Model
import akshare as ak  # 无需注册的A股数据库

In [14]:

# --------------------------1. 用AkShare获取A股数据--------------------------
# A股代码格式：上交所（60XXXX）、深交所（00XXXX/30XXXX），无需加.SH/.SZ
stock_code = "600519"  # 贵州茅台（可替换为300750=宁德时代）
start_date = "2025-10-01"  # AkShare日期格式是YYYY-MM-DD
end_date = "2025-10-31"

# 获取A股日线数据（前复权，消除除权除息影响）
# ak.stock_zh_a_hist() 返回A股历史K线数据
df = ak.stock_zh_a_hist(
    symbol=stock_code,
    period="daily",  # 日线数据
    start_date=start_date,
    end_date=end_date,
    adjust="qfq"  # qfq=前复权（必须选，否则价格失真）
)

In [5]:

# 数据预处理：按日期升序排序，设置日期为索引
df["日期"] = pd.to_datetime(df["日期"])
df.set_index("日期", inplace=True)

KeyError: '日期'

In [3]:

# 选择特征（和模型对齐：开盘价、最高价、最低价、收盘价、成交量）
features = ["开盘", "最高", "最低", "收盘", "成交量"]  # AkShare字段名是中文
data = df[features].dropna()
print(f"获取到的{stock_code}数据形状: {data.shape}")
print("前5行数据：")
print(data.head())

KeyError: '日期'

In [None]:

# --------------------------2. 后续流程和之前完全一致--------------------------
# 数据归一化
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# 划分训练集/测试集、构建序列数据（和Tushare版本完全相同）
train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]
time_window = 30
close_idx = features.index("收盘")  # 注意这里是中文"收盘"

def create_sequences(data, time_window, target_idx):
    X, y = [], []
    for i in range(time_window, len(data)):
        X.append(data[i - time_window:i])
        y.append(data[i, target_idx])
    return np.array(X), np.array(y)

train_X, train_y = create_sequences(train_data, time_window, close_idx)
test_X, test_y = create_sequences(test_data, time_window, close_idx)

# 模型构建、训练、预测、可视化（和Tushare版本完全相同）
def build_lstm_attention_model(input_shape):
    inputs = Input(shape=input_shape)
    lstm_out = LSTM(50, return_sequences=True, activation='tanh')(inputs)
    attention_out = MultiHeadAttention(num_heads=8, key_dim=64)(lstm_out, lstm_out)
    combined = lstm_out + attention_out
    last_step = combined[:, -1, :]
    output = Dense(1, activation='linear')(last_step)
    return Model(inputs=inputs, outputs=output)

model = build_lstm_attention_model((time_window, len(features)))
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

history = model.fit(
    train_X, train_y,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

# 反归一化函数
def inverse_transform_pred(y_pred, scaler, features, target_idx):
    y_reshaped = np.zeros(shape=(len(y_pred), len(features)))
    y_reshaped[:, target_idx] = y_pred.flatten()
    return scaler.inverse_transform(y_reshaped)[:, target_idx]

train_pred = inverse_transform_pred(train_pred, scaler, features, close_idx)
test_pred = inverse_transform_pred(test_pred, scaler, features, close_idx)
train_y_true = inverse_transform_pred(train_y, scaler, features, close_idx)
test_y_true = inverse_transform_pred(test_y, scaler, features, close_idx)

# 绘图（和之前一致）
train_dates = data.index[time_window:train_size]
test_dates = data.index[train_size + time_window:]

plt.figure(figsize=(12, 4))
plt.plot(train_dates, train_y_true, label='真实收盘价', color='blue')
plt.plot(train_dates, train_pred, label='预测收盘价', color='red', alpha=0.7)
plt.title(f'{stock_code} 训练集价格预测（A股-AkShare）')
plt.xlabel('日期')
plt.ylabel('价格（元）')
plt.legend()
plt.show()

plt.figure(figsize=(12, 4))
plt.plot(test_dates, test_y_true, label='真实收盘价', color='blue')
plt.plot(test_dates, test_pred, label='预测收盘价', color='red', alpha=0.7)
plt.title(f'{stock_code} 测试集价格预测（A股-AkShare）')
plt.xlabel('日期')
plt.ylabel('价格（元）')
plt.legend()
plt.show()