In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# 加载股票K线数据
stock_data = pd.read_csv('1_601360_klines.csv')

# 选择相关特征，例如收盘价
stock_prices = stock_data[['close']].values

# 归一化股票价格
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_stock_prices = scaler.fit_transform(stock_prices)

# 创建时间序列（以20天为窗口）
time_steps = 5
X_stock = []
y_stock = []
for i in range(time_steps, len(scaled_stock_prices)):
    X_stock.append(scaled_stock_prices[i-time_steps:i, 0])
    y_stock.append(scaled_stock_prices[i, 0])

X_stock = np.array(X_stock)
y_stock = np.array(y_stock)

# 调整形状适合LSTM输入
X_stock = np.reshape(X_stock, (X_stock.shape[0], X_stock.shape[1], 1))


In [31]:
# 加载舆论情感数据
sentiment_data = pd.read_csv('comments.csv')

# 转换日期格式（如需要）
sentiment_data['update_time'] = pd.to_datetime(sentiment_data['update_time'], format='%m-%d %H:%M')

# 按日期聚合情感得分
daily_sentiment = sentiment_data.groupby(sentiment_data['update_time'].dt.date)['sentiment'].mean()

# 将情感数据转换为数组
daily_sentiment_values = daily_sentiment.values

# 归一化情感数据
scaled_sentiment = scaler.fit_transform(daily_sentiment_values.reshape(-1, 1))

# 检查 scaled_sentiment 的长度和 time_steps
print(f"scaled_sentiment 的长度: {len(scaled_sentiment)}")
print(f"time_steps 的值: {time_steps}")

# 确保 scaled_sentiment 的长度足够大
if len(scaled_sentiment) > time_steps:
    X_sentiment = []
    for i in range(time_steps, len(scaled_sentiment)):
        X_sentiment.append(scaled_sentiment[i-time_steps:i])

    # 转换为 numpy 数组
    X_sentiment = np.array(X_sentiment)

    # 检查X_sentiment的形状
    print(X_sentiment.shape)  # 应该是 (样本数, 时间步数)

    # 确保X_sentiment是三维数组 (样本数, 时间步数, 特征数=1)
    if len(X_sentiment.shape) == 2:
        X_sentiment = np.reshape(X_sentiment, (X_sentiment.shape[0], X_sentiment.shape[1], 1))

    # 检查重塑后的形状
    print(X_sentiment.shape)  # 应该是 (样本数, 时间步数, 1)
else:
    print("scaled_sentiment 的长度不足以生成时间步的序列数据。")


scaled_sentiment 的长度: 12
time_steps 的值: 5
(7, 5, 1)
(7, 5, 1)


In [32]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Concatenate, Flatten

# LSTM 分支处理股票数据
input_stock = Input(shape=(X_stock.shape[1], 1))
lstm_stock = LSTM(units=50, return_sequences=True)(input_stock)
lstm_stock = LSTM(units=50, return_sequences=False)(lstm_stock)
lstm_stock = Dropout(0.2)(lstm_stock)
# 确认生成的 X_sentiment 数组维度是否正确
print(X_sentiment.shape)  # 应该是 (样本数, 时间步长, 1)

# 如果维度不足，请查看数据是否正确生成
if len(X_sentiment.shape) != 3:
    raise ValueError(f"X_sentiment 的维度错误，当前维度: {X_sentiment.shape}")

# 确保形状正确再继续模型的构建
input_sentiment = Input(shape=(X_sentiment.shape[1], 1))
dense_sentiment = Dense(25, activation='relu')(input_sentiment)
dense_sentiment = Dropout(0.2)(dense_sentiment)

# Dense 分支处理情感数据
# input_sentiment = Input(shape=(X_sentiment.shape[1], 1))
# dense_sentiment = Dense(25, activation='relu')(input_sentiment)
# dense_sentiment = Dropout(0.2)(dense_sentiment)
dense_sentiment = Flatten()(dense_sentiment)

# 合并两个分支
merged = Concatenate()([lstm_stock, dense_sentiment])

# 输出层
output = Dense(1, activation='linear')(merged)

# 定义模型
model = Model(inputs=[input_stock, input_sentiment], outputs=output)
model.compile(optimizer='adam', loss='mean_squared_error')

# 查看模型结构
model.summary()


(7, 5, 1)


In [34]:
# 找到最小长度，确保X_test_stock和X_test_sentiment对齐
min_test_length = min(len(X_test_stock), len(X_test_sentiment))

# 截取对齐数据
X_test_stock = X_test_stock[:min_test_length]
X_test_sentiment = X_test_sentiment[:min_test_length]
y_test = y_test[:min_test_length]

# 打印检查形状
print(f"X_test_stock shape: {X_test_stock.shape}")
print(f"X_test_sentiment shape: {X_test_sentiment.shape}")
print(f"y_test shape: {y_test.shape}")

# 预测
predictions = model.predict([X_test_stock, X_test_sentiment])

# 可视化结果
plt.plot(y_test, label='真实价格')
plt.plot(predictions, label='预测价格')
plt.legend()
plt.show()



X_test_stock shape: (2, 5, 1)
X_test_sentiment shape: (2, 5, 1)
y_test shape: (2,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


NameError: name 'plt' is not defined