In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam


2025-01-01 11:07:57.596445: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:

# 1. Load the data
# The CSV is expected to provide the columns 'date', 'holiday',
# 'nasdaq_index' and 'stock_price'.
# NOTE(review): the windowing and the chronological train/test split further
# down assume the rows are already ordered by date - confirm against the source.
data = pd.read_csv("stock_data.csv")

# 2. Feature engineering
# Parse the date column once and reuse the result for every derived column.
dates = pd.to_datetime(data['date'])
data['year'] = dates.dt.year  # derived, but not part of `features` below
data['month'] = dates.dt.month
data['day'] = dates.dt.day

# Cyclical encoding of the month so that December and January end up adjacent.
data['month_sin'] = np.sin(2 * np.pi * data['month'] / 12)
data['month_cos'] = np.cos(2 * np.pi * data['month'] / 12)

# Model inputs and prediction target
features = ['month_sin', 'month_cos', 'day', 'holiday', 'nasdaq_index']
target = ['stock_price']

# Normalisation
# Fit the scalers on the leading (training) part of the series only, so that
# min/max statistics of the held-out test period do not leak into training.
# The ratio must match the 0.8 used for the train/test split of the sequences;
# the first int(0.8 * N) rows always lie inside the training windows.
# Values in the test period may therefore fall slightly outside [0, 1].
train_fraction = 0.8
n_fit_rows = int(len(data) * train_fraction)
if n_fit_rows < 1:
    raise ValueError("stock_data.csv does not contain enough rows to fit the scalers")

scaler_features = MinMaxScaler()
scaler_target = MinMaxScaler()

scaler_features.fit(data[features].iloc[:n_fit_rows])
scaler_target.fit(data[target].iloc[:n_fit_rows])

data[features] = scaler_features.transform(data[features])
data[target] = scaler_target.transform(data[target])
# 3. Build the sliding-window time-series samples
def create_sequences(data, feature_cols, target_col, lookback=10):
    """Turn a tabular time series into supervised (window, next value) samples.

    For every position ``i`` the sample consists of the ``lookback`` consecutive
    rows ``i .. i + lookback - 1`` of the feature columns, and the label is the
    target value of row ``i + lookback``.

    Args:
        data: DataFrame holding the feature and target columns, one row per
            time step, in the order the windows should follow.
        feature_cols: List of feature column names.
        target_col: Target column name, or a list of names of which the first
            one is used as the label.
        lookback: Number of rows per input window; must be at least 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(len(data) - lookback, lookback, n_features)`` and ``y`` has shape
        ``(len(data) - lookback,)``. When ``data`` has ``lookback`` rows or
        fewer, both arrays are empty but keep these trailing dimensions.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be a positive integer")

    # Select the columns once instead of re-slicing the DataFrame per window.
    feature_values = data[feature_cols].to_numpy()
    target_values = data[target_col].to_numpy()
    if target_values.ndim > 1:
        # A list of target columns yields a 2-D array; the label is the first column.
        target_values = target_values[:, 0]

    n_windows = len(data) - lookback
    if n_windows <= 0:
        # Not enough rows for a single window: return shape-stable empty arrays.
        empty_X = np.empty((0, lookback) + feature_values.shape[1:], dtype=feature_values.dtype)
        empty_y = np.empty((0,), dtype=target_values.dtype)
        return empty_X, empty_y

    X = np.stack([feature_values[i:i + lookback] for i in range(n_windows)])
    y = target_values[lookback:]
    return X, y

lookback = 10  # use the previous 10 days to predict the next day
# Pass lookback explicitly so the window length always matches the
# input_shape the LSTM is built with below.
X, y = create_sequences(data, features, target, lookback=lookback)

# 4. Split into training and test sets
# The split is positional (no shuffling): the first 80% of the windows are
# used for training and the remaining 20% for testing.
split = int(len(X) * 0.8)
if split == 0 or split == len(X):
    raise ValueError(
        "Not enough samples to build non-empty training and test sets "
        f"(got {len(X)} sequences with lookback={lookback})"
    )
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# 5. Build the LSTM model
model = Sequential([
    LSTM(50, return_sequences=False, input_shape=(lookback, len(features))),
    Dense(1)  # single output: the (scaled) stock price prediction
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# 6. Train the model
# The test set is passed as validation data for monitoring only; no callback
# uses the validation loss, so it does not influence the fitted weights.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# 7. Evaluate and predict
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Predict on the held-out windows
y_pred = model.predict(X_test)

# Undo the target scaling so values are back in the original price units
y_pred_actual = scaler_target.inverse_transform(y_pred)
y_test_actual = scaler_target.inverse_transform(y_test.reshape(-1, 1))

# Print the first few actual and predicted values side by side
print("Actual values:", y_test_actual[:5].flatten())
print("Predicted values:", y_pred_actual[:5].flatten())
