In [None]:
# 定义CNN-LSTM混合模型
class CNNLSTMModel(nn.Module):
    """Hybrid CNN-LSTM regressor producing one scalar per input sequence.

    A single 2-D convolution extracts local patterns over the
    (time step, feature) grid, a max-pool down-samples the time axis, an LSTM
    models the remaining temporal dependencies, and a linear layer maps the
    last LSTM output to a scalar.
    """

    # Geometry of the convolution / pooling stage. The layers and the shape
    # arithmetic in __init__ both read these, so they cannot drift apart.
    _CONV_KERNEL = (2, 2)
    _CONV_PADDING = (1, 1)
    _POOL_KERNEL = (2, 1)  # pool over time only; feature axis is untouched

    def __init__(self, input_channels=1, input_seq_length=10, input_features=3, 
                 conv_channels=32, lstm_hidden_size=58, num_lstm_layers=2, dropout=0.2):
        """
        Build the CNN, LSTM and output layers.

        Args:
            input_channels: Number of channels fed to the convolution.
            input_seq_length: Number of time steps in each input sequence.
            input_features: Number of features per time step.
            conv_channels: Number of output channels of the convolution.
            lstm_hidden_size: Hidden state size of the LSTM.
            num_lstm_layers: Number of stacked LSTM layers.
            dropout: Dropout probability, applied between LSTM layers (only
                when there is more than one layer) and before the final
                linear layer.
        """
        super(CNNLSTMModel, self).__init__()
        
        self.input_channels = input_channels
        self.input_seq_length = input_seq_length
        self.input_features = input_features
        self.conv_channels = conv_channels
        self.lstm_hidden_size = lstm_hidden_size
        self.num_lstm_layers = num_lstm_layers
        
        # CNN part - extracts local pattern features.
        self.conv1 = nn.Conv2d(input_channels, conv_channels,
                               kernel_size=self._CONV_KERNEL, padding=self._CONV_PADDING)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=self._POOL_KERNEL)
        
        # The padded 2x2 convolution grows BOTH spatial axes by one before
        # pooling, so the sizes must be derived from the layer geometry
        # rather than assumed to be (input_seq_length // 2, input_features).
        conv_seq_length = self._out_len(
            input_seq_length, self._CONV_KERNEL[0], padding=self._CONV_PADDING[0])
        conv_features = self._out_len(
            input_features, self._CONV_KERNEL[1], padding=self._CONV_PADDING[1])
        
        # MaxPool2d uses stride == kernel_size when no stride is given.
        self.cnn_output_seq_length = self._out_len(
            conv_seq_length, self._POOL_KERNEL[0], stride=self._POOL_KERNEL[0])
        pooled_features = self._out_len(
            conv_features, self._POOL_KERNEL[1], stride=self._POOL_KERNEL[1])
        
        # Width of each time step handed to the LSTM (channels x features).
        self.cnn_output_features = conv_channels * pooled_features
        
        # LSTM part - models temporal dependencies.
        self.lstm = nn.LSTM(
            input_size=self.cnn_output_features,
            hidden_size=lstm_hidden_size,
            num_layers=num_lstm_layers,
            batch_first=True,
            # Inter-layer dropout is only meaningful with stacked layers.
            dropout=dropout if num_lstm_layers > 1 else 0
        )
        
        # Fully connected layer - maps the LSTM summary to the output.
        self.fc = nn.Linear(lstm_hidden_size, 1)
        
        # Dropout applied to the LSTM summary before the linear layer.
        self.dropout = nn.Dropout(dropout)
    
    @staticmethod
    def _out_len(size, kernel, padding=0, stride=1):
        """Return the output length of a conv/pool axis (dilation 1, floor mode)."""
        return (size + 2 * padding - kernel) // stride + 1
    
    def forward(self, x):
        """
        Run the model on a batch.

        Args:
            x: Tensor of shape (batch_size, seq_len, features); the channel
                axis is added automatically. A 4-D tensor of shape
                (batch_size, channels, seq_len, features) is used as-is.

        Returns:
            Tensor of shape (batch_size,) with one prediction per sample.
        """
        # Add the channel dimension only when the caller did not supply one.
        if x.ndim == 3:
            x = x.unsqueeze(1)
        
        # CNN forward pass.
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        
        # (batch, channels, time, features) -> (batch, time, channels*features)
        # The last axis is inferred so it always matches the real CNN output.
        x = x.permute(0, 2, 1, 3)
        x = x.reshape(x.size(0), x.size(1), -1)
        
        # nn.LSTM initialises the hidden and cell states to zeros on the
        # input's device and dtype when none are passed.
        lstm_out, _ = self.lstm(x)
        
        # Keep only the output of the last time step.
        lstm_out = lstm_out[:, -1, :]
        
        lstm_out = self.dropout(lstm_out)
        
        output = self.fc(lstm_out)
        
        # Drop only the trailing size-1 axis so that a batch of one still
        # yields shape (1,) instead of a 0-d scalar.
        return output.squeeze(-1)
# Build the model with the hyper-parameters used for this experiment.
model = CNNLSTMModel(
    input_channels=1,
    input_seq_length=10,  # sequence length (time steps per sample)
    input_features=3,     # features per time step (3 principal components)
    conv_channels=32,     # output channels of the CNN layer
    lstm_hidden_size=64,  # LSTM hidden size
    num_lstm_layers=2,    # number of stacked LSTM layers
    dropout=0.2           # dropout probability
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)  # weight_decay adds L2 regularisation

# Halve the learning rate once the monitored loss has stopped improving for
# 10 epochs. `verbose=True` is intentionally not passed: the argument is
# deprecated in recent PyTorch releases, so learning-rate reductions are
# reported by the training loop below instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

# Training configuration.
num_epochs = 200  # upper bound; early stopping usually ends training sooner
train_losses = []
val_losses = []
best_val_loss = float('inf')
patience = 25  # epochs without improvement tolerated before stopping early
patience_counter = 0

# Print the model structure and its parameter count.
print(model)
print(f"模型总参数数量: {sum(p.numel() for p in model.parameters())}")

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    
    for inputs, targets in train_loader:
        # Forward pass.
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        
        # Clip the gradient norm to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        running_loss += loss.item()
    
    # Mean training loss over the batches of this epoch.
    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)
    
    # ---- Validation phase ----
    # NOTE(review): the test tensors double as the validation set, so the
    # scheduler, early stopping and checkpoint selection are all tuned on the
    # same data that is later reported as the test score. A separate
    # validation split would give an unbiased test metric.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test_tensor)
        val_loss = criterion(val_outputs, y_test_tensor).item()
    val_losses.append(val_loss)
    
    # Step the scheduler and report any reduction it applied. The rate is
    # read straight from the optimizer so this works on every PyTorch version.
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after < lr_before:
        print(f'Epoch {epoch+1}: reducing learning rate from {lr_before:.4e} to {lr_after:.4e}')
    
    # Early stopping with checkpointing of the best model so far.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_cnn_lstm_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'提前停止训练！Epoch {epoch+1}')
            break
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}')

# Restore the checkpoint written during training for the best validation loss.
best_state = torch.load('best_cnn_lstm_model.pth')
model.load_state_dict(best_state)

# Inference only: switch to eval mode and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    train_pred_scaled = model(X_train_tensor).numpy()
    test_pred_scaled = model(X_test_tensor).numpy()

# Undo the target scaling; scaler_y expects a 2-D column, hence the reshape.
# NOTE(review): y_train / y_test are compared against these inverse-transformed
# predictions as-is, which assumes they are already in original units
# (their own inverse transform was left disabled) - TODO confirm.
train_predictions = scaler_y.inverse_transform(train_pred_scaled.reshape(-1, 1))
test_predictions_cnnlstm = scaler_y.inverse_transform(test_pred_scaled.reshape(-1, 1))

# Evaluation metrics on the training set.
train_mse = mean_squared_error(y_train, train_predictions)
train_rmse_cnnlstm = math.sqrt(train_mse)
train_mae_cnnlstm = mean_absolute_error(y_train, train_predictions)
train_r2_cnnlstm = r2_score(y_train, train_predictions)

# Evaluation metrics on the test set.
test_mse = mean_squared_error(y_test, test_predictions_cnnlstm)
test_rmse_cnnlstm = math.sqrt(test_mse)
test_mae_cnnlstm = mean_absolute_error(y_test, test_predictions_cnnlstm)
test_r2_cnnlstm = r2_score(y_test, test_predictions_cnnlstm)

print("\n评估指标:")
print(f"训练集 - RMSE: {train_rmse_cnnlstm:.3f}, MAE: {train_mae_cnnlstm:.3f}, R²: {train_r2_cnnlstm:.4f}")
print(f"测试集 - RMSE: {test_rmse_cnnlstm:.3f}, MAE: {test_mae_cnnlstm:.3f}, R²: {test_r2_cnnlstm:.4f}")

# Plot the real test series against the CNN-LSTM predictions and save the figure.
pred_fig, pred_ax = plt.subplots(figsize=(12, 6))
pred_ax.plot(y_test, label='Real', color='blue')
pred_ax.plot(test_predictions_cnnlstm, label='Predict', color='orange')
pred_ax.set(
    title='CNN-LSTM Model Prediction Results',
    xlabel='Time',
    ylabel='Closing Price',
)
pred_ax.grid(True)
pred_ax.legend()
pred_fig.savefig('cnn_lstm_prediction.png', dpi=300, bbox_inches='tight')
plt.show()

# Plot the per-epoch training and validation loss curves and save the figure.
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
loss_ax.plot(train_losses, label='Training Loss')
loss_ax.plot(val_losses, label='Validation Loss')
loss_ax.set(
    title='Training and Validation Loss Curve',
    xlabel='Epoch',
    ylabel='Loss',
)
loss_ax.grid(True)
loss_ax.legend()
loss_fig.savefig('cnn_lstm_loss.png', dpi=300, bbox_inches='tight')
plt.show()