In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os
import requests
from io import StringIO

# 全局设置中文字体，以“SimSun（宋体）”为例，也可用“Microsoft YaHei（微软雅黑）”等
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False
plt.figure(figsize=(6,4))

# 设置随机种子确保结果可复现
torch.manual_seed(42)
np.random.seed(42)

<Figure size 600x400 with 0 Axes>

In [2]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers_per_block, output_size, dropout=0.2):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers_per_block = num_layers_per_block
        
        # 定义6层LSTM网络结构
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers_per_block, batch_first=True, dropout=dropout)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size, num_layers_per_block, batch_first=True, dropout=dropout)
        self.lstm3 = nn.LSTM(hidden_size, hidden_size, num_layers_per_block, batch_first=True, dropout=dropout)
        self.lstm4 = nn.LSTM(hidden_size, hidden_size, num_layers_per_block, batch_first=True, dropout=dropout)
        self.lstm5 = nn.LSTM(hidden_size, hidden_size, num_layers_per_block, batch_first=True, dropout=dropout)
        self.lstm6 = nn.LSTM(hidden_size, hidden_size, num_layers_per_block, batch_first=True, dropout=dropout)
        
        self.fc = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()  # 隐藏层使用ReLU激活函数
        
        # 权重初始化
        self._initialize_weights()
        
    def _initialize_weights(self):
        """初始化LSTM和全连接层权重"""
        for name, param in self.named_parameters():
            if 'lstm' in name and 'weight' in name:
                nn.init.orthogonal_(param.data)
            elif 'fc' in name and 'weight' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'bias' in name:
                nn.init.zeros_(param.data)

    def forward(self, x):
        
        # 为每层LSTM初始化隐藏状态
        h0_1 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        c0_1 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        
        h0_2 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        c0_2 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        
        h0_3 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        c0_3 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        
        h0_4 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        c0_4 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        
        h0_5 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        c0_5 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        
        h0_6 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        c0_6 = torch.zeros(self.num_layers_per_block, x.size(0), self.hidden_size).to(x.device)
        
        # 前向传播LSTM
        out, _ = self.lstm1(x, (h0_1, c0_1))
        out = self.relu(out)
        out, _ = self.lstm2(out, (h0_2, c0_2))
        out = self.relu(out)
        out, _ = self.lstm3(out, (h0_3, c0_3))
        out = self.relu(out)
        out, _ = self.lstm4(out, (h0_4, c0_4))
        out = self.relu(out)
        out, _ = self.lstm5(out, (h0_5, c0_5))
        out = self.relu(out)
        out, _ = self.lstm6(out, (h0_6, c0_6))
        out = self.relu(out)
        
        # 只取序列的最后一个时间步的输出
        out = self.fc(out[:, -1, :])
        return out

In [3]:
def download_uci_data(url):
    """下载UCI数据集"""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        else:
            raise Exception(f"下载失败，状态码: {response.status_code}")
    except Exception as e:
        print(f"下载数据时出错: {e}")
        return None

In [4]:
def preprocess_data(data, seq_length, test_size=0.2, val_size=0.2):
    """数据预处理和序列生成"""
    # 假设数据最后一列为目标变量
    X = data[:, :-1]
    y = data[:, -1]
    
    # 数据标准化
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    
    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()
    
    # 生成序列
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_length):
        X_seq.append(X[i:i+seq_length])
        y_seq.append(y[i+seq_length])
    
    X_seq, y_seq = np.array(X_seq), np.array(y_seq)
    
    # 划分训练集、验证集和测试集
    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=test_size, random_state=42, shuffle=False)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size/(1-test_size), random_state=42, shuffle=False)
    
    # 转换为PyTorch张量
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train)
    X_val = torch.FloatTensor(X_val)
    y_val = torch.FloatTensor(y_val)
    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test)
    
    return X_train, y_train, X_val, y_val, X_test, y_test, scaler_y

In [5]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, device, scheduler=None, gradient_clip=None, early_stopping_patience=10):
    """训练模型"""
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_model = None
    patience = 0
    
    for epoch in range(epochs):
        # 训练阶段
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            # 前向传播
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.unsqueeze(1))
            
            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            
            # 梯度裁剪
            if gradient_clip:
                nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
            
            optimizer.step()
            
            
            train_loss += loss.item()
        
        # 验证阶段
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch.unsqueeze(1))
                val_loss += loss.item()
        
        # 记录损失
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        
        # 学习率调度
        if scheduler:
            scheduler.step(val_loss)
        
        # 保存最佳模型
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict().copy()
            patience = 0
        else:
            patience += 1
            if patience >= early_stopping_patience:
                print(f"早停触发：验证集损失 {early_stopping_patience} 个epoch未改善")
                break
                
        # 打印训练进度
        current_lr = optimizer.param_groups[0]['lr']
        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, LR: {current_lr:.6f}')
    

    # 加载最佳模型
    model.load_state_dict(best_model)
    
    return model, train_losses, val_losses

In [6]:
def generate_samples(model, X_test, scaler_y, device, num_samples=5000):
    """生成样本并反标准化"""
    model.eval()
    with torch.no_grad():
        X_test = X_test.to(device)
        # 从测试数据中随机选择起点生成样本
        indices = np.random.randint(0, len(X_test), num_samples)
        samples = []
        
        for idx in indices:
            sample = model(X_test[idx:idx+1])
            samples.append(sample.cpu().numpy())
    
    samples = np.array(samples).reshape(-1, 1)
    # 反标准化
    samples = scaler_y.inverse_transform(samples)
    
    return samples

In [7]:
def evaluate_model(model, X_test, y_test, scaler_y, device):
    """评估模型性能"""
    model.eval()
    with torch.no_grad():
        X_test, y_test = X_test.to(device), y_test.to(device)
        predictions = model(X_test)
        
        # 反标准化
        predictions = scaler_y.inverse_transform(predictions.cpu().numpy())
        y_test = scaler_y.inverse_transform(y_test.cpu().numpy().reshape(-1, 1))
    
    # 计算评估指标
    mse = np.mean((predictions - y_test) ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(predictions - y_test))
    mape = np.mean(np.abs((predictions - y_test) / y_test)) * 100
    
    print(f'评估指标:')
    print(f'MSE: {mse:.4f}')
    print(f'RMSE: {rmse:.4f}')
    print(f'MAE: {mae:.4f}')
    print(f'MAPE: {mape:.2f}%')
    
    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'mape': mape
    }

In [8]:
def plot_losses(train_losses, val_losses):
    """绘制训练和验证损失曲线"""
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Loss of train set')
    plt.plot(val_losses, label='Loss of Validation set')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss curve of Train and Validation set')
    plt.legend()
    plt.grid(True)
    plt.savefig('loss_curve.png')
    plt.close()

In [9]:
def main(epochs, lr):
#     # 数据集URL（以空气质量数据集为例）
#     data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00360/AirQualityUCI.csv'
    
#     # 下载数据
#     print("正在下载数据集...")
#     data_text = download_uci_data(data_url)
#     if data_text is None:
#         print("无法下载数据，尝试使用本地数据或其他数据集")
#         return

    # 解析数据
    print("正在解析数据...")
    # 注意：实际应用中可能需要根据特定数据集调整解析方式
    data = pd.read_csv('./data/AirQuality/AirQualityUCI.csv', sep=';', decimal=',')

    # 删除指定的两列（axis=1 表示列）
    data = data.drop(['Unnamed: 15', 'Unnamed: 16'], axis=1)

    # 移除包含NaN的行
    data = data.dropna(axis=0)

    # 移除文本列（日期和时间）
    data = data.iloc[:, 2:].values
    
    # 数据预处理
    seq_length = 48  # 使用前48个时间步预测下一个时间步
    X_train, y_train, X_val, y_val, X_test, y_test, scaler_y = preprocess_data(data, seq_length)
    
    print(f"训练集形状: {X_train.shape},  验证集形状: {X_val.shape},  测试集形状: {X_test.shape}")
    
    # 创建数据加载器
    batch_size = 32
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    # 设置设备
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")
    
    # 初始化模型
    input_size = X_train.shape[2]  # 特征数量
    hidden_size = 128
    num_layers_per_block = 3  # 每层LSTM的层数
    output_size = 1  # 预测值数量
    dropout = 0.2  # 添加dropout防止过拟合
    
    model = LSTMModel(input_size, hidden_size, num_layers_per_block, output_size, dropout).to(device)
    
    # 定义损失函数和优化器
    criterion = nn.MSELoss()  # 均方误差损失函数
    optimizer = optim.Adam(model.parameters(), lr)  # Adam优化器
    
    # 学习率调度器
    scheduler = ReduceLROnPlateau(
        optimizer, 
        mode='min', 
        factor=0.5,  # 学习率降低因子
        patience=5,  # 等待5个epoch没有改善再降低学习率
        verbose=True  # 打印学习率调整信息
    )
    
    # 训练参数
    # epochs = 200  # 增加最大训练轮次
    gradient_clip = 1.0  # 梯度裁剪阈值
    early_stopping_patience = 50  # 早停等待轮次
    
    # 训练模型
    print("开始训练模型...")
    model, train_losses, val_losses = train_model(
        model, train_loader, val_loader, criterion, optimizer, epochs, device,
        scheduler=scheduler, gradient_clip=gradient_clip, early_stopping_patience=early_stopping_patience
    )
    
    # 绘制损失曲线
    plot_losses(train_losses, val_losses)
    
    # 生成样本
    print("正在生成样本...")
    samples = generate_samples(model, X_test, scaler_y, device, num_samples=5000)
    
    # 保存样本
    np.savetxt('generated_samples.csv', samples, delimiter=',')
    print("样本已保存至 generated_samples.csv")
    
    # 评估模型
    print("正在评估模型...")
    metrics = evaluate_model(model, X_test, y_test, scaler_y, device)
    
    # 保存模型
    torch.save(model.state_dict(), 'lstm_model.pth')
    print("模型已保存至 lstm_model.pth")

In [10]:
if __name__ == "__main__":
    main(epochs = 1000, lr=0.0005)

正在解析数据...
训练集形状: torch.Size([5585, 48, 12]),  验证集形状: torch.Size([1862, 48, 12]),  测试集形状: torch.Size([1862, 48, 12])
使用设备: cuda




开始训练模型...
Epoch [10/1000], Train Loss: 0.6901, Val Loss: 1.7870, LR: 0.000250
Epoch [20/1000], Train Loss: 0.6862, Val Loss: 1.7835, LR: 0.000250
Epoch [30/1000], Train Loss: 0.6899, Val Loss: 1.7819, LR: 0.000063
Epoch [40/1000], Train Loss: 0.6899, Val Loss: 1.7816, LR: 0.000016


findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial Unicode MS
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial Unicode MS


早停触发：验证集损失 30 个epoch未改善
正在生成样本...
样本已保存至 generated_samples.csv
正在评估模型...
评估指标:
MSE: 1794.0538
RMSE: 42.3563
MAE: 13.5381
MAPE: 834.64%
模型已保存至 lstm_model.pth
