# 第7章·实验4：面向时序分析的 RNN（LSTM/GRU）

本 Notebook 基于与 CNN 实验相同的合成时序数据集，使用循环神经网络对序列进行分类，涵盖数据预处理、模型搭建、训练评估与实验拓展，方便按照课件要求完成实验记录。

## 1. 环境准备

依赖与 CNN 版本保持一致：`torch`、`numpy`、`matplotlib`、`scikit-learn`。若检测到可用的 GPU，会自动切换到 GPU 运行。

In [None]:
import math
import random
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Seed every random number generator in use (PyTorch, NumPy, stdlib) with the
# same value so that repeated runs of the notebook are reproducible.
SEED = 7
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(SEED)

# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device


## 2. 数据集与 DataLoader

- 仍使用正弦波 vs. 方波分类示例，方便对比 CNN 与 RNN。
- 为每条序列补上特征维，整理为 (序列长度, 特征数) 的形状（此处特征数为 1），以便喂入 RNN。

In [None]:
from typing import Tuple

def generate_wave(sample_len: int, kind: str, noise_scale: float = 0.15) -> np.ndarray:
    """Synthesize one noisy period of a sine or square wave.

    Args:
        sample_len: Number of points sampled evenly over [0, 2*pi].
        kind: Waveform family, either "sine" or "square".
        noise_scale: Standard deviation of the additive Gaussian noise.

    Returns:
        A 1-D array of length ``sample_len`` holding the clean waveform plus
        noise drawn from NumPy's global random state.

    Raises:
        ValueError: If ``kind`` is neither "sine" nor "square".
    """
    phase = np.linspace(0, 2 * math.pi, sample_len)
    if kind not in ("sine", "square"):
        raise ValueError("Unsupported kind")

    clean = np.sin(phase)
    if kind == "square":
        # The sign of the sine gives a square wave with the same period.
        clean = np.sign(clean)

    jitter = np.random.normal(scale=noise_scale, size=sample_len)
    return clean + jitter

def build_dataset(n_samples: int = 1200, sample_len: int = 200) -> Tuple[np.ndarray, np.ndarray]:
    """Create a class-balanced set of noisy sine (label 0) and square (label 1) waves.

    Waves are generated in alternating sine/square order, ``n_samples // 2`` of
    each, so an odd ``n_samples`` yields ``n_samples - 1`` rows.

    Args:
        n_samples: Requested total number of sequences.
        sample_len: Number of points per sequence.

    Returns:
        ``(signals, labels)``: ``signals`` is a float32 array with one row per
        sequence, divided by the largest absolute value in the whole set;
        ``labels`` is the matching int64 array of class ids.
    """
    waves = []
    targets = []
    for _ in range(n_samples // 2):
        # One sine followed by one square per iteration keeps the classes balanced.
        for class_id, kind in enumerate(("sine", "square")):
            waves.append(generate_wave(sample_len, kind))
            targets.append(class_id)

    signals = np.stack(waves).astype(np.float32)
    labels = np.array(targets, dtype=np.int64)

    # Global peak normalisation: a single scale factor shared by every sequence.
    peak = np.abs(signals).max()
    return signals / peak, labels

class WaveformSequence(Dataset):
    """Dataset that pairs each waveform with its class label.

    Indexing returns ``(sequence, label)``, where the sequence carries an extra
    trailing axis of size 1 that serves as the feature dimension for the RNN.
    """

    def __init__(self, signals: np.ndarray, labels: np.ndarray):
        """Convert the NumPy waveforms and labels to tensors."""
        self.signals, self.labels = torch.tensor(signals), torch.tensor(labels)

    def __len__(self):
        """Return the number of labelled sequences."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``idx``-th waveform shaped (seq_len, 1) and its label."""
        waveform = self.signals[idx]
        target = self.labels[idx]
        # Add the trailing feature axis (feature_dim = 1) expected by the RNN.
        return waveform[..., None], target

signals, labels = build_dataset(n_samples=1200, sample_len=200)

# Shuffle the sample indices once, then cut them at 70% and 85% to obtain the
# train / validation / test partitions.
indices = np.random.permutation(len(labels))
train_end = int(len(labels) * 0.7)
val_end = int(len(labels) * 0.85)
train_idx, val_idx, test_idx = np.split(indices, [train_end, val_end])

train_data, val_data, test_data = (
    WaveformSequence(signals[part], labels[part])
    for part in (train_idx, val_idx, test_idx)
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=64) for split in (val_data, test_data)
)

len(train_data), len(val_data), len(test_data)


### 序列可视化

分别展示两类序列各自的前 3 个样本，观察波形和噪声水平，便于后续调参。

In [None]:
# Grid of example waveforms: one column per class, one row per example.
samples_to_plot = 3
fig, axes = plt.subplots(samples_to_plot, 2, figsize=(10, 6), sharex=True, sharey=True)

# Positions of every sample of each class, looked up once before plotting.
class_positions = {cls: (labels == cls).nonzero()[0] for cls in (0, 1)}

for row in range(samples_to_plot):
    for col, label in enumerate([0, 1]):
        # The row-th sample of this class, in dataset order.
        idx = class_positions[label][row]
        panel = axes[row, col]
        panel.plot(signals[idx])
        panel.set_title(f"label={label}")

plt.tight_layout()
plt.show()


## 3. 构建循环神经网络

示例使用双层 LSTM（默认双向）+ 注意力式加权平均（通过可学习的线性层近似），并提供 GRU 的替换示例。可按照课件实验要求调整隐藏维度、层数，或切换单向/双向 RNN。

In [None]:
class LSTMClassifier(nn.Module):
    """LSTM sequence classifier with learned attention pooling over time steps.

    The LSTM output at every time step is scored by a linear layer. The scores
    are soft-maxed over the time axis and used to form a weighted sum of the
    outputs, which a small MLP head maps to class logits.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes.
        bidirectional: Whether the LSTM also runs over the sequence in reverse.
        dropout: Dropout probability between stacked LSTM layers. Ignored when
            ``num_layers == 1`` because there is no inter-layer connection.
    """

    def __init__(self, input_dim=1, hidden_dim=64, num_layers=2, num_classes=2, bidirectional=True, dropout=0.3):
        super().__init__()
        self.bidirectional = bidirectional
        # nn.LSTM applies dropout only between stacked layers. A non-zero value
        # with a single layer has no effect and triggers a UserWarning, so it is
        # zeroed here to let single-layer experiments run cleanly.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers,
                            batch_first=True, bidirectional=bidirectional, dropout=inter_layer_dropout)
        # A bidirectional LSTM concatenates both directions, doubling the output width.
        direction_factor = 2 if bidirectional else 1
        self.attention = nn.Linear(hidden_dim * direction_factor, 1)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * direction_factor, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Input tensor of shape (batch, seq_len, feature_dim).
        """
        outputs, _ = self.lstm(x)
        # One scalar score per time step, normalised over the sequence axis.
        weights = torch.softmax(self.attention(outputs).squeeze(-1), dim=1)  # (batch, seq_len)
        # Attention-weighted sum of the per-step outputs.
        context = torch.sum(outputs * weights.unsqueeze(-1), dim=1)
        return self.classifier(context)

# To try a GRU instead, build the recurrent layer in the class above with
# nn.GRU(...) in place of nn.LSTM(...).
model = LSTMClassifier()
model = model.to(device)
model


## 4. 训练与验证

- 优化器使用 Adam，损失函数为交叉熵。
- 记录训练/验证的损失与准确率，便于对比 CNN 的收敛速度与泛化表现。

In [None]:
from collections import defaultdict

def train_one_epoch(model, loader, criterion, optimizer, max_grad_norm=None):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss taking ``(logits, targets)`` and returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        max_grad_norm: If given, the total gradient norm is clipped to this
            value before each optimizer step. ``None`` disables clipping.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, as Python floats.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    # Move batches to wherever the model's parameters live rather than relying
    # on a module-level ``device`` global, so the function works for any model.
    first_param = next(model.parameters(), None)
    total_loss, correct, total = 0.0, 0, 0
    for x, y in loader:
        if first_param is not None:
            x, y = x.to(first_param.device), y.to(first_param.device)
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        if max_grad_norm is not None:
            nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch figure is a
        # true per-sample mean even when the last batch is smaller.
        total_loss += loss.item() * y.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)
    return total_loss / total, correct / total

def evaluate(model, loader, criterion):
    """Compute mean loss and accuracy of ``model`` over ``loader`` without gradients.

    Args:
        model: Network to evaluate; switched to (and left in) evaluation mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss taking ``(logits, targets)`` and returning a scalar tensor.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, as Python floats.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    # Move batches to wherever the model's parameters live rather than relying
    # on a module-level ``device`` global, so the function works for any model.
    first_param = next(model.parameters(), None)
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for x, y in loader:
            if first_param is not None:
                x, y = x.to(first_param.device), y.to(first_param.device)
            logits = model(x)
            loss = criterion(logits, y)
            # Weight the batch-mean loss by batch size to get a per-sample mean.
            total_loss += loss.item() * y.size(0)
            preds = logits.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)
    return total_loss / total, correct / total

def run_training(model, train_loader, val_loader, epochs=20, lr=1e-3):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    Args:
        model: Network to optimise.
        train_loader: Batches used for the parameter updates.
        val_loader: Batches used for the per-epoch validation pass.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to Adam.

    Returns:
        A ``defaultdict(list)`` with keys ``train_loss``, ``val_loss``,
        ``train_acc`` and ``val_acc``, each holding one value per epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    history = defaultdict(list)

    epoch = 1
    while epoch <= epochs:
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc = evaluate(model, val_loader, criterion)

        # Record the four metrics of this epoch under their history keys.
        epoch_metrics = (
            ("train_loss", train_loss),
            ("val_loss", val_loss),
            ("train_acc", train_acc),
            ("val_acc", val_acc),
        )
        for key, value in epoch_metrics:
            history[key].append(value)

        print(f"Epoch {epoch:02d}: train_loss={train_loss:.4f} val_loss={val_loss:.4f} "
              f"train_acc={train_acc:.3f} val_acc={val_acc:.3f}")
        epoch += 1
    return history

# Train for 20 epochs with learning rate 1e-3; the per-epoch loss and accuracy
# curves are kept in `history` for the plots below.
history = run_training(model, train_loader, val_loader, epochs=20, lr=1e-3)


### 学习曲线

与 CNN 结果对照，观察 RNN/LSTM 的收敛特性和泛化趋势。

In [None]:
# Side-by-side learning curves: loss on the left, accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = (
    ("Loss", "train_loss", "val_loss"),
    ("Accuracy", "train_acc", "val_acc"),
)
for ax, (title, train_key, val_key) in zip(axes, panels):
    ax.plot(history[train_key], label="train")
    ax.plot(history[val_key], label="val")
    ax.set_title(title)
    ax.legend()
plt.show()


### 测试集评估

输出分类报告与混淆矩阵，为实验报告提供量化指标。

In [None]:
# Aggregate loss and accuracy on the held-out test split.
criterion = nn.CrossEntropyLoss()
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"Test loss: {test_loss:.4f}, Test acc: {test_acc:.3f}")

# Second pass over the test split to keep per-sample predictions for the
# classification report and the confusion matrix.
all_labels, all_preds = [], []
model.eval()
with torch.no_grad():
    for x, y in test_loader:
        logits = model(x.to(device))
        preds = logits.argmax(dim=1).cpu().numpy()
        all_labels.extend(y.numpy())
        all_preds.extend(preds)

report = classification_report(all_labels, all_preds, target_names=["sine", "square"])
print(report)
cm = confusion_matrix(all_labels, all_preds)

# Heat map of the confusion matrix with the raw count written in every cell.
fig, ax = plt.subplots(figsize=(4, 4))
im = ax.imshow(cm, cmap="Greens")

tick_positions = [0, 1]
tick_names = ["sine", "square"]
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(tick_names)
ax.set_yticklabels(tick_names)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")

n_rows, n_cols = cm.shape
for i in range(n_rows):
    for j in range(n_cols):
        # imshow puts columns on the x axis and rows on the y axis, hence (j, i).
        ax.text(j, i, int(cm[i, j]), ha="center", va="center", color="black")

fig.colorbar(im)
plt.show()


## 5. 课后/实验要求提示

- **结构对比**：尝试 GRU、单层/多层、双向与否，并记录验证集结果。
- **序列截断/填充**：调整序列长度或使用滑动窗口，讨论对性能的影响。
- **训练策略**：尝试调节学习率、梯度裁剪或加入权重衰减，观察梯度稳定性。
- **结果总结**：结合 CNN 实验，对比两类模型的优势与局限，给出改进建议。