## 练习

1. 尝试使用高级API，能使循环神经网络模型过拟合吗？
1. 如果在循环神经网络模型中增加隐藏层的数量会发生什么？能使模型正常工作吗？
1. 尝试使用循环神经网络实现 :numref:`sec_sequence`的自回归模型。

In [6]:
## 练习1
import torch
from torch import nn
from d2l import torch as d2l
import torch.nn.functional as F

# 数据加载
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# 减少训练数据的数量
num_samples = 500  # 定义要使用的样本数量
train_data = list(iter(train_iter))
train_data = train_data[:num_samples]

# 定义模型
num_hiddens = 256
num_layers = 2
rnn_layer = nn.RNN(len(vocab), num_hiddens, num_layers)
device = d2l.try_gpu()

class RNNModel(nn.Module):
    """循环神经网络模型"""
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        # 如果RNN是双向的（之后将介绍），num_directions应该是2，否则应该是1
        if not self.rnn.bidirectional:
            self.num_directions = 1
            self.linear = nn.Linear(self.num_hiddens, self.vocab_size)
        else:
            self.num_directions = 2
            self.linear = nn.Linear(self.num_hiddens * 2, self.vocab_size)

    def forward(self, inputs, state):
        X = F.one_hot(inputs.T.long(), self.vocab_size)
        X = X.to(torch.float32)
        Y, state = self.rnn(X, state)
        output = self.linear(Y.reshape((-1, Y.shape[-1])))
        return output, state

    def begin_state(self, batch_size, device):
        # 返回一个初始化的隐藏状态
        return torch.zeros((self.num_directions * self.rnn.num_layers,
                            batch_size, self.num_hiddens),
                           device=device)
            
net = RNNModel(rnn_layer, vocab_size=len(vocab))
net = net.to(device)

# 训练模型
def train_model(net, train_data, vocab, lr, num_epochs, device):
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    net.train()
    for epoch in range(num_epochs):
        for X, Y in train_data:
            X, Y = X.to(device), Y.to(device)
            state = net.begin_state(X.shape[0], device)
            y_hat, state = net(X, state)
            l = loss(y_hat, Y.T.reshape(-1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

lr, num_epochs = 0.01, 10
train_model(net, train_data, vocab, lr, num_epochs, device)

In [7]:
## 练习2
# 数据加载
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# 定义模型（增加隐藏层数量）
num_hiddens = 256
num_layers = 4  # 增加隐藏层的数量
rnn_layer = nn.RNN(len(vocab), num_hiddens, num_layers)  # 修改这里
device = d2l.try_gpu()

class RNNModel(nn.Module):
    """循环神经网络模型"""
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        if not self.rnn.bidirectional:
            self.num_directions = 1
            self.linear = nn.Linear(self.num_hiddens, self.vocab_size)
        else:
            self.num_directions = 2
            self.linear = nn.Linear(self.num_hiddens * 2, self.vocab_size)

    def forward(self, inputs, state):
        X = F.one_hot(inputs.T.long(), self.vocab_size)
        X = X.to(torch.float32)
        Y, state = self.rnn(X, state)
        output = self.linear(Y.reshape((-1, Y.shape[-1])))
        return output, state

    def begin_state(self, batch_size, device):
        return torch.zeros((self.num_directions * self.rnn.num_layers,
                            batch_size, self.num_hiddens),
                           device=device)
            
net = RNNModel(rnn_layer, vocab_size=len(vocab))
net = net.to(device)

# 训练模型
def train_model(net, train_iter, vocab, lr, num_epochs, device):
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    net.train()
    for epoch in range(num_epochs):
        for X, Y in train_iter:
            X, Y = X.to(device), Y.to(device)
            state = net.begin_state(X.shape[0], device)
            y_hat, state = net(X, state)
            l = loss(y_hat, Y.T.reshape(-1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

lr, num_epochs = 0.01, 10
train_model(net, train_iter, vocab, lr, num_epochs, device)

In [5]:
## 练习3
import torch
from torch import nn
from d2l import torch as d2l

# 加载数据
T = 1000  # 总共产生1000个点
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(0, 0.2, (T,))
tau = 4
features = torch.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i: T - tau + i]
labels = x[tau:].reshape((-1, 1))

batch_size, n_train = 16, 600
# 只有前n_train个样本用于训练
train_iter = d2l.load_array((features[:n_train], labels[:n_train]), batch_size, is_train=True)

# 定义模型
class RNNModel(nn.Module):
    def __init__(self, num_hiddens):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(tau, num_hiddens)
        self.linear = nn.Linear(num_hiddens, 1)

    def forward(self, x, state):
        x = x.unsqueeze(dim=0)  # RNN需要三维输入: 时间步数、批量大小、特征数
        y, state = self.rnn(x, state)
        y = self.linear(y.reshape((-1, y.shape[-1])))
        return y, state

    def begin_state(self, device, batch_size=1):
        return torch.zeros((1, batch_size, num_hiddens), device=device)

# 初始化模型
num_hiddens = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = RNNModel(num_hiddens)
net = net.to(device)

# 训练
def train_rnn(net, train_iter, lr, num_epochs, device):
    loss = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    net.train()
    for epoch in range(num_epochs):
        for X, Y in train_iter:
            X, Y = X.to(device), Y.to(device)
            # 在每个迭代开始时重新初始化隐藏状态
            state = net.begin_state(device, X.shape[0])
            y_hat, state = net(X, state)
            l = loss(y_hat, Y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        print(f'Epoch {epoch}, Loss: {l.item()}')
        
lr, num_epochs = 0.01, 5
train_rnn(net, train_iter, lr, num_epochs, device)

Epoch 0, Loss: 0.057332560420036316
Epoch 1, Loss: 0.10452727973461151
Epoch 2, Loss: 0.05440758168697357
Epoch 3, Loss: 0.033264487981796265
Epoch 4, Loss: 0.05510817468166351
