# 循环神经网络

## 从零开始实现循环神经网络

In [1]:
import torch
import torch.nn as nn
import time
import math
import sys
import utils
import os
# Parent of the current working directory; later used to build the data path.
BASE_DIR = os.path.dirname(os.getcwd())
# Put that directory first on the module search path.
# NOTE(review): `import utils` above runs before this insertion, so that
# import is resolved without this entry -- confirm this ordering is intended.
sys.path.insert(0, os.path.join(BASE_DIR))
print(BASE_DIR)

D:\Project\Anaconda\dive2DL


In [2]:
# Load the data set (Jay Chou lyrics, jaychou_lyrics.txt.zip).
# The helper returns four values, unpacked here as the corpus indices, the
# char->index mapping, the index->char mapping and the vocabulary size.
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = utils.load_data_jay_lyrics(
    os.path.join(BASE_DIR, "data", "jaychou_lyrics.txt.zip"))
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## one-hot 向量

In [4]:
def one_hot(x, n_class, dtype=torch.float32):
    """One-hot encode a 1-D tensor of class indices.

    Args:
        x: Tensor of indices; ``x.shape[0]`` determines the number of rows.
        n_class: Width of the encoding (number of columns).
        dtype: Element type of the returned tensor.

    Returns:
        Tensor of shape ``(x.shape[0], n_class)`` on the same device as
        ``x`` with a 1 at ``[i, x[i]]`` and 0 everywhere else.
    """
    # scatter_ needs an integer index tensor with one column per row.
    column_index = x.long().view(-1, 1)
    encoded = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)
    encoded.scatter_(1, column_index, 1)
    return encoded

# Quick check: encode the indices 0 and 2 against the full vocabulary.
x = torch.tensor([0, 2])
x_one_hot = one_hot(x, vocab_size)
print(x_one_hot)
print(x_one_hot.shape)
# Each row holds exactly one 1, so every row sum should be 1.
print(x_one_hot.sum(axis=1))

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.]])
torch.Size([2, 1027])
tensor([1., 1.])


In [6]:
def to_onehot(X, n_class):
    """Turn a sampled mini-batch of indices into per-time-step one-hot matrices.

    Args:
        X: Index tensor of shape ``(batch_size, num_steps)``.
        n_class: Size of the dictionary, i.e. width of each one-hot row.

    Returns:
        A list with ``num_steps`` entries; entry ``t`` is the one-hot
        encoding of column ``t`` of ``X`` and has shape
        ``(batch_size, n_class)``.
    """
    num_steps = X.shape[1]
    per_step = []
    for step in range(num_steps):
        per_step.append(one_hot(X[:, step], n_class))
    return per_step

# Toy batch: the integers 0..9 arranged as 2 rows by 5 columns.
X = torch.arange(10).view(2, 5)
inputs = to_onehot(X, vocab_size)
# Print the number of per-step matrices and the shape of the first one.
print(len(inputs), inputs[0].shape)

5 torch.Size([2, 1027])


## 初始化模型参数

In [10]:
# Layer sizes: input and output widths both equal the vocabulary size.
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
# num_inputs : d
# num_hiddens : h, the number of hidden units is a hyperparameter
# num_outputs : q

def get_params():
    """Create freshly initialised RNN parameters on ``device``.

    Weight matrices are drawn from a normal distribution with mean 0 and
    standard deviation 0.01; both bias vectors start at zero. Sizes are read
    from the module-level ``num_inputs``, ``num_hiddens`` and ``num_outputs``.

    Returns:
        Tuple ``(W_xh, W_hh, b_h, W_hq, b_q)`` of ``torch.nn.Parameter``.
    """
    def _gaussian_weight(rows, cols):
        # Allocate first, then overwrite in place with N(0, 0.01) samples.
        weight = torch.zeros((rows, cols), device=device, dtype=torch.float32)
        nn.init.normal_(weight, 0, 0.01)
        return torch.nn.Parameter(weight)

    def _zero_bias(size):
        return torch.nn.Parameter(torch.zeros(size, device=device))

    # Hidden layer parameters.
    W_xh = _gaussian_weight(num_inputs, num_hiddens)
    W_hh = _gaussian_weight(num_hiddens, num_hiddens)
    b_h = _zero_bias(num_hiddens)
    # Output layer parameters.
    W_hq = _gaussian_weight(num_hiddens, num_outputs)
    b_q = _zero_bias(num_outputs)
    return W_xh, W_hh, b_h, W_hq, b_q

### 定义模型

In [16]:
def rnn(inputs, state, params):
    """Run a single-layer tanh RNN over a sequence of time steps.

    Args:
        inputs: Iterable of per-step input matrices; each is multiplied by
            ``W_xh``.
        state: 1-tuple holding the initial hidden state.
        params: Tuple ``(W_xh, W_hh, b_h, W_hq, b_q)``.

    Returns:
        ``(outputs, (H,))`` where ``outputs`` is a list with one output
        matrix per input step and ``H`` is the hidden state after the last
        step (the initial state itself when ``inputs`` is empty).
    """
    W_xh, W_hh, b_h, W_hq, b_q = params
    (hidden,) = state
    outputs = []
    for step_input in inputs:
        # New hidden state mixes the current input with the previous state.
        pre_activation = step_input @ W_xh + hidden @ W_hh + b_h
        hidden = torch.tanh(pre_activation)
        # Output layer is a plain affine map of the hidden state.
        outputs.append(hidden @ W_hq + b_q)
    return outputs, (hidden,)

In [17]:
def init_rnn_state(batch_size, num_hiddens, device):
    """Build the initial hidden state for the RNN.

    Args:
        batch_size: Number of rows of the state.
        num_hiddens: Number of columns of the state.
        device: Device on which the tensor is allocated.

    Returns:
        A 1-tuple containing an all-zero tensor of shape
        ``(batch_size, num_hiddens)``.
    """
    initial_hidden = torch.zeros((batch_size, num_hiddens), device=device)
    return (initial_hidden,)

In [18]:
# Smoke test: push the toy batch X through one forward pass of the RNN.
print(X.shape)
print(num_hiddens)
print(vocab_size)
# The state has one row per row of X.
state = init_rnn_state(X.shape[0], num_hiddens, device)
# Move the indices to the target device before encoding them.
inputs = to_onehot(X.to(device), vocab_size)
params = get_params()
outputs, state_new = rnn(inputs, state, params)
# Compare list lengths and per-element shapes of inputs, outputs and states.
print(len(inputs), inputs[0].shape)
print(len(outputs), outputs[0].shape)
print(len(state), state[0].shape)
print(len(state_new), state_new[0].shape)

torch.Size([2, 5])
256
1027
5 torch.Size([2, 1027])
5 torch.Size([2, 1027])
1 torch.Size([2, 256])
1 torch.Size([2, 256])
