# 作业5：RNN 生成模型

以 `data/names.txt` 中的英文名作为训练集，利用 RNN 或 LSTM 等方法对字母序列数据进行建模，然后使用拟合的模型随机生成20个名字。本次作业为开放式，不指定各类超参数（如网络结构、学习率、迭代次数等），但需提供必要的输出和诊断结果支持你的选择（如模型是否收敛、效果评价等）。

提示：可以参照 `lec12-rnn-generation.zip` 中的代码，但注意英文名不需要像中文那样构建字典，因为可以直接使用26个字母作为字典。

In [1]:
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import itertools
import collections
import matplotlib.pyplot as plt

读取数据

In [2]:
# load txt file

def read_txt_file(file_path):
    """Read a text file and return its whitespace-separated tokens.

    Args:
        file_path (str): path of the text file to read

    Returns:
        list: the tokens of the file in order of appearance; the content is split on
        any run of whitespace, so an empty file yields an empty list

    """
    # Explicit encoding so decoding does not depend on the platform's locale default.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    return content.split()

# Load the training names; the path is relative to the current working directory.
dat = read_txt_file('data/names.txt')
# Preview the first five entries as a quick sanity check of the loaded data.
print(dat[:5])

['abbas', 'abbey', 'abbott', 'abdi', 'abel']


In [10]:
# build the character dictionary: the 26 lowercase letters followed by the end-of-sequence token
dictionary = [*'abcdefghijklmnopqrstuvwxyz', '<EOS>']
charset_size = len(dictionary) # 27 = 26 letters + 1 <EOS>
print(dictionary)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '<EOS>']


In [9]:
# names to tensor
def char2index(char):
    """Transform a character (or the '<EOS>' token) to its index in the dictionary
    Args:
        char (str): a dictionary entry; letters are matched case-insensitively

    Returns:
       int: the index of the character in the dictionary

    Raises:
        ValueError: if neither char nor its lowercase form is in the dictionary

    """
    # Try the exact token first: '<EOS>' is stored in upper case, so lowercasing it
    # unconditionally would make that dictionary entry impossible to look up.
    if char in dictionary:
        return dictionary.index(char)
    return dictionary.index(char.lower())

def names2tensor(NameList):
    """Transform a list of names to a one-hot input tensor and a next-letter target tensor
    Args:
        NameList (array): a list of names

    Returns:
        tensor: a tensor of shape (LongestNameLength, NumberOfNames, charset_size), storing the
            one-hot representation of names; positions past the end of a name stay all-zero
        array: a numpy integer array of shape (NumberOfNames), storing each name's length
        target: an integer tensor of shape (LongestNameLength, NumberOfNames), storing the index
            of the next letter; the last letter of a name and all padding positions hold the
            <EOS> index (charset_size - 1)

    An empty NameList yields empty tensors and an empty length array instead of raising.

    """
    names_num = len(NameList) # number of names
    names_lens = [len(name) for name in NameList] # each name's length
    # default=0 keeps max() from raising ValueError when NameList is empty
    max_name_len = max(names_lens, default=0) # the longest name's length

    tensor = torch.zeros(max_name_len, names_num, charset_size) # (char position, name, one-hot vector)
    # every target starts as <EOS>, so only the "next letter" entries need to be overwritten
    target = torch.full((max_name_len, names_num), charset_size - 1, dtype=torch.long)

    for name_i, name in enumerate(NameList):
        # look up each character once and reuse the indices for both input and target
        indices = [char2index(char) for char in name]
        for char_i, index in enumerate(indices):
            tensor[char_i, name_i, index] = 1 # set the corresponding one-hot entry
        # the target at position char_i is the letter at char_i + 1; the last letter keeps <EOS>
        for char_i, next_index in enumerate(indices[1:]):
            target[char_i, name_i] = next_index

    # explicit integer dtype so an empty NameList does not produce a float array
    return tensor, np.array(names_lens, dtype=int), target

# Smoke test for names2tensor: two names of different lengths (4 and 6 letters),
# so the shorter one exercises the padding positions.
names2tensor(["leon","rachel"])

(tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,