In [76]:
from io import open     # 从io中导入文件打开方法
import glob
import os
import string   # 以下二者为常见的字母字符规范化
import unicodedata
import random
import math
import time
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [77]:
data_path = '../data/names/'

def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [line for line in lines]

filename = data_path + 'Chinese.txt'
result = readLines(filename)

category_lines = {}

all_categories = []

for filename in glob.glob(data_path + '*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

In [78]:
len(all_categories), category_lines['Chinese'][:10]

(18,
 ['Ang', 'Au-Yong', 'Bai', 'Ban', 'Bao', 'Bei', 'Bian', 'Bui', 'Cai', 'Cao'])

In [79]:
all_letters = list("abcdefghijklmnopqrstuvwxyz ")
n_letters = len(all_letters)
def lineToTensor(line):
    """
    line: 人名
    将人名转为One-Hot张量，每一个字母用1*n_letters的张量表示，则每个名字的形状是len(line)*1*n_letters
    """
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][all_letters.index(letter.lower())] = 1
    return tensor

In [80]:
x = torch.tensor([1,2,3,4])
print(x.shape)
y = torch.unsqueeze(x, dim=0)
print(y.shape)
z = torch.unsqueeze(x, dim=1)
print(z, z.shape)

torch.Size([4])
torch.Size([1, 4])
tensor([[1],
        [2],
        [3],
        [4]]) torch.Size([4, 1])


In [81]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        """
        input_size:输入的最后一个维度
        hidden_size:隐层的最后一个维度
        output_size:RNN的输出维度
        num_layers:网络层数
        """
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers)
        self.linear = nn.Linear(self.hidden_size, self.output_size)
        self.softmax = nn.LogSoftmax(dim=-1)
    
    def forward(self, input1, hidden):
        """
        input1:代表人名分类器中的输入张量，形状是1*n_letters
        hidden:代表RNN的隐藏层张量，形状是self.num_layers*1*self.hidden_size
        所以要升维匹配
        """
        input1 = input1.unsqueeze(0)
        rr, hn = self.rnn(input1, hidden)
        return self.softmax(self.linear(rr)), hn
    
    def initHidden(self):
        # 辅助函数，初始化全0的隐藏层张量，维度是3
        return torch.zeros(self.num_layers, 1, self.hidden_size)


In [82]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1) -> None:
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.LSTM = nn.LSTM(self.input_size, self.hidden_size, self.num_layers)
        self.linear = nn.Linear(self.hidden_size, self.output_size)
        self.softmax = nn.LogSoftmax(dim=-1)
    
    def forward(self, input, hidden, c):
        # 相比RNN，3个输入，多了细胞状态c
        input = input.unsqueeze(0)
        rr, (hn, cn) = self.LSTM(input, (hidden, c))
        return self.softmax(self.linear(rr)), hn, cn
    
    def initHiddenAndC(self):
        # 注意：对LSTM来说，初始化的时候要同时初始化hidden和细胞状态c
        # hidden和c的形状一致
        hidden = c = torch.zeros(self.num_layers, 1, self.hidden_size)
        return hidden, c

In [83]:
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1) -> None:
        # output_size都指的是线性层的输出维度
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.gru = nn.GRU(self.input_size, self.hidden_size, self.num_layers)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=-1)
    
    def forward(self, input, hidden):
        input = input.unsqueeze(0)
        rr, hn = self.gru(input, hidden)
        return self.softmax(self.linear(rr)), hn
    
    def initHidden(self):
        return torch.zeros(self.num_layers, 1, self.hidden_size)

In [84]:
# 参数
input_size = n_letters
n_hidden = 128
output_size = len(all_categories)

input1 = lineToTensor('B').squeeze(0)
hidden = c = torch.zeros(1, 1, n_hidden)
rnn = RNN(input_size, n_hidden, output_size)
lstm = LSTM(input_size, n_hidden, output_size)
gru = GRU(input_size, n_hidden, output_size)

rnn_output, next_hidden = rnn(input1, hidden)
print(f'rnn:{rnn_output}\nrnn_shape:{rnn_output.shape}\n***************')

lstm_output, next_hidden1, c = lstm(input1, hidden, c)
print(f'lstm_output:{lstm_output}\nlstm_output.shape:{lstm_output.shape}')

gru_output, next_hidden2 = gru(input1, hidden)
print(f'gru_output:{gru_output}\ngru_output.shape:{gru_output.shape}')

rnn:tensor([[[-2.9667, -2.8798, -2.9286, -2.9802, -2.9913, -2.9198, -2.8710,
          -2.7793, -2.7791, -2.7871, -2.8621, -2.8945, -2.9117, -2.9206,
          -2.9285, -2.8478, -2.9143, -2.8986]]], grad_fn=<LogSoftmaxBackward0>)
rnn_shape:torch.Size([1, 1, 18])
***************
lstm_output:tensor([[[-2.9067, -2.9667, -2.8739, -2.9390, -2.8995, -2.8914, -2.7993,
          -2.8809, -2.9499, -2.8513, -2.8249, -2.9569, -2.8423, -2.9253,
          -2.9830, -2.8420, -2.8713, -2.8462]]], grad_fn=<LogSoftmaxBackward0>)
lstm_output.shape:torch.Size([1, 1, 18])
gru_output:tensor([[[-2.8521, -3.0085, -2.9111, -2.8667, -2.8428, -2.8855, -2.9484,
          -2.9936, -2.8702, -2.8522, -2.8134, -2.9670, -2.8657, -2.9429,
          -2.8254, -2.8415, -2.8573, -2.9110]]], grad_fn=<LogSoftmaxBackward0>)
gru_output.shape:torch.Size([1, 1, 18])


In [85]:
# 构建训练函数--辅助函数1
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)   # top_n没用上
    category_i = top_i[0].item()
    return all_categories[category_i], category_i

x = torch.range(1, 6)
rank_k = torch.topk(x, 2)
print(f'x:{x}\nrank_k:{rank_k}')

x:tensor([1., 2., 3., 4., 5., 6.])
rank_k:torch.return_types.topk(
values=tensor([6., 5.]),
indices=tensor([5, 4]))


  x = torch.range(1, 6)


In [86]:
name = 'b'
name_tensor = lineToTensor(name)
pred_name, _ = gru(name_tensor.squeeze(0), hidden)
categoryFromOutput(pred_name)

('Japanese', 10)

In [87]:
categogry, category_i = categoryFromOutput(gru_output)
print(f'category:{categogry}\ncategory_i:{category_i}')

category:Japanese
category_i:10


In [113]:
# 构建训练函数--辅助函数二

all_categories = ['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
def randomTrainingExample():
    # 该函数的作用是用于随机产生训练数据
    # 第一步：使用random.choice()从all_categories中随机选择一个类别
    category = random.choice(all_categories)
    print(f'$$$$$$$$$$$$$ 所有类别：{all_categories}')
    # 第二步：通过category_lines字典取出category类别对应的名字列表
    line = random.choice(category_lines[category])
    # 第三步：将类别封装成tensor
    categoty_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    # 第四步：将随机取到的名字通过lineToTensor()转换为one-hot张量
    line_tensor = lineToTensor(line)
    return category, line, categoty_tensor, line_tensor

In [115]:
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print(f'categogry={category}\nline={line}\ncategory_tensor={category_tensor}')
print(f'line_tensor:{line_tensor}')

$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
categogry=Polish
line=Bartosz
category_tensor=tensor([9])
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
categogry=Arabic
line=Kassab
category_tensor=tensor([0])
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
categogry=German
line=Sitz
category_tensor=tensor([4])
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
categogry=English
line=Duckworth
category_tensor=tensor([2])
$$$$$$$$$$$$$ 所有类别：['Arab

ValueError: 'é' is not in list

In [99]:
# 构建传统RNN训练函数
criterion = nn.NLLLoss()
learning_rate = .005

def trainRNN(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    for p in rnn.parameters():
        p.data.add_(-learning_rate, p.grad.data)
    return output, loss.item()

In [100]:
# 构建LSTM训练函数
def trainLSTM(categogry_tensor, line_tensor):
    hidden, c = lstm.initHiddenAndC()
    lstm.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden, c = lstm(line_tensor[i], hidden, c)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    for p in lstm.parameters():
        p.data.add_(-learning_rate, p.grad.data)
    return output, loss.item()

In [92]:
def trainGRU(categogry_tensor, line_tensor):
    hidden = gru.initHidden()
    gru.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = gru(line_tensor[i], hidden)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    for p in gru.parameters():
        p.data.add_(-learning_rate, p.grad.data)
    return output, loss.item()

In [101]:
# 构建时间计算函数
def timeSince(since):
    now = time.time()
    s = now - since
    m = s // 60
    s -= m * 60
    return f'{m}m {s}s'

since = time.time() - 10 * 60
period = timeSince(since)
print(period)

10.0m 0.0s


In [94]:
# 构建训练过程的日志打印函数
n_iters = 1000
print_every = 50
plot_every = 10

def train(train_type_fn):
    # trian_type_fn 代表选择哪种模型来训练，比如trainRNN
    all_losses = []
    start = time.time()
    current_loss = 0
    for iter in range(1, n_iters + 1):
        category, line, category_tensor, line_tensor = randomTrainingExample()
        output, loss = train_type_fn(category_tensor, line_tensor)
        current_loss += loss
        if iter % print_every == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = 'True' if guess == categogry else 'False (%s)' % categogry
            print(f'{iter}, {iter/n_iters*100}, {timeSince(start)}, {loss}, {line}, {guess}, {correct}')
        if iter % plot_every == 0:
            all_losses.append(current_loss / plot_every)
            current_loss = 0
    return all_losses, int(time.time() - start)


In [114]:
all_losses1, period1 = train(trainRNN)
all_losses2, period2 = train(trainLSTM)
all_losses3, period3 = train(trainGRU)

plt.figure(0)
plt.plot(all_losses1, label='RNN')
plt.plot(all_losses2, color='red', label='LSTM')
plt.plot(all_losses3, color='orange', label='GRU')
plt.legend(loc='upper left')

plt.figure(1)
x_data = ['RNN', 'LSTM', 'GRU']
y_data = [period1, period2, period3]
plt.bar(range(len(x_data)), y_data, tick_label=x_data)  # 柱状图

$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
$$$$$$$$$$$$$ 所有类别：['Arabic', 'Chinese', 'English', 'French', 'German'

ValueError: 'ä' is not in list

In [None]:
a = torch.randn(4)
b = torch.randn(4, 1)
print(f'{a}\n{b}')
print('**********************')
c = torch.add(a, b)
d = torch.add(a, b, alpha=10)
print(f'{c}\n{d}')


tensor([0.8568, 0.2878, 0.4478, 0.3480])
tensor([[ 2.7561],
        [ 0.2061],
        [ 0.6364],
        [-1.8665]])
**********************
tensor([[ 3.6129,  3.0439,  3.2039,  3.1040],
        [ 1.0629,  0.4939,  0.6538,  0.5540],
        [ 1.4932,  0.9242,  1.0841,  0.9843],
        [-1.0096, -1.5786, -1.4187, -1.5185]])
tensor([[ 28.4178,  27.8488,  28.0087,  27.9089],
        [  2.9174,   2.3484,   2.5083,   2.4085],
        [  7.2204,   6.6514,   6.8113,   6.7115],
        [-17.8079, -18.3769, -18.2170, -18.3168]])
