In [188]:
%matplotlib inline
import math
import paddle
from paddle import nn
from paddle.nn import functional as F
import d2l

# 数据处理

In [189]:
# 定义起始和结束标识符
START_TOKEN = '^'
END_TOKEN = '$'

# 读取原始文件
file_path = 'male.txt'  # 替换为你的文件路径
with open(file_path, 'r') as file:
    names = file.readlines()

# 清理每个名字，并添加起始和结束标识符
processed_names = [f"{START_TOKEN}{name.strip()}{END_TOKEN}" for name in names]

# 获取最长的名字长度
max_length = max(len(name) for name in processed_names)

# 创建字符到索引的映射
all_chars = set(''.join(processed_names))
char_to_index = {char: idx + 1 for idx, char in enumerate(all_chars)}
index_to_char = {idx: char for char, idx in char_to_index.items()}
print(char_to_index)
# 将字符编码并保存为新的.txt文件，用0进行填充以保持相同长度
processed_file_path = 'processed_names.txt'  # 保存处理后的文件路径
with open(processed_file_path, 'w') as processed_file:
    for name in processed_names:
        encoded_name = [char_to_index[char] for char in name]
        # 使用0填充使每个编码后的名字长度相同
        encoded_name += [0] * (max_length - len(encoded_name))
        encoded_name = ' '.join(map(str, encoded_name))  # 用空格分隔编码以便保存
        processed_file.write(encoded_name + '\n')


{'w': 1, 'y': 2, 'N': 3, 'V': 4, 'p': 5, 'C': 6, 'z': 7, 'T': 8, 'm': 9, 'A': 10, 'd': 11, 'f': 12, '$': 13, 'k': 14, 'P': 15, 'b': 16, 'i': 17, 'O': 18, 'U': 19, 's': 20, 'u': 21, 'o': 22, '^': 23, 'F': 24, 'D': 25, 'R': 26, 'x': 27, 'c': 28, 'e': 29, 'g': 30, 'H': 31, 'Y': 32, 'Z': 33, 'B': 34, 'J': 35, 'a': 36, 'M': 37, 'I': 38, ' ': 39, 'j': 40, 'E': 41, 'W': 42, 'G': 43, 'v': 44, 't': 45, 'r': 46, '-': 47, 'K': 48, "'": 49, 'S': 50, 'L': 51, 'X': 52, 'l': 53, 'Q': 54, 'n': 55, 'h': 56, 'q': 57}


In [190]:
import numpy as np
file_path = 'processed_names.txt'  # 修改为你的文件路径
with open(file_path, 'r') as file:
    encoded_names = file.readlines()

# 转换为列表，每个元素是一个编码后的名字的列表
encoded_names = [
    list(map(int, name.strip().split())) for name in encoded_names
]
encoded_names = np.array(encoded_names)
print(encoded_names)

[[23 10 36 ...  0  0  0]
 [23 10 36 ...  0  0  0]
 [23 10 16 ...  0  0  0]
 ...
 [23 48 36 ...  0  0  0]
 [23 43 17 ...  0  0  0]
 [23 43 29 ...  0  0  0]]


In [191]:
import paddle
from paddle.io import DataLoader
from sklearn.model_selection import train_test_split

# 这里是你读取已编码名字的代码，确保已正确读取到 encoded_names

# 划分数据集：80% 训练集，10% 验证集，10% 测试集
train_names, val_test_names = train_test_split(encoded_names, test_size=0.2, random_state=42)
val_names, test_names = train_test_split(val_test_names, test_size=0.5, random_state=42)

# 创建数据加载器
batch_size = 32  # 设置 mini-batch 大小


# 创建 DataLoader
train_loader = DataLoader(
    dataset=train_names,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True

)

val_loader = DataLoader(
    dataset=val_names,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True

)

test_loader = DataLoader(
    dataset=test_names,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True
)

# 打印输出一个mini-batch的数据
for idx, item in enumerate(train_loader()):
    X = item[:,:-1]
    Y = item[:,1:]
    Y = F.one_hot(Y,len(char_to_index)+1)
    Y = paddle.reshape(Y, shape=[-1])
    print(Y)
    break

Tensor(shape=[29696], dtype=float32, place=Place(gpu:0), stop_gradient=True,
       [0., 0., 0., ..., 0., 0., 0.])


# 模型定义

In [192]:
import paddle
from paddle.nn import Linear


class RNN_paddle(paddle.nn.Layer): 
    """从零开始实现的循环神经网络模型"""
    def __init__(self, vocab_size, num_hiddens):
        super(RNN_paddle, self).__init__()
        self.num_hiddens = num_hiddens
        self.vocab_size = vocab_size  # 修改变量名为vocab_size
        self.i2h = paddle.nn.Linear(vocab_size, num_hiddens)
        self.h2h = paddle.nn.Linear(num_hiddens, num_hiddens)
        self.h2o = paddle.nn.Linear(num_hiddens, vocab_size)
        

    def forward(self, inputs, state):
        # 先进行one-hot
        X = paddle.nn.functional.one_hot(inputs.T, self.vocab_size)  # 将X修改为inputs
        H, = state
        outputs = []
        for X in X:  # 修改变量名X为inputs
            H = paddle.tanh(self.i2h(X) + self.h2h(H))
            Y = self.h2o(H)
            outputs.append(Y)
        return paddle.concat(x=outputs, axis=0), (H,)

    def begin_state(self, batch_size):
        return (paddle.zeros(shape=[batch_size, self.num_hiddens]), )


In [193]:
X = paddle.arange(10).reshape((2, 5))
print(X)  # 假如这是两个数字
num_hiddens = 64
net = RNN_paddle(len(char_to_index)+1, num_hiddens)
state = net.begin_state(2)
Y, new_state = net(X, state)
Y.shape, len(new_state), new_state[0].shape
print(Y)
print(F.softmax(Y))

Tensor(shape=[2, 5], dtype=int64, place=Place(gpu:0), stop_gradient=True,
       [[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])
Tensor(shape=[10, 58], dtype=float32, place=Place(gpu:0), stop_gradient=False,
       [[ 0.05397610,  0.01679978, -0.01953557, -0.02176125,  0.02758280,
          0.02053785, -0.04334676, -0.05973249,  0.02747209,  0.03925230,
          0.12498002, -0.00884516, -0.05159238, -0.11097369,  0.08773311,
         -0.14795548, -0.16523133, -0.07006428, -0.00715754, -0.07530754,
          0.06586301,  0.14571318, -0.05036495, -0.14330414, -0.09676053,
          0.04767501,  0.01844257, -0.11115052,  0.17007712, -0.00721151,
         -0.02323144, -0.05224719, -0.16599725,  0.02449747, -0.10089554,
         -0.18132968,  0.15819982, -0.03775397,  0.12820339, -0.10379504,
         -0.02361858, -0.08919550,  0.16424690, -0.02595349, -0.10125621,
         -0.08449287,  0.02723853, -0.03749647, -0.01434819, -0.23135513,
         -0.04760276, -0.04856937, -0.16341479, -0.04339

# 模型训练

In [194]:
# 定义模型参数
num_hiddens = 512
lr = 0.01
loss = nn.CrossEntropyLoss()
num_epochs = 30
model = RNN_paddle(len(char_to_index)+1, num_hiddens)

optimizer = paddle.optimizer.Adam(learning_rate=lr,  parameters= model.parameters()) 
print(model.parameters())

[Parameter containing:
Tensor(shape=[58, 512], dtype=float32, place=Place(gpu:0), stop_gradient=False,
       [[ 0.07504431, -0.08937682,  0.02452077, ..., -0.08960503,
         -0.00979052, -0.09207264],
        [-0.01492514,  0.06090340, -0.09600491, ..., -0.03521815,
          0.04943486,  0.08243959],
        [ 0.02515448,  0.02092591,  0.07248142, ...,  0.05058851,
         -0.06122007,  0.10113946],
        ...,
        [-0.02011482, -0.07541491, -0.04090464, ...,  0.01871093,
          0.02831748, -0.04397932],
        [-0.01754829,  0.09012064, -0.00134355, ..., -0.09615290,
          0.10001893,  0.00466319],
        [ 0.09934632,  0.04565306,  0.05140664, ..., -0.04117487,
          0.08582909,  0.05964456]]), Parameter containing:
Tensor(shape=[512], dtype=float32, place=Place(gpu:0), stop_gradient=False,
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 

In [207]:
losses = []
steps = []
def grad_clipping(net, theta):  #@save
    """裁剪梯度"""
    if isinstance(net, nn.Layer):
        params = [p for p in net.parameters() if not p.stop_gradient]
    else:
        params = net.params
    norm = paddle.sqrt(sum(paddle.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        with paddle.no_grad():
            for param in params:
                param.grad[:] *= theta / norm
                param.stop_gradient = False
def train(model):
    # 开启训练模式
    model.train()
    global_step =  0
    state = None
    clip_threshold = 1.0 
    for epoch in range(num_epochs):
        for step, data in enumerate(train_loader):
            X = data[:,:-1]
            Y = data[:,1:]
            # Y = F.one_hot(Y,len(char_to_index)+1)
            Y = paddle.reshape(Y.T, shape=[-1])
            if state is None :
                # 在第一次迭代或使用随机抽样时初始化`state`
                state = model.begin_state(batch_size=X.shape[0])
            else:
                if isinstance(net, nn.Layer) and not isinstance(state, tuple):
                    state.stop_gradient=True
                else:
                    for s in state:
                        s.stop_gradient=True
            y_hat, state = model(X, state)
            # print(F.softmax(y_hat))
            # print(y_hat,Y)
            # 计算损失
            loss = F.cross_entropy(input=y_hat, label=Y)
            loss = paddle.mean(loss)
            
            optimizer.clear_grad()
            loss.backward(retain_graph=True)
            grad_clipping(model, 1)
            optimizer.step()
            

            # 更新参数
            global_step+=1
            if step % 100 == 0:
                # 记录当前步骤的loss变化情况
                losses.append(loss.numpy()[0])
                steps.append(step)
                # 打印当前loss数值
                print("epoch %d, step %d, loss %.3f" % (epoch,global_step, loss.numpy()[0]))
            
            
    # predict = lambda prefix: predict_ch8(prefix, 7, net,char_to_index,index_to_char, device)
    # # 训练和预测
    # for epoch in range(num_epochs):
    #     train_epoch_ch8(net, train_iter, loss, updater, device, use_random_iter)
    #     if (epoch + 1) % 10 == 0:
    #         print(predict('time traveller'))
    # print(predict('Asto'))
    # print(predict('Lenb'))

In [208]:
train(model)

epoch 0, step 1, loss 1.670
epoch 0, step 101, loss 1.392
epoch 1, step 199, loss 1.374
epoch 1, step 299, loss 1.951
epoch 2, step 397, loss 1.723
epoch 2, step 497, loss 1.986
epoch 3, step 595, loss 1.740
epoch 3, step 695, loss 1.619
epoch 4, step 793, loss 1.466
epoch 4, step 893, loss 1.654
epoch 5, step 991, loss 1.874
epoch 5, step 1091, loss 1.472
epoch 6, step 1189, loss 1.548
epoch 6, step 1289, loss 1.543
epoch 7, step 1387, loss 1.360
epoch 7, step 1487, loss 1.710
epoch 8, step 1585, loss 1.564
epoch 8, step 1685, loss 1.486
epoch 9, step 1783, loss 1.624
epoch 9, step 1883, loss 1.852
epoch 10, step 1981, loss 1.732
epoch 10, step 2081, loss 2.039
epoch 11, step 2179, loss 1.966
epoch 11, step 2279, loss 1.677
epoch 12, step 2377, loss 1.574
epoch 12, step 2477, loss 1.830
epoch 13, step 2575, loss 1.452
epoch 13, step 2675, loss 1.629
epoch 14, step 2773, loss 1.472
epoch 14, step 2873, loss 1.527
epoch 15, step 2971, loss 1.413
epoch 15, step 3071, loss 1.674
epoch 16,

In [197]:
import numpy as np

def predict_name(input_names, model, char_to_index, index_to_char):
    state = None
    input_names = [char_to_index[ch] for ch in input_names]  # 将字母转换为对应的数字编码
    for name in input_names:
        X = np.array([[name]], dtype=np.float32)
        X = paddle.to_tensor(X)
        if state is None:
            state = model.begin_state(batch_size=1)
        else:
            if isinstance(model, paddle.nn.Layer) and not isinstance(state, tuple):
                state.stop_gradient = True
            else:
                for s in state:
                    s.stop_gradient = True
        y_hat, state = model(X, state)
    
    # 对预测结果进行处理
    y_prob = F.softmax(y_hat, axis=-1)
    pred = int(paddle.argmax(y_prob, axis=-1).numpy()[0][0])
    predicted_char = index_to_char[pred]  # 将数字编码转换为对应的字母
    
    return predicted_char
predict_name('Gav',model, char_to_index, index_to_char)

RuntimeError: (NotFound) The kernel with key (GPU, Undefined(AnyLayout), float32) of kernel `one_hot` is not registered and fail to fallback to CPU one. Selected wrong DataType `float32`. Paddle support following DataTypes: int32, int64.
  [Hint: Expected kernel_iter != iter->second.end(), but received kernel_iter == iter->second.end().] (at ..\paddle\phi\core\kernel_factory.cc:259)


In [209]:
def predict_ch8(prefix, num_preds, net, char_to_index, index_to_char,device):  #@save
    """预测字符

    Args:
        prefix (_type_): 要输入的字符
        num_preds (_type_): 要预测的数量
        net (_type_): rnn网络
        vocab (_type_): 字典
        device (_type_): 设备

    Returns:
        _type_: _description_
    """
    state = net.begin_state(batch_size=1)  # 生成隐藏层h0
    outputs = [char_to_index[prefix[0]]]  # 取第一个字符对应的索引值
    get_input = lambda: paddle.reshape(paddle.to_tensor(outputs[-1], place=device), (1, 1))  # 一个函数，取一个字符，返回1*1的向量
    for y in prefix[1:]:  # 预热期，每一个字符
        _, state = net(get_input(), state)  # 每一个字符作为输入
        outputs.append(char_to_index[y])  # 索引表示
    for _ in range(num_preds):  # 预测num_preds步
        y, state = net(get_input(), state)
        outputs.append(int(paddle.reshape(paddle.argmax(y,axis=1),shape=[1])))
    return ''.join([index_to_char[i] for i in outputs])

In [213]:
predict_ch8('^Ado', 5, model, char_to_index,index_to_char, 'gpu')

'^Adonnnnn'

In [None]:
def grad_clipping(net, theta):  #@save
    """裁剪梯度"""
    if isinstance(net, nn.Layer):
        params = [p for p in net.parameters() if not p.stop_gradient]
    else:
        params = net.params
    norm = paddle.sqrt(sum(paddle.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        with paddle.no_grad():
            for param in params:
                param.grad[:] *= theta / norm
                param.stop_gradient = False

In [None]:
# 单个epoch，里面要进行one-hot编码
def train_epoch_ch8(net, train_iter, loss, updater, device, use_random_iter):
    """_summary_

    Args:
        net (_type_):RNN
        train_iter (_type_): _训练迭代器_
        loss (_type_): _损失函数_
        updater (_type_): _description_
        device (_type_): _设备_
        use_random_iter (_type_): _description_

    Returns:
        _type_: _description_
    """
    for X, Y in train_iter():
        if state is None or use_random_iter:
            # 在第一次迭代或使用随机抽样时初始化`state`
            state = net.begin_state(batch_size=X.shape[0])
        else:
            if isinstance(net, nn.Layer) and not isinstance(state, tuple):
                # `state`对于`nn.GRU`是个张量
                state.stop_gradient=True
            else:
                # `state`对于`nn.LSTM`或对于我们从零开始实现的模型是个张量
                for s in state:
                    s.stop_gradient=True
        y = paddle.reshape(Y.T,shape=[-1])
        X = paddle.to_tensor(X, place=device)
        y = paddle.to_tensor(y, place=device)
        y_hat, state = net(X, state)
        l = loss(y_hat, y).mean()
        if isinstance(updater, paddle.optimizer.Optimizer):
            updater.clear_grad()
            l.backward()
            grad_clipping(net, 1)
            updater.step()
        else:
            l.backward()
            grad_clipping(net, 1)
            # 因为已经调用了`mean`函数
            net.params = updater(batch_size=1)
        
    return 

In [None]:
def train_ch8(net, train_iter, char_to_index,index_to_char, lr, num_epochs, device, use_random_iter=False):
    """训练

    Args:
        net (_type_): rnn网络
        train_iter (_type_): 训练迭代器
        vocab (_type_): 字典
        lr (_type_): 学习率
        num_epochs (_type_): 多少个 epoch
        device (_type_): 设备
        use_random_iter (bool, optional): _description_. Defaults to False.
    """
    loss = nn.CrossEntropyLoss()  # 定义损失函数
    # 初始化
    if isinstance(net, nn.Layer):
        updater = paddle.optimizer.SGD(
                learning_rate=lr, parameters=net.parameters())
    else:
        updater = lambda batch_size: d2l.sgd(net.params, lr, batch_size)
    predict = lambda prefix: predict_ch8(prefix, 7, net,char_to_index,index_to_char, device)
    # 训练和预测
    for epoch in range(num_epochs):
        train_epoch_ch8(net, train_iter, loss, updater, device, use_random_iter)
        if (epoch + 1) % 10 == 0:
            print(predict('time traveller'))
    print(predict('Asto'))
    print(predict('Lenb'))

In [None]:
def train()

SyntaxError: invalid syntax (3928165943.py, line 1)

In [None]:
num_epochs, lr = 10, 0.1
train_ch8(net, train_loader, char_to_index,index_to_char, lr, num_epochs, 'gpu')

ValueError: too many values to unpack (expected 2)