In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [12]:
class SimpleRNN(nn.Module):
    """Single-layer RNN followed by a linear projection applied at every time step.

    Args:
        input_size: Number of features in each input time step.
        hidden_size: Number of features in the recurrent hidden state.
        output_size: Number of features produced for each time step.
        device: Stored as ``self.device`` and kept for backward compatibility.
            The constructor does not move the module's parameters, and
            ``forward`` takes the device from its input rather than from
            this attribute.
    """

    def __init__(self, input_size, hidden_size, output_size, device):
        super().__init__()
        self.hidden_size = hidden_size
        self.device = device
        # batch_first=True: inputs and outputs are laid out as (batch, seq_len, features).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` and project every hidden state.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, seq_len, output_size), one projected
            vector per time step.
        """
        # Build the zero initial state from x itself so it always matches the
        # input's device and dtype, even after the module has been moved or
        # cast (e.g. .to('cuda') or .double()) since construction.
        h0 = x.new_zeros((self.rnn.num_layers, x.size(0), self.hidden_size))
        out, _ = self.rnn(x, h0)
        return self.fc(out)

In [13]:
import string
# Build a character-level vocabulary: the lowercase ASCII letters plus the space.
char_set = string.ascii_lowercase + ' '
idx_to_char = dict(enumerate(char_set))
char_to_idx = {char: idx for idx, char in idx_to_char.items()}

# Training pair: the character at each position of input_seq is paired with
# the character at the same position of target_seq.
input_seq = 'hello'
target_seq = 'elloh'

# Encode both strings as lists of vocabulary indices.
input_data = list(map(char_to_idx.__getitem__, input_seq))
target_data = list(map(char_to_idx.__getitem__, target_seq))

# Targets stay as integer class indices, one per position.
target_tensor = torch.tensor(target_data)
# Inputs are one-hot encoded with a leading batch dimension of 1:
# shape (1, len(input_seq), len(char_set)).
input_tensor = torch.nn.functional.one_hot(
    torch.tensor([input_data]), num_classes=len(char_set)
).float()

In [15]:
# Hyperparameters
input_size = len(char_set)   # width of each one-hot input vector
hidden_size = 128
output_size = len(char_set)  # one logit per vocabulary entry
epochs = 1000
log_every = 100              # print the loss once every this many epochs

# Seed the RNG before the model is built so that weight initialisation, and
# therefore the whole training run, is reproducible after a kernel restart.
torch.manual_seed(0)

# Build the model
model = SimpleRNN(input_size, hidden_size, output_size, torch.device('cpu'))

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
criteron = criterion  # alias kept so code using the original (misspelled) name still works
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()  # training mode is loop-invariant, so it is set once up front
for epoch in range(epochs):
    # Forward pass: per-time-step logits
    output = model(input_tensor)

    # Flatten the batch and time dimensions so each row of logits lines up
    # with one entry of target_tensor.
    loss = criterion(output.reshape(-1, output_size), target_tensor)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss periodically
    if epoch % log_every == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')

Epoch [0/1000], Loss: 3.3730
Epoch [100/1000], Loss: 0.0602
Epoch [200/1000], Loss: 0.0092
Epoch [300/1000], Loss: 0.0042
Epoch [400/1000], Loss: 0.0024
Epoch [500/1000], Loss: 0.0016
Epoch [600/1000], Loss: 0.0012
Epoch [700/1000], Loss: 0.0009
Epoch [800/1000], Loss: 0.0007
Epoch [900/1000], Loss: 0.0006


In [28]:
# Run inference: switch the trained model to evaluation mode first.
model.eval()

def str_to_vector(s):
    """One-hot encode a string as a batch containing a single sequence.

    Args:
        s: String whose characters are all keys of ``char_to_idx``. May be
            empty, in which case the sequence dimension has length 0.

    Returns:
        Float tensor of shape (1, len(s), len(char_set)).

    Raises:
        KeyError: If ``s`` contains a character that is not in ``char_to_idx``.
    """
    # Pin the dtype to long: one_hot only accepts integer indices, and an
    # empty list would otherwise be inferred as a float tensor and rejected.
    indices = torch.tensor([char_to_idx[char] for char in s], dtype=torch.long)
    # unsqueeze(0) adds the batch dimension expected by the model.
    return torch.nn.functional.one_hot(
        indices.unsqueeze(0), num_classes=len(char_set)
    ).float()

# Predict a single character
# output = model(str_to_vector("hell"))[:, -1, :]
# predicted_idx = torch.argmax(output, dim=1).item()
# predicted_char = idx_to_char[predicted_idx]

# print(f'Predicted next character: {predicted_char}')
# Greedily extend the prompt one character at a time.
num_chars_to_predict = 10
input_str = "hell"
input_test = str_to_vector(input_str)
# Inference needs no gradients; no_grad stops autograd from recording a graph
# on every forward pass.
with torch.no_grad():
    for _ in range(num_chars_to_predict):
        # Keep only the logits of the last time step: the prediction for the
        # character that follows the current string.
        output = model(input_test)[:, -1, :]
        predicted_idx = torch.argmax(output, dim=1).item()
        predicted_char = idx_to_char[predicted_idx]
        input_str += predicted_char
        # Re-encode the extended string so the next step sees the full history.
        input_test = str_to_vector(input_str)
print(f'The predicted entire sentence : {input_str}')


The predicted entire sentence : hellohhllohhll
