In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

In [4]:
filename = 'split_7.txt'
raw_text = open(filename, 'r', encoding='utf-8').read()
chars = sorted(list(set(raw_text)))
# 创建唯一字符到整数的映射
char_to_int = dict((c, i) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  9481231
Total Vocab:  4677


In [5]:
seq_length = 100
dataX = []
dataY = []
for i in range(0,n_chars-seq_length,1):
    seq_in = raw_text[i:i+seq_length]
    seq_out = raw_text[i+seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns:",n_patterns)

Total Patterns: 9481131


In [6]:
X = torch.tensor(dataX,dtype=torch.float32).reshape(n_patterns,seq_length,1)
X = X/float(n_vocab)
y = torch.tensor(dataY)
print(X.shape,y.shape)

torch.Size([9481131, 100, 1]) torch.Size([9481131])


In [7]:
class CharModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1,hidden_size=256,num_layers=2,batch_first=True,dropout=0.2)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(256, n_vocab)
    def forward(self,x):
        x,_ = self.lstm(x)
        # tkae only the last output
        x = x[:,-1,:]
        # produc output
        x = self.linear(self.dropout(x))
        return x

In [8]:
class BiLSTMModel(nn.Module):
    def __init__(self, n_vocab=10000):  # 设置默认值
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(256 * 2, n_vocab)
       
    def forward(self, x):
        x, _ = self.lstm(x)
        x = x[:, -1, :]
        x = self.linear(self.dropout(x))
        return x


In [14]:
n_epochs = 10
batch_size=128
model = BiLSTMModel()

optimizer = optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss(reduction = 'sum')
loader = data.DataLoader(data.TensorDataset(X, y), shuffle=True, batch_size=batch_size)
best_model = None
best_loss = np.inf
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch.to(device))
        loss = loss_fn(y_pred, y_batch.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation
    model.eval()
    loss = 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            y_pred = model(X_batch.to(device))
            loss += loss_fn(y_pred, y_batch.to(device))
       
        if loss < best_loss:
            best_loss = loss
            best_model = model.state_dict()
        print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))                                                                         

torch.save([best_model, char_to_int], "single-char-bilstm.pth")

cuda
Epoch 0: Cross-entropy: 25686868.0000
Epoch 1: Cross-entropy: 25456712.0000
Epoch 2: Cross-entropy: 24028546.0000
Epoch 3: Cross-entropy: 23259928.0000
Epoch 4: Cross-entropy: 22753132.0000
Epoch 5: Cross-entropy: 22436572.0000
Epoch 6: Cross-entropy: 27116356.0000
Epoch 7: Cross-entropy: 25849936.0000
Epoch 8: Cross-entropy: 24054394.0000
Epoch 9: Cross-entropy: 23157654.0000


In [10]:
best_model, char_to_int = torch.load("single-char-bilstm.pth")
#model = CharModel()
model=BiLSTMModel()
n_vocab = len(char_to_int)
int_to_char = dict((i,c) for c, i in char_to_int.items())
model.load_state_dict(best_model)

<All keys matched successfully>

In [12]:
# randomly generate a prompt
temperature = 1 # 温度参数，用于控制输出的随机性
seq_length = 50
start = np.random.randint(0, len(raw_text)-seq_length)
prompt = raw_text[start:start+seq_length]
#prompt = "我说你好"
pattern = [char_to_int[c] for c in prompt]
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
print('Prompt: "%s"' % (prompt))
with torch.no_grad():
    for i in range(200):
        x = np.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
        x = torch.tensor(x, dtype=torch.float32)
        prediction = model(x.to(device))
        prediction /= temperature
        index = int(prediction.argmax())
        result = int_to_char[index]
        print(result, end='')
        pattern.append(index)
        pattern = pattern[1:]
print()
print("Done.")

Prompt: " 到 最 后 它 到 达 了 陆 地 — — 到 达 一 块 陌 生 的 陆 地 。 这 儿 人 们"
 的 一 个 小 子 的 一 个 小 子 的 小 子 ， 他 们 都 是 一 个 小 子 的 人 。 
 “ 我 们 的 一 个 人 么 也 不 会 的 。 ” 
 “ 我 们 的 一 个 人 么 ？ ” 他 说 ， “ 我 们 的 心 子 是 一 个 人 么 的 事 情 。 ” 
 “ 我 的 ， ” 医 生 说 ， “ 我 们 的 到 一 个 人 么 也 不 会 的 。 ” 
 “ 我 的 ，
Done.
