In [2]:
import tensorflow as tf
import numpy as np

In [3]:
class DataLoader():
    """Loads the Nietzsche text corpus and serves random training samples.

    Attributes:
        raw_text: the whole corpus as one lower-cased string.
        chars: sorted list of the distinct characters occurring in the corpus.
        char_indices: dict mapping each character to its integer id.
        indices_char: dict mapping each integer id back to its character.
        text: the corpus encoded as a list of integer ids.
    """
    def __init__(self):
        # get_file returns a local path to the corpus, downloading it from `origin` if needed.
        path = tf.keras.utils.get_file('nietzsche.txt',
            origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
        with open(path, encoding='utf-8') as f:
            self.raw_text = f.read().lower()
        # Sorting makes the character <-> id mapping deterministic across runs.
        self.chars = sorted(set(self.raw_text))
        self.char_indices = {c: i for i, c in enumerate(self.chars)}
        self.indices_char = {i: c for i, c in enumerate(self.chars)}
        self.text = [self.char_indices[c] for c in self.raw_text]

    def get_batch(self, seq_length, batch_size):
        """Samples `batch_size` random windows of the corpus plus the character after each.

        Args:
            seq_length: number of consecutive character ids in every window.
            batch_size: number of windows to draw (drawn independently, with replacement).

        Returns:
            A tuple `(seq, next_char)` of numpy arrays; for `batch_size >= 1` their
            shapes are `[batch_size, seq_length]` and `[batch_size]`.

        Raises:
            ValueError: if `seq_length` is negative, or so long that no window
                followed by one more character fits in the corpus.
        """
        # Exclusive upper bound for a window start: one id must remain after the window.
        max_start = len(self.text) - seq_length
        if seq_length < 0 or max_start <= 0:
            raise ValueError(
                "seq_length must satisfy 0 <= seq_length < %d (corpus length), got %r"
                % (len(self.text), seq_length))
        seq = []
        next_char = []
        for _ in range(batch_size):
            index = np.random.randint(0, max_start)
            seq.append(self.text[index:index+seq_length])
            next_char.append(self.text[index+seq_length])
        return np.array(seq), np.array(next_char)       # [batch_size, seq_length], [batch_size]

In [5]:
# Smoke test: draw 10 random windows of 3 character ids each and show them
# together with the id that follows every window.
data_loader = DataLoader()
data = data_loader.get_batch(seq_length=3, batch_size=10)
print(data)

(array([[31, 51,  1],
       [27, 45,  1],
       [46, 35, 38],
       [45, 51,  7],
       [34, 31,  1],
       [ 1, 34, 35],
       [35, 40, 33],
       [32,  1, 42],
       [39, 31, 46],
       [34, 35, 45]]), array([45, 46, 38,  1, 42, 46,  1, 34, 27,  1]))


In [12]:
class RNN(tf.keras.Model):
    """Character-level next-character model: one LSTM cell unrolled over time, then a dense layer.

    Args:
        num_chars: vocabulary size; used as the one-hot depth and the number of output units.
        batch_size: nominal batch size. Stored for reference; `call` reads the actual
            batch size from its input.
        seq_length: nominal sequence length. Only used by `call` as a fallback when the
            input's time dimension is not statically known.
    """
    def __init__(self, num_chars, batch_size, seq_length):
        super().__init__()
        self.num_chars = num_chars
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.cell = tf.keras.layers.LSTMCell(units=256)
        self.dense = tf.keras.layers.Dense(units=self.num_chars)

    def call(self, inputs, from_logits=False):
        """Runs the LSTM over a batch of id sequences and scores the next character.

        Args:
            inputs: integer character ids, indexed as [batch, time].
            from_logits: if True return the raw dense-layer output; otherwise return
                its softmax.

        Returns:
            A [batch, num_chars] tensor of logits or probabilities for the next character.

        Raises:
            ValueError: if the sequences are empty (there is no LSTM output to project).
        """
        inputs = tf.one_hot(inputs, depth=self.num_chars)       # [batch_size, seq_length, num_chars]
        # Size the initial state and the unroll from the actual input rather than from the
        # constructor arguments, so batches of any size (e.g. a single seed sequence during
        # generation) are processed at their true size instead of relying on broadcasting.
        batch_size = tf.shape(inputs)[0]
        num_steps = inputs.shape[1]
        if num_steps is None:               # time dimension unknown statically: use the nominal length
            num_steps = self.seq_length
        if num_steps == 0:
            raise ValueError("inputs must contain at least one time step")
        state = self.cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)   # initial LSTM state
        for t in range(num_steps):
            # Feed the current input and the previous state; get the output and the new state.
            output, state = self.cell(inputs[:, t, :], state)
        logits = self.dense(output)         # only the output of the last time step is projected
        if from_logits:                     # from_logits selects raw scores vs. softmax-normalized output
            return logits
        else:
            return tf.nn.softmax(logits)

    def predict(self, inputs, temperature=1.):
        """Samples one next-character id per input sequence.

        NOTE(review): this shadows `tf.keras.Model.predict` with a different signature.

        Args:
            inputs: integer character ids, indexed as [batch, time].
            temperature: positive divisor applied to the logits before the softmax.
                Values below 1 sharpen the distribution, values above 1 flatten it.

        Returns:
            A 1-D numpy array holding one sampled character id per input sequence.

        Raises:
            ValueError: if `temperature` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError("temperature must be > 0, got %r" % (temperature,))
        logits = self(inputs, from_logits=True)                         # scores for the next character
        prob = tf.nn.softmax(logits / temperature).numpy()              # temperature-scaled distribution
        # Draw one id per row of the predicted distribution (one row per input sequence).
        return np.array([np.random.choice(self.num_chars, p=row) for row in prob])

In [13]:
# Training hyperparameters.
batch_size = 50         # sequences sampled per training step
seq_length = 40         # character ids per input sequence
num_batches = 1000      # number of training steps
learning_rate = 1e-3    # learning rate handed to the optimizer

In [14]:
# Build the data source, the model and the optimizer, then run `num_batches` training steps.
data_loader = DataLoader()
model = RNN(num_chars=len(data_loader.chars), batch_size=batch_size, seq_length=seq_length)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(seq_length, batch_size)
    with tf.GradientTape() as tape:
        # Compute the loss from raw logits: the cross-entropy applies the softmax itself,
        # which is numerically more stable than taking the log of already-normalized
        # probabilities.
        logits = model(X, from_logits=True)
        loss = tf.keras.losses.sparse_categorical_crossentropy(
            y_true=y, y_pred=logits, from_logits=True)
        loss = tf.reduce_mean(loss)
    # Logging does not need to be recorded by the tape, so it lives outside the context.
    print("batch %d: loss %f" % (batch_index, loss.numpy()))
    # Differentiate and update only the trainable weights.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))

batch 0: loss 4.041477
batch 1: loss 4.030501
batch 2: loss 4.000156
batch 3: loss 3.986092
batch 4: loss 3.963656
batch 5: loss 3.868635
batch 6: loss 3.748692
batch 7: loss 3.448666
batch 8: loss 3.370884
batch 9: loss 3.535254
batch 10: loss 3.085240
batch 11: loss 3.052670
batch 12: loss 2.872532
batch 13: loss 3.320422
batch 14: loss 3.072628
batch 15: loss 3.245717
batch 16: loss 3.285725
batch 17: loss 2.965348
batch 18: loss 3.219106
batch 19: loss 2.889103
batch 20: loss 3.113730
batch 21: loss 3.089364
batch 22: loss 3.125453
batch 23: loss 2.995502
batch 24: loss 3.132296
batch 25: loss 2.962054
batch 26: loss 3.227995
batch 27: loss 2.915867
batch 28: loss 2.866862
batch 29: loss 2.974046
batch 30: loss 3.257811
batch 31: loss 2.903676
batch 32: loss 3.065748
batch 33: loss 2.829463
batch 34: loss 2.878215
batch 35: loss 3.409382
batch 36: loss 3.154408
batch 37: loss 2.985164
batch 38: loss 3.131719
batch 39: loss 3.208561
batch 40: loss 3.121896
batch 41: loss 2.995261
ba

In [15]:
# Seed: one random window from the corpus, reused as the starting context for every run.
X_, _ = data_loader.get_batch(seq_length, 1)
for diversity in (0.2, 0.5, 1.0, 1.2):      # diversity (i.e. temperature), four values in increasing order
    X = X_
    print("diversity %f:" % diversity)
    for t in range(400):
        y_pred = model.predict(X, diversity)    # sample the id of the next character
        next_char = data_loader.indices_char[y_pred[0]]
        print(next_char, end='', flush=True)    # emit the sampled character immediately
        # Slide the context window: drop the oldest id and append the sampled one,
        # so X keeps a constant length.
        shifted = X[:, 1:]
        X = np.concatenate([shifted, y_pred[:, np.newaxis]], axis=-1)
    print("\n")

diversity 0.200000:
 the he the the the the tha the the seresting of the ind and and the the the the he the the the the the the sore the for the the the sore the the sore the sint of the soul the the the somering of the the sore the the the woun the the the sore the sore the thithe the the the wor the the the sont in the the the the the the the the the the the sores and and and in the here the sore the woul on the th

diversity 0.500000:
 the land whan we fore th the thime and the tanct in the hele touctomo fhin to with werersthe the berhe mond ind ond of the the sore, and the the the toung so s th of berereng of the alis cous wo is the sere
the with the the besn, on par whe fams ad lersaling and int fill wo this er ind tiun ion the poust of mirlsed th the loncist an thire the mont and of th ce rericoplinn ind momerser
the sos ith

diversity 1.000000:
 thee fontt and aprerey. io  "uelantofothad to the torali-:aind brey in. . f lome si lo tiemdor ther ravdint, con ganitt ind sto lxidpoi