1. 仿照课件中的歌词生成示例，在其基础上将 LSTM 网络改为多层堆叠的 GRU，并优化网络层数及其他参数，尽力提升效果。


In [None]:
import tensorflow as tf
from tensorflow import keras
import random
import zipfile
import numpy as np
import math
from keras.models import Sequential
from keras import Input
from keras.layers import Dense,Dropout,Activation,Embedding,GRU
from keras.optimizers import Adam

In [None]:
# Read the lyrics text straight out of the zip archive and decode it as UTF-8.
with zipfile.ZipFile('data/jaychou_lyrics.txt.zip') as zin, \
        zin.open('jaychou_lyrics.txt') as f:
    corpus_chars = f.read().decode('utf-8')

# Show the first 20 characters as a quick sanity check.
print(corpus_chars[:20])

In [None]:
# Build the character vocabulary.
# The distinct characters are sorted so the char <-> index mapping is the same
# on every run; iterating a bare set of strings can yield a different order in
# each interpreter session, which would silently change every index.
idx_to_char = sorted(set(corpus_chars))
char_to_idx = {char:i for i,char in enumerate(idx_to_char)}
vocab_size = len(char_to_idx)
vocab_size

In [None]:
# Encode the corpus: one vocabulary index per character, in corpus order.
# A character missing from char_to_idx raises KeyError.
corpus_indices = list(map(char_to_idx.__getitem__, corpus_chars))
len(corpus_chars)

In [None]:
# Preview the first 20 encoded characters and decode them back to text.
# The slice is named sample_indices (not sample) so it cannot clash with the
# sample() function this file defines for drawing the next character.
sample_indices = corpus_indices[:20]
print('indices:', sample_indices)
print('chars:', ''.join([idx_to_char[idx] for idx in sample_indices]))

In [None]:
def data_iter_consecutive(corpus_indices,batch_size,num_steps,ctx=None):
    """Yield consecutive (X, Y) windows over an index sequence.

    The sequence is truncated to a multiple of batch_size and reshaped into
    batch_size rows. Each yielded X holds num_steps columns of that grid and
    Y holds the same window shifted one column to the right.

    Args:
        corpus_indices: sequence of integer indices.
        batch_size: number of rows in every yielded array.
        num_steps: number of columns in every yielded array.
        ctx: accepted for interface compatibility; not used here.

    Yields:
        Tuples (X, Y) of arrays with shape (batch_size, num_steps).
    """
    tokens = np.array(corpus_indices)
    row_len = len(tokens) // batch_size

    # Drop the tail that does not fill a complete row, then lay rows out.
    grid = tokens[:batch_size*row_len].reshape((batch_size,row_len))

    # One column is reserved so the shifted target window always fits.
    num_windows = (row_len-1) // num_steps
    for start in range(0, num_windows*num_steps, num_steps):
        stop = start + num_steps
        yield grid[:, start:stop], grid[:, start+1:stop+1]

# Demo on a toy sequence 0..29: print every (X, Y) pair the iterator yields
# so the one-step shift between inputs and targets is visible.
my_seq = list(range(30))
for X,Y in data_iter_consecutive(my_seq,batch_size=2,num_steps=6):
    print('X:',X,'\nY:',Y,'\n')

In [None]:
# Hyper-parameters shared by the model, the data iterator and generation.
num_hiddens = 256
batch_size = 160
num_steps = 35

# Character-level language model: Embedding -> GRU -> GRU -> Dense(relu)
# -> Dropout -> GRU -> Dense(softmax over the vocabulary).
model = Sequential()
# The GRUs are stateful, so the batch size must be fixed. The time dimension
# is left as None so the same model accepts the num_steps-long windows used
# for training as well as the single-character inputs used for generation.
model.add(Input(batch_input_shape=(batch_size,None)))
model.add(Embedding(output_dim=256,input_dim=vocab_size))
model.add(GRU(units=num_hiddens,dropout=0.2,recurrent_dropout=0.2,
              return_sequences=True,stateful=True))
model.add(GRU(units=num_hiddens,dropout=0.2,
              return_sequences=True,stateful=True))
model.add(Dense(units=num_hiddens,activation='relu'))
model.add(Dropout(0.2))
model.add(GRU(units=num_hiddens,dropout=0.3,recurrent_dropout=0.3,
              return_sequences=True,stateful=True))
# One probability distribution over the vocabulary per time step.
model.add(Dense(units=vocab_size,activation='softmax'))

model.summary()

In [None]:
# Draw the layer graph of the model, including tensor shapes.
# NOTE(review): plot_model presumably needs pydot and Graphviz installed — confirm.
from keras.utils import plot_model
plot_model(model=model,show_shapes=True)

In [None]:
def sample(preds,temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: array-like of probabilities.
        temperature: divisor applied to the log-probabilities before they are
            re-normalised; values below 1 sharpen the distribution and values
            above 1 flatten it.

    Returns:
        The index selected by a single multinomial draw.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: the result is a one-hot row.
    draw = np.random.multinomial(1,distribution,1)
    return np.argmax(draw)

def predict_rnn_keras(prefix,num_chars):
    """Generate text that starts with prefix, one character at a time.

    The model state is reset, the prefix characters are fed in order, and
    then num_chars further characters are drawn with sample() from the
    model's output for the most recent character.

    Args:
        prefix: non-empty string whose characters are all in char_to_idx.
        num_chars: number of characters to generate after the prefix.

    Returns:
        The prefix followed by the generated characters.
    """
    model.reset_states()
    generated = [char_to_idx[prefix[0]]]
    warmup_steps = len(prefix) - 1

    for step in range(num_chars + warmup_steps):
        # The latest index is copied into every row of the batch; only row 0
        # of the model output is read below.
        latest = np.array([generated[-1]])
        batch = latest.repeat(batch_size).reshape((batch_size,1))
        probs = model(batch)
        if step >= warmup_steps:
            next_idx = sample(np.array(probs[0,0,:]))
        else:
            # Still inside the prefix: feed the known next character.
            next_idx = char_to_idx[prefix[step+1]]
        generated.append(next_idx)

    return ''.join(idx_to_char[i] for i in generated)

In [None]:
# Generate 10 characters continuing the prefix '分开'.
predict_rnn_keras('分开',10)

In [None]:
def grad_clipping(grads,theta):
    """Rescale gradients so that their global L2 norm does not exceed theta.

    The norm is taken over all gradients together (square root of the sum of
    every squared element).

    Args:
        grads: sequence of gradients; each must be convertible with
            np.asarray and support multiplication by a float.
        theta: maximum allowed global norm.

    Returns:
        grads itself when the norm is already within theta, otherwise a new
        list in which every gradient is multiplied by theta / norm.
    """
    # Accumulate in a Python float so integer/float dtype mixing cannot fail.
    squared_sum = 0.0
    for grad in grads:
        squared_sum += float(np.sum(np.square(np.asarray(grad))))
    norm = math.sqrt(squared_sum)

    if norm <= theta:
        return grads

    scale = theta / norm
    return [grad * scale for grad in grads]

In [None]:
# Adam optimizer with learning rate 1e-3 and gradient clipping (clipnorm=0.1).
opt = Adam(
    learning_rate=1e-3,
    clipnorm=0.1,
)

# Targets are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
def train_and_predict_rnn_keras(num_epochs,batch_size,pred_period,pred_len,prefixes):
    """Train the global model and periodically print perplexity and samples.

    Args:
        num_epochs: number of passes over corpus_indices.
        batch_size: batch size handed to data_iter_consecutive.
        pred_period: report every pred_period epochs.
        pred_len: number of characters to generate for each prefix.
        prefixes: iterable of prefix strings passed to predict_rnn_keras.
    """
    for epoch_no in range(1, num_epochs+1):
        # Start every epoch from a clean recurrent state.
        model.reset_states()
        total_loss, num_batches = 0.0, 0

        for X,Y in data_iter_consecutive(corpus_indices,batch_size,num_steps):
            batch_metrics = model.train_on_batch(X,Y)
            # First entry is the loss; the compiled metrics follow it.
            total_loss += batch_metrics[0]
            num_batches += 1

        if epoch_no % pred_period != 0:
            continue

        # Perplexity is the exponential of the mean per-batch loss.
        print('epoch %d, perplexity %f' % (epoch_no, math.exp(total_loss/num_batches)))
        for prefix in prefixes:
            print('>>', predict_rnn_keras(prefix,pred_len))


# Train for 1000 epochs; every 100 epochs print the perplexity and a
# 50-character continuation for each of the two prefixes.
num_epochs = 1000
train_and_predict_rnn_keras(num_epochs,batch_size,100,50,['想要','我们'])