# 生成式深度学习
---
* 文本生成
* `DeepDream`
* 神经风格迁移
* 变分自编码器
* 生成式对抗网络

In [65]:
import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization

## 12.1 文本生成

### 12.1.1 生成式深度学习用于序列生成的简史

### 12.1.2 如何生成序列数据
---
#### As usual when working with text data, tokens are typically words or characters, and any network that can model the probability of the next token given the previous ones is called a `language model`.

### 12.1.3 采样策略的重要性

#### [C] 12.1 对于不同`softmax temperature`,对概率分布进行重新加权

In [66]:
def reweight_distribution(original_distribution, temperature=0.5):
    """Re-weight a probability distribution with a softmax temperature.

    Args:
        original_distribution: NumPy array of probability values; the values
            are expected to sum to 1.
        temperature: Factor that quantifies the entropy of the output
            distribution. The log-probabilities are divided by it before
            being exponentiated again.

    Returns:
        The re-weighted values divided by their sum, so they sum to 1 again.
    """
    scaled_log_probs = np.log(original_distribution) / temperature
    unnormalized = np.exp(scaled_log_probs)

    # After rescaling the values no longer sum to 1, so renormalize them.
    return unnormalized / np.sum(unnormalized)


### 12.1.4 用`keras`实现文本生成

#### 1 准备数据 -- 使用 `IMDB`影评数据集

##### [C] 12.3 利用文本文件创建数据集(一个文件即一个样本)

In [67]:
# Build a text-only dataset from the review files; label_mode=None means no
# labels are produced, and the texts are grouped in batches of 256.
dataset = keras.utils.text_dataset_from_directory(
    directory='../11Chapter/aclImdb',
    label_mode=None,
    batch_size=256,
)
# Replace the literal "<br />" HTML tags found in the reviews with a space.
dataset = dataset.map(
    lambda review: tf.strings.regex_replace(review, '<br />', ' ')
)

Found 30250 files belonging to 1 classes.


##### [C] 12.4 准备`TextVectorization`层

In [68]:
sequence_length = 100
# Only the 15,000 most common words are kept; every other word is treated as
# the out-of-vocabulary token [UNK].
vocab_size = 15000
text_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length,
)
# Learn the vocabulary from the text dataset.
text_vectorization.adapt(dataset)

##### [C] 12.5 创建语言模型数据集

In [69]:
def prepare_lm_dataset(text_batch):
    """Turn a batch of texts into (input, target) pairs for language modeling.

    Args:
        text_batch: Batch of strings to vectorize with ``text_vectorization``.

    Returns:
        A tuple ``(inputs, targets)``: the integer sequences without their
        last token, and the same sequences shifted left by one token.
    """
    token_ids = text_vectorization(text_batch)

    # Inputs drop the last token; targets are offset by one position.
    return token_ids[:, :-1], token_ids[:, 1:]


lm_dataset = dataset.map(
    prepare_lm_dataset,
    num_parallel_calls=4,
)

#### 2 基于`Transformer`的序列到序列模型

In [70]:
class PositionalEmbedding(layers.Layer):
    """Layer that sums a token embedding and a position embedding.

    Args:
        sequence_length: Number of distinct positions that can be embedded.
        input_dim: Number of distinct token indices that can be embedded.
        output_dim: Size of each embedding vector.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can report them.
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

        # One table indexed by token index, one indexed by token position.
        self.token_embeddings = layers.Embedding(
            input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim)

    def call(self, inputs):
        """Return token embeddings plus embeddings of positions 0..length-1."""
        num_positions = tf.shape(inputs)[-1]
        position_ids = tf.range(start=0, limit=num_positions, delta=1)

        token_part = self.token_embeddings(inputs)
        position_part = self.position_embeddings(position_ids)
        return token_part + position_part

    def compute_mask(self, inputs, mask=None):
        """Return a boolean mask that is True wherever ``inputs`` is not 0."""
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        """Return the base config extended with the constructor arguments.

        Implemented so that a model using this layer can be saved.
        """
        return {
            **super().get_config(),
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        }

In [71]:
class TransformerDecoder(layers.Layer):
    """Decoder block: causal self-attention, cross-attention, dense projection.

    Each of the three sub-steps is followed by a residual addition and a
    layer normalization.

    Args:
        embed_dim: Size of the last axis of the inputs; also used as the
            ``key_dim`` of both attention layers and as the output size of
            the dense projection.
        dense_dim: Width of the hidden ReLU layer of the dense projection.
        num_heads: Number of attention heads in both attention layers.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        # Forward **kwargs to the base class. get_config() below returns the
        # base-layer config as well, so a layer rebuilt from that config must
        # actually honor those arguments instead of silently dropping them.
        super().__init__(**kwargs)

        self.embed_dim   = embed_dim
        self.dense_dim   = dense_dim
        self.num_heads   = num_heads
        self.attention_1 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj  = keras.Sequential([layers.Dense(dense_dim, activation='relu'),  layers.Dense(embed_dim), ])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        # Lets Keras pass an incoming mask to call() and propagate it onward.
        self.supports_masking = True

    def get_config(self):
        """Return the base config extended with the constructor arguments."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })

        return config

    def get_causal_attention_mask(self, inputs):
        """Build an int32 lower-triangular mask of shape (batch, seq, seq).

        Entry ``[b, i, j]`` is 1 when ``i >= j`` and 0 otherwise, where
        ``batch`` and ``seq`` are the first two dimensions of ``inputs``.
        """
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i    = tf.range(sequence_length)[:, tf.newaxis]
        j    = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        # Tile factors [batch_size, 1, 1]: repeat the mask once per sample.
        mult = tf.concat([tf.expand_dims(batch_size, -1),
                        tf.constant([1, 1], dtype=tf.int32)], axis=0)

        return tf.tile(mask, mult)

    def call(self, inputs, encoder_outputs, mask=None):
        """Run the decoder block.

        Args:
            inputs: Target-sequence tensor; queries of both attention layers
                derive from it.
            encoder_outputs: Tensor used as key and value of the second
                attention layer.
            mask: Optional padding mask for ``inputs``. When given, it is
                combined with the causal mask and applied to the second
                attention layer.

        Returns:
            The output of the final layer normalization.
        """
        causal_mask = self.get_causal_attention_mask(inputs)

        # Default to "no mask" for the second attention layer. Leaving the
        # name unbound when ``mask`` is None made the call below raise
        # UnboundLocalError.
        padding_mask = None
        if mask is not None:
            padding_mask = tf.cast(
                    mask[:, tf.newaxis, :], dtype="int32")
            # Element-wise minimum keeps a position only if both masks allow it.
            padding_mask = tf.minimum(padding_mask, causal_mask)

        # Self-attention over the inputs, restricted by the causal mask.
        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=causal_mask)

        attention_output_1 = self.layernorm_1(inputs + attention_output_1)

        # Attention from the self-attention result to the encoder outputs.
        attention_output_2 = self.attention_2(
            query=attention_output_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )

        attention_output_2 = self.layernorm_2(
            attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)

        return self.layernorm_3(attention_output_2 + proj_output)


##### [C] 12.6 基于`Transformer`的简单语言模型

In [72]:
embed_dim = 256
latent_dim = 2048
num_heads = 2

inputs = keras.Input(shape=(None,), dtype='int64')
x = PositionalEmbedding(
    sequence_length=sequence_length,
    input_dim=vocab_size,
    output_dim=embed_dim,
)(inputs)
# The embedded sequence is passed as both arguments of the decoder call.
x = TransformerDecoder(
    embed_dim=embed_dim,
    dense_dim=latent_dim,
    num_heads=num_heads,
)(x, x)
# Softmax over every word of the vocabulary, computed for each time step of
# the output sequence.
outputs = layers.Dense(vocab_size, activation='softmax')(x)
model = keras.Model(inputs, outputs)


In [73]:
# Print the layer-by-layer summary of the model built above.
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 positional_embedding_2 (Positi  (None, None, 256)   3865600     ['input_3[0][0]']                
 onalEmbedding)                                                                                   
                                                                                                  
 transformer_decoder_2 (Transfo  (None, None, 256)   2104576     ['positional_embedding_2[0][0]', 
 rmerDecoder)                                                     'positional_embedding_2[0][0]'] 
                                                                                            

### 12.1.5 带有可变温度采样的文本生成回调函数

#### [C] 12.7 文本生成回调函数

In [74]:
# Dictionary mapping each word index to its string; used to decode sampled
# indices back into text.
tokens_index = {
    index: token
    for index, token in enumerate(text_vectorization.get_vocabulary())
}

def sample_next(predictions, temperature=1.0):
    """Sample one index from a probability distribution at a given temperature.

    Args:
        predictions: Array-like of probabilities; converted to float64.
        temperature: Divisor applied to the log-probabilities before they
            are renormalized.

    Returns:
        The index drawn by a single multinomial trial over the re-weighted
        distribution.
    """
    log_probs = np.log(np.asarray(predictions).astype('float64'))
    weights = np.exp(log_probs / temperature)
    probabilities = weights / np.sum(weights)

    # A single trial yields a one-hot row; argmax recovers the drawn index.
    one_hot_draw = np.random.multinomial(1, probabilities, 1)
    return np.argmax(one_hot_draw)

class TextGenerator(keras.callbacks.Callback):
    """Callback that prints text sampled from the model at the end of epochs.

    Args:
        prompt: Seed text that every generated sentence starts from.
        generate_length: Maximum number of words to generate per sentence.
        model_input_length: Input length of the model. It is stored on the
            instance but not otherwise used by this class.
        temperatures: Sampling temperatures; one sentence is generated and
            printed for each of them.
        print_freq: Generate text only every ``print_freq`` epochs.
    """

    def __init__(self, prompt, generate_length, model_input_length, temperatures=(1.,), print_freq=1):
        # Let the base Callback set up its own state.
        super().__init__()

        self.prompt             = prompt
        self.generate_length    = generate_length
        self.model_input_length = model_input_length
        # Misspelled attribute kept as an alias for code that reads the old name.
        self.model_input_lenght = model_input_length
        self.temperatures       = temperatures
        self.print_freq         = print_freq

    def on_epoch_end(self, epoch, logs=None):
        """Print one generated sentence per temperature on selected epochs."""
        if (epoch + 1) % self.print_freq != 0:
            return

        for temperature in self.temperatures:
            print('== Generating with temperature', temperature)
            sentence = self.prompt  # Generation starts from the prompt.
            for _ in range(self.generate_length):
                tokenized_sentence = text_vectorization([sentence])

                # Index 0 is treated as padding, so the non-zero count is the
                # number of real tokens currently in the sentence.
                num_tokens = int(np.count_nonzero(np.asarray(tokenized_sentence[0])))
                if num_tokens >= tokenized_sentence.shape[-1]:
                    # The vectorized window is full: tokens appended from now
                    # on would not be part of the model input any more.
                    break

                predictions = self.model(tokenized_sentence)
                # The prediction for the NEXT word sits at the position of the
                # last real token (not at the loop counter), and it must be
                # sampled with the temperature of the current pass.
                last_position = max(num_tokens - 1, 0)
                next_token    = sample_next(predictions[0, last_position, :], temperature=temperature)
                sampled_token = tokens_index[next_token]
                sentence     += ' ' + sampled_token

            print(sentence)

prompt = "This movie"

# Generate up to 50 words from the prompt at each of five temperatures.
text_gen_callback = TextGenerator(
    prompt=prompt,
    generate_length=50,
    model_input_length=sequence_length,
    temperatures=(0.2, 0.5, 0.7, 1., 1.5),
)

#### [C] 12.8 拟合语言模型

In [75]:
# Targets are integer word indices, hence the sparse categorical loss.
model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
)
# Train for 10 epochs with the text-generation callback attached.
model.fit(
    lm_dataset,
    epochs=10,
    callbacks=[text_gen_callback],
)


Epoch 1/10
This movie movie  different to climb strong film back and africa how you like it a [UNK] copy to whos investigation humorous good one of political uneasy on amazing believe receive were meditation as the james terror once by some girl invited cd it gives lots on tales she cant be
== Generating with temperature 0.5
This movie was its a a johnny serious is [UNK] one full of along many point bad has that a this sound movie her looks canvas like this some film patricia with wins the the life end of capture highlander all in anything my and sequences everything and is [UNK] for samuels
== Generating with temperature 0.7
This movie isnt war first and stalks big [UNK] battle of too ray brooks the 3 years the i characters be were conroy in action httpwwwimdbcomtitlett0099422usercomments wouldnt stereotypical i slowly are she considered takes pink truly big quite cold one who steps creasy out alley believing others early role story by richard
== Generating with temperature 1.0
This m

<keras.callbacks.History at 0x292a42c94d0>