# Introduction to generative adversarial networks

In [1]:
!python3 ../testenv.py

Using TensorFlow backend.
[1m[31mtensorflow          1.11.0              [0m
[1m[32mkeras               2.2.4               [0m
[1m[33mtorch               1.0.0.dev20180929   [0m
[1m[34msklearn             0.20.0              [0m
[1m[35mnumpy               1.15.2              [0m
[1m[36mscipy               1.1.0               [0m
[1m[31mpandas              0.23.4              [0m
[1m[32mmatplotlib          3.0.0               [0m
[1m[33mcv2                 3.4.2               [0m
[1m[34mPIL                 5.3.0               [0m
[1m[35mIPython             6.2.1               [0m
[1m[36mjupyterlab          0.35.1              [0m
[5m[31msys platform: darwin[0m
[5m[32msys.version_info(major=3, minor=6, micro=6, releaselevel='final', serial=0)[0m


## $\mathrm{GAN}$ 组成  

1. 生成网络 ($\mathrm{Generator\;network}$)  
2. 辨别网络 ($\mathrm{Discriminator\;network}$)  
![](https://i.loli.net/2018/10/27/5bd3d83e0424f.png)

## $\mathrm{A\; schematic \;GAN\; implementation}$

### High Level API: Keras

> $\mathbf{model:}$ $\mathrm{deep \;convolutional \;GAN \;}$ $\mathbf{(DCGAN)} $  
$\mathbf{dataset:}$ CIFAR10, a dataset of $50,000\; 32 \times 32$ RGB images belonging to $10$ classes ($5,000$ images per class).  

#### DCGAN

1. A **generator network** maps vectors of shape $\text{(latent_dim,)}$ to images of shape $(32, 32, 3)$.  
2. A **discriminator network** maps images of shape (32, 32, 3) to a binary score estimating the probability that the image is real.  
3. A **gan network** chains the generator and the discriminator together: $\mathbf{gan(x) = discriminator(generator(x))}$
>Thus this **gan network** maps latent space vectors to the discriminator’s assessment of the realism of these latent vectors as decoded by the generator.
4. train the **discriminator** using examples of real and fake images along with `“real”/“fake”` labels  
5. train the **generator**, using the gradients of the generator’s weights with regard to the loss of the gan model.
>train the **generator** to fool the discriminator.

#### "Alchemy"
1. using `tanh` as the last activation in the generator
2. sample points from the latent space using a `normal distribution`
3. Stochasticity, aka Xuanxue  
>GANs are likely to get stuck in all sorts of ways and stochasticity is good to induce robustness. 
4. no `Sparse gradients` 
>Max pooling operations and ReLU activations both induce sparse gradients; if your network already uses them, prefer strided convolutions for downsampling and a `LeakyReLU` instead of ReLU.
5. To avoid unequal coverage of the pixel space in the generator, make sure the kernel size is divisible by the stride size whenever a strided `Conv2DTranspose` or `Conv2D` is used, in both the generator and the discriminator.

##### $\mathrm{GAN \;generator\; network}$

In [1]:
import keras
from keras import layers
import numpy as np

# Length of the random latent vector fed to the generator, and the size of the
# images it must produce (32x32 pixels, 3 channels).
latent_dim = 32
h, w, channels = 32, 32, 3

# Generator input: a random vector of length latent_dim (32)
generator_input = keras.Input(shape=(latent_dim,))

# Fully connected layer expanding the latent vector to 128*16*16 units
x = layers.Dense(128*16*16)(generator_input)
# LeakyReLU activation
x = layers.LeakyReLU()(x)
# Reshape the flat vector into a 16x16 feature map with 128 channels
x = layers.Reshape((16, 16, 128))(x)

# 256 kernels of size 5x5; 'same' padding keeps the spatial size
x = layers.Conv2D(filters=256, kernel_size=5, padding='same')(x)
x = layers.LeakyReLU()(x)

# Transposed convolution with 256 kernels of size 4x4 and stride 2
# (upsamples the 16x16 feature map to 32x32, see the summary below)
x = layers.Conv2DTranspose(filters=256, kernel_size=4, strides=2, padding='same')(x)
x = layers.LeakyReLU()(x)

# Two more convolutions, each with 256 kernels of size 5x5
x = layers.Conv2D(filters=256, kernel_size=5, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(filters=256, kernel_size=5, padding='same')(x)
x = layers.LeakyReLU()(x)

# Output convolution: `channels` (3) kernels of size 7x7 with tanh activation,
# producing a 32x32x3 image
x = layers.Conv2D(channels, kernel_size=7, activation='tanh', padding='same')(x)
# Wrap the graph into a model mapping (latent_dim,) vectors to (32, 32, 3) images
generator=keras.models.Model(generator_input, x)
generator.summary()

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32768)             1081344   
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 32768)             0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 16, 16, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 256)       819456    
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 16, 16, 256)       0         
_________________________________________________________________
conv2d_transpose_1 (Conv2DTr (None, 32, 32, 256)       1048832   
__________

##### $\mathrm{GAN \;discriminator\; network}$

In [2]:
# Discriminator input: an image of shape (h, w, channels) = (32, 32, 3)
discriminator_input = layers.Input(shape=(h, w, channels))

# 128 kernels of size 3x3, default 'valid' padding (32x32 -> 30x30).
# Fixed: the layer class is `Conv2D` (was misspelt `Coznv2D`, an AttributeError).
x = layers.Conv2D(128, 3)(discriminator_input)
x = layers.LeakyReLU()(x)

# Three strided convolutions (4x4 kernels, stride 2) downsample the feature map;
# strided convolutions are used instead of max pooling to avoid sparse gradients.
x = layers.Conv2D(128, 4, strides=2)(x)
x = layers.LeakyReLU()(x)

x = layers.Conv2D(128, 4, strides=2)(x)
x = layers.LeakyReLU()(x)

x = layers.Conv2D(128, 4, strides=2)(x)
x = layers.LeakyReLU()(x)

x = layers.Flatten()(x)

# Dropout (rate 0.4) adds stochasticity to the discriminator
x = layers.Dropout(0.4)(x)

# Sigmoid classifier: a single unit producing a score in (0, 1)
x = layers.Dense(units=1, activation='sigmoid')(x)

# Model mapping a (32, 32, 3) image to a single binary score
discriminator = keras.models.Model(discriminator_input, x)

# RMSprop with gradient value clipping and learning-rate decay
discriminator_optimizer = keras.optimizers.RMSprop(
    lr=0.0008,
    clipvalue=1.0,
    decay=1e-8)
discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')
discriminator.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 30, 30, 128)       3584      
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 30, 30, 128)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 14, 14, 128)       262272    
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 6, 6, 128)         262272    
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 6, 6, 128)         0         
__________

##### $\mathrm{Adversarial \;network}$

In [3]:
# Freeze the discriminator before building and compiling `gan`, so that its
# weights are counted as non-trainable in the combined model (see the summary
# below). The standalone `discriminator` model was compiled in an earlier cell,
# before this flag was set.
discriminator.trainable = False

# Chain the two networks: gan(z) = discriminator(generator(z))
gan_input = keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = keras.models.Model(gan_input, gan_output)

# RMSprop with gradient value clipping and learning-rate decay; the learning
# rate (0.0004) is half of the one used for the discriminator (0.0008).
gan_optimizer = keras.optimizers.RMSprop(
    lr=0.0004,
    clipvalue=1.0,
    decay=1e-8)
gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')
gan.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 32)                0         
_________________________________________________________________
model_1 (Model)              (None, 32, 32, 3)         6264579   
_________________________________________________________________
model_2 (Model)              (None, 1)                 790913    
Total params: 7,055,492
Trainable params: 6,264,579
Non-trainable params: 790,913
_________________________________________________________________


##### $\mathrm{train\;model}$

1. Draw random points in the latent space (random noise).   
2. Generate images with generator using this random noise.   
3. Mix the generated images with real ones.  
4. Train discriminator using these mixed images, with corresponding targets: either “real” (for the real images) or “fake” (for the generated images).  
5. Draw new random points in the latent space.  
6. Train gan using these random vectors, with targets that all say “these are real images”; this only updates the weights of the generator.  

In [12]:
import os
from keras.preprocessing import image
# Load CIFAR10; only the training split is used.
(x_tranin, y_train), (_,_) = keras.datasets.cifar10.load_data()

# Keep only the images of class 6 (frogs, as the saved file names indicate).
x_tranin = x_tranin[y_train.flatten() == 6]

# Make sure the images have shape (n, h, w, channels) and scale pixels to [0, 1].
x_tranin = x_tranin.reshape(
    (x_tranin.shape[0],)+(h, w, channels)
).astype('float32') / 255.0
iterations = 10000
batch_size = 20
save_dir = './tmp'
# Create the output directory up front so that img.save() below cannot fail
# with FileNotFoundError on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

start = 0
for step in range(iterations):
    # 1. Sample random points in the latent space.
    random_letent_vectors = np.random.normal(size=(batch_size, latent_dim))

    # 2. Decode them into fake images.
    # NOTE(review): the generator ends in tanh (range [-1, 1]) while the real
    # images are scaled to [0, 1]; confirm that this mismatch is intended.
    generated_images = generator.predict(random_letent_vectors)

    # 3. Mix the fake images with the next batch of real ones.
    stop = start + batch_size
    real_images = x_tranin[start: stop]
    combined_images = np.concatenate([generated_images, real_images])

    # Targets: 1 for the generated images (first half), 0 for the real ones.
    labels = np.concatenate([np.ones((batch_size, 1)),
                            np.zeros((batch_size, 1))])
    labels += 0.05 * np.random.random(labels.shape) # add some noise to labels

    # 4. Train the discriminator on the mixed batch.
    d_loss = discriminator.train_on_batch(combined_images, labels)

    # 5. Sample new random points in the latent space.
    random_letent_vectors = np.random.normal(size=(batch_size, latent_dim))

    # Misleading targets: 0 is the label used for real images above.
    misleading_targets = np.zeros((batch_size, 1))

    # 6. Train the generator through the gan model (discriminator frozen there).
    a_loss = gan.train_on_batch(random_letent_vectors, misleading_targets)

    # Advance the window over the real images, wrapping around at the end.
    start += batch_size
    if start > len(x_tranin) - batch_size:
        start = 0
    if step % 100 ==0:
        gan.save_weights('gan.h5')

        print('discriminator loss', d_loss)
        print('adversarial loss', a_loss)

        # Fixed: the generated sample used to be written to 'real_frog<step>.png'
        # and was then overwritten by the real sample saved just below.
        img = image.array_to_img(generated_images[0] * 255., scale=False)
        img.save(os.path.join(save_dir, 'generated_frog' + str(step) + '.png'))

        img = image.array_to_img(real_images[0] * 255., scale=False)
        img.save(os.path.join(save_dir, 'real_frog' + str(step) + '.png'))
        

  'Discrepancy between trainable weights and collected trainable'


discriminator loss 8.393893
adversarial loss 15.838412


KeyboardInterrupt: 

In [5]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy as np
from PIL import Image
import argparse
import math
import keras

#### 生成器网络

In [7]:
def generator_model():
    """
    Build the generator G(z; θ_g) that turns a noise vector into an image.

    Returns:
        An uncompiled ``Sequential`` model mapping a 100-dimensional vector to
        a 28×28×1 image; the final tanh keeps the output in [-1, 1].
    """
    # Sequential (layer-by-layer) model
    model = Sequential()
    # Fully connected layer: 100-dimensional input, 1024 units out.
    # `units` replaces the legacy Keras 1 keyword `output_dim`.
    model.add(Dense(units=1024, input_dim=100))
    # tanh activation
    model.add(Activation('tanh'))
    # Fully connected layer with 128*7*7 units
    model.add(Dense(units=128*7*7))
    # Batch normalization: normalizes the previous layer's activations per batch
    model.add(BatchNormalization())
    # tanh activation
    model.add(Activation('tanh'))
    # Reshape the 128*7*7 vector into a 7×7×128 tensor. The former
    # `input_shape=(128*7*7, None)` argument was dropped: it described the wrong
    # shape and has no role on a layer that is not the first in the model.
    model.add(Reshape(target_shape=(7, 7, 128))) # 7×7×128
    # 2D upsampling: repeats the existing values instead of filling with zeros
    model.add(UpSampling2D(size=(2,2))) # 14×14×128
    # 64 kernels of size 5×5; 'same' padding keeps the spatial size
    model.add(Conv2D(filters=64,kernel_size=(5,5), padding='same')) # 14×14×64
    model.add(Activation('tanh'))
    # 2D upsampling
    model.add(UpSampling2D(size=(2,2))) # 28×28×64
    # A single 5×5 kernel so that the output has one channel, like the target images
    model.add(Conv2D(filters=1, kernel_size=(5,5), padding='same')) # 28×28×1
    model.add(Activation('tanh'))
    return model

#### 拼接

In [4]:
def generator_containing_discriminator(g, d):
    """
    Stack the generator and the discriminator into one model.

    The discriminator is marked non-trainable (``d.trainable`` is set to False
    on the object that was passed in), so the stacked model is meant for
    training the generator against a fixed discriminator, i.e. minimizing
    max V(G, D*).

    Args:
        g: the generator model.
        d: the discriminator model; its ``trainable`` flag is modified.

    Returns:
        A ``Sequential`` model computing d(g(z)).
    """
    # Freeze the discriminator, then chain generator -> discriminator.
    d.trainable = False
    return Sequential([g, d])

In [None]:
def combine_images(generated_images):
    """
    Tile a batch of images into a single 2-D grid image.

    Only channel 0 of every image is copied. The grid has
    ``int(sqrt(n))`` columns and as many rows as needed to hold all ``n``
    images; unused cells stay zero.

    Args:
        generated_images: array of shape (n, height, width, channels).

    Returns:
        A 2-D array of shape (rows * height, cols * width) with the same dtype
        as the input, filled row by row.
    """
    count = generated_images.shape[0]
    n_cols = int(math.sqrt(count))
    n_rows = int(math.ceil(float(count) / n_cols))
    tile = generated_images.shape[1:3]
    tile_h = tile[0]
    tile_w = tile[1]
    canvas = np.zeros((n_rows * tile_h, n_cols * tile_w), dtype=generated_images.dtype)
    for position, picture in enumerate(generated_images):
        # Row-major placement: the position in the batch decides the grid cell.
        row, col = divmod(position, n_cols)
        top, left = row * tile_h, col * tile_w
        canvas[top:top + tile_h, left:left + tile_w] = picture[:, :, 0]
    return canvas

#### 判别器网络

In [5]:
def discriminator_model():
    """
    Build the discriminator: a small CNN scoring 28×28×1 images.

    Returns:
        An uncompiled ``Sequential`` model ending in a single sigmoid unit.
    """
    stack = [
        Conv2D(64, (5, 5), padding='same', input_shape=(28, 28, 1)),
        Activation('tanh'),
        # Max pooling layer
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(128, (5, 5)),
        Activation('tanh'),
        MaxPooling2D(pool_size=(2, 2)),
        # Flatten: transition from the convolutional to the dense layers
        Flatten(),
        Dense(1024),
        Activation('tanh'),
        # Binary classifier: one unit followed by a sigmoid
        Dense(1),
        Activation('sigmoid'),
    ]
    return Sequential(stack)

#### 训练

In [8]:
def train(BATCH_SIZE, epochs=30):
    """
    Train the MNIST GAN built from generator_model() and discriminator_model().

    Every batch performs one discriminator update on real + generated images
    and one generator update through the stacked model. Every 100 batches a
    grid of generated samples is written to ./GAN/<epoch>_<batch>.png, and the
    weights are saved to the files 'generator' and 'discriminator'.

    Args:
        BATCH_SIZE: number of real images (and of generated images) per step.
        epochs: number of passes over the training set (default 30, the value
            that was previously hard-coded).
    """
    import os  # local import: keeps this cell independent of earlier cells

    (X_train, y_train), _ = keras.datasets.mnist.load_data()
    print('X_train shape is ', X_train.shape) # (60000, 28, 28)
    print('y_train shape is ', y_train.shape) # (60000,)

    # Scale pixels to [-1, 1] to match the generator's tanh output.
    X_train = (X_train.astype(np.float32)-127.5)/127.5
    X_train = X_train[:, :, :, None] # (60000, 28, 28, 1) => 28×28×1

    # Build the models
    d = discriminator_model()
    g = generator_model()
    d_on_g = generator_containing_discriminator(g, d) # fixed d to min maxV(G,D*)

    # Optimizers
    d_optim = SGD(lr=0.001, momentum=0.9, nesterov=True)
    g_optim = SGD(lr=0.001, momentum=0.9, nesterov=True)

    # Compile the models
    g.compile(loss='binary_crossentropy', optimizer='SGD')
    d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)

    # The stacked model was compiled with the discriminator frozen; re-enable
    # it before compiling the discriminator on its own.
    d.trainable = True
    # Fixed: the keyword is `optimizer` (was `optimize`).
    d.compile(loss='binary_crossentropy', optimizer=d_optim)

    # Sample grids are written here; create the directory if it is missing.
    os.makedirs('./GAN', exist_ok=True)

    num_batches = int(X_train.shape[0]//BATCH_SIZE)
    # Targets: 1 for real images, 0 for generated ones; the generator is
    # trained with all-ones targets.
    d_targets = np.array([1] * BATCH_SIZE + [0] * BATCH_SIZE)
    g_targets = np.array([1] * BATCH_SIZE)

    for epoch in range(epochs):
        print('Epoch is ', epoch)
        print('Number of batches', num_batches)

        for idx in range(num_batches):
            # Uniform noise of shape BATCH_SIZE×100.
            # Fixed: np.random.uniform takes `low`/`high`, not `min`/`max`.
            noise = np.random.uniform(low=-1.0, high=1.0, size=(BATCH_SIZE, 100))
            # Take one batch of real images
            img_batch = X_train[idx*BATCH_SIZE:(idx+1)*BATCH_SIZE]
            generated_img = g.predict(noise, verbose=0) # no log

            # Every 100 batches, save a grid of the current generated samples.
            if idx % 100 == 0:
                img = combine_images(generated_img)
                img = img * 127.5 + 127.5
                Image.fromarray(img.astype(np.uint8)).save('./GAN/'+str(epoch)+"_"+str(idx)+'.png')

            # Fixed: the training step below used to be nested inside the
            # `idx % 100 == 0` branch, so only 1 batch in 100 was trained on and
            # the weight-saving condition could never be true.

            # Combine real and generated images
            X = np.concatenate((img_batch, generated_img))
            d_loss = d.train_on_batch(X, d_targets)
            # Fixed: the loop variable is `idx` (`index` was undefined).
            print("batch %d d_loss : %f" % (idx, d_loss))

            # Fresh uniform noise for the generator update
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))

            # Freeze the discriminator
            d.trainable = False

            # Generator loss; one parameter update on a single batch
            g_loss = d_on_g.train_on_batch(noise, g_targets)

            # Make the discriminator trainable again
            d.trainable = True
            print("batch %d g_loss : %f" % (idx, g_loss))

            # Save the generator and discriminator weights every 100 batches
            if idx % 100 == 9:
                g.save_weights('generator', True)
                d.save_weights('discriminator', True)

In [9]:
def generate(BATCH_SIZE, nice=False):
    """
    Generate images with the trained generator and save them as one grid.

    Run this after training: it loads the weights from the file 'generator'
    (and 'discriminator' when ``nice`` is set) and writes the result to
    ./GAN/generated_image.png.

    Args:
        BATCH_SIZE: number of images in the saved grid.
        nice: when True, generate BATCH_SIZE*20 candidates and keep the
            BATCH_SIZE images the discriminator scores highest.
    """
    import os  # local import: keeps this cell independent of earlier cells

    g = generator_model()
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    g.load_weights('generator')
    if nice:
        d = discriminator_model()
        d.compile(loss='binary_crossentropy', optimizer="SGD")
        d.load_weights('discriminator')
        noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))
        generated_images = g.predict(noise, verbose=1)
        d_pret = d.predict(generated_images, verbose=1)
        # Rank candidates by descending discriminator score. A stable sort on
        # the negated scores keeps ties in their original order, like the
        # previous list.sort(reverse=True) on (score, index) pairs.
        best = np.argsort(-d_pret[:, 0], kind='mergesort')[:BATCH_SIZE]
        # Keep channel 0 only, as a (BATCH_SIZE, height, width, 1) float32 array.
        nice_images = generated_images[best, :, :, :1].astype(np.float32)
        image = combine_images(nice_images)
    else:
        noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
        generated_images = g.predict(noise, verbose=0)
        image = combine_images(generated_images)
    # Map the tanh range [-1, 1] back to pixel values [0, 255].
    image = image*127.5+127.5
    # Create the output directory so that saving cannot fail when it is missing.
    os.makedirs('./GAN', exist_ok=True)
    Image.fromarray(image.astype(np.uint8)).save(
        "./GAN/generated_image.png")

In [None]:
# Generate 132 images (nice=False, so no discriminator ranking) and save the
# tiled grid to ./GAN/generated_image.png; generate() loads the weights from
# the 'generator' file, so that file must already exist.
generate(132)