In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import os
import matplotlib.pyplot as plt

tf.enable_eager_execution()

In [2]:
learning_rate = 0.01
training_epochs = 6
batch_size = 10

In [3]:
cur_dir = os.getcwd()
model_dir_name = 'gan_eager_mode'
checkpoint_dir = os.path.join(cur_dir, 'checkpoints', model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)

checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

In [4]:
mnist = keras.datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()

print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

train_x = train_x.astype(np.float32) / 255
test_x = test_x.astype(np.float32) / 255

train_x = np.expand_dims(train_x, 3)
test_x = np.expand_dims(test_x, 3)

train_y = to_categorical(train_y, 10)
test_y = to_categorical(test_y, 10)

train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).shuffle(buffer_size=70000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y)).batch(batch_size)

print('After preprocessing')
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)
After preprocessing
(60000, 28, 28, 1) (60000, 10) (10000, 28, 28, 1) (10000, 10)


In [5]:
class G(keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dense1 = keras.layers.Dense(49*3, activation=None)
        self.batch1 = keras.layers.BatchNormalization()
        self.dropout1 = keras.layers.Dropout(0.3)
        self.relu1 = keras.layers.ReLU()
        self.reshape1 = keras.layers.Reshape((7, 7, 3))
        
#         self.upsample1 = keras.layers.UpSampling2D((2, 2))
        self.conv2_transpose = keras.layers.Conv2DTranspose(16, 3, 2, padding='SAME', activation=None)
        self.conv2 = keras.layers.Conv2D(16, 3, padding='SAME')
        self.batch2 = keras.layers.BatchNormalization()
        self.relu2 = keras.layers.ReLU()
        
#         self.upsample2 = keras.layers.UpSampling2D((2, 2))
        self.conv3_transpose = keras.layers.Conv2DTranspose(32, 3, 2, padding='SAME', activation=None)
        self.conv3 = keras.layers.Conv2D(32, 3, padding='SAME')
        self.batch3 = keras.layers.BatchNormalization()
        self.relu3 = keras.layers.ReLU()
        
        self.conv4 = keras.layers.Conv2D(1, 3, padding='SAME', activation=tf.nn.sigmoid)
        
    def call(self, inputs, training=False):
        net = self.dense1(inputs)
        net = self.batch1(net)
        net = self.dropout1(net)
        net = self.relu1(net)
        net = self.reshape1(net)
        net = self.conv2_transpose(net)
        net = self.conv2(net)
        net = self.batch2(net)
        net = self.relu2(net)
        net = self.conv3_transpose(net)
        net = self.conv3(net)
        net = self.batch3(net)
        net = self.relu3(net)
        net = self.conv4(net)
        
        return net

        
class D(keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.conv1 = keras.layers.Conv2D(16, 3, padding='SAME', activation=None)
        # (None, 28, 28, 1) => (None, 28, 28, 16)
        self.batch1 = keras.layers.BatchNormalization()
        self.dropout1 = keras.layers.Dropout(0.3)
        self.lrelu1 = keras.layers.LeakyReLU()
        self.pool1 = keras.layers.MaxPool2D((2,2), padding='SAME')
        # (None, 28, 28, 16) => (None, 14, 14, 16)
        
        self.conv2 = keras.layers.Conv2D(32, 3, padding='SAME', activation=None)
        # (None, 14, 14, 16) => (None, 14, 14, 32)
        self.batch2 = keras.layers.BatchNormalization()
        self.dropout2= keras.layers.Dropout(0.3)
        self.lrelu2 = keras.layers.LeakyReLU()
        self.pool2 = keras.layers.MaxPool2D((2, 2), padding='SAME')
        # (None, 14, 14, 32) => (None, 7, 7, 32)
        
        self.conv3 = keras.layers.Conv2D(64, 7, padding='SAME', activation=None)
        # (None, 7, 7, 32) => (None, 1, 1, 64)
        self.lrelu3 = keras.layers.LeakyReLU()
        self.flatten3 = keras.layers.Flatten()
        self.out_layer = keras.layers.Dense(1, activation=tf.nn.sigmoid)
        
        
    def call(self, inputs, training=False):
        net = self.conv1(inputs)
        net = self.batch1(net)
        net = self.dropout1(net)
        net = self.lrelu1(net)
        net = self.pool1(net)
        
        net = self.conv2(net)
        net = self.batch2(net)
        net = self.dropout2(net)
        net = self.lrelu2(net)
        net = self.pool2(net)
        
        net = self.conv3(net)
        net = self.lrelu3(net)
        net = self.flatten3(net)
        net = self.out_layer(net)
        
        return net

In [6]:
gen = G()
dis = D()

In [7]:
epsilon = 1e-7

def g_loss_fn(g_model, d_model, z):
    fake = d_model(g_model(z))
    loss = -tf.reduce_mean(tf.log(fake+epsilon))
    return loss


def d_loss_fn(g_model, d_model, z, x):
    fake = d_model(g_model(z))
    real = d_model(x)
    loss = -tf.reduce_mean(tf.log(real+epsilon)+tf.log(1-fake+epsilon))
    return loss
    

def g_grads(g_model, d_model, z):
    with tf.GradientTape() as tape:
        loss = g_loss_fn(g_model, d_model, z)
    return tape.gradient(loss, g_model.variables)
            

def d_grads(g_model, d_model, z, x):
    with tf.GradientTape() as tape:
        loss = d_loss_fn(g_model, d_model, z, x)
    return tape.gradient(loss, d_model.variables)

In [8]:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=0.01)

checkpoint = tf.train.Checkpoint(g=gen, d=dis, optimizer=optimizer)

In [9]:
num_train_data = train_x.shape[0]
num_test_data = test_x.shape[0]

status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
status.initialize_or_restore()

for epoch in range(training_epochs):
    
    d_avg_loss = 0
    g_avg_loss = 0 
    train_step = 0
    
    
    for images, labels in train_dataset:
        step_batch_size = labels.shape[0]
        z = np.random.uniform(size=[step_batch_size, 4])
        c = np.argmax(labels, axis=1)
        c = c.reshape([-1,1])
        z = np.concatenate([z, c], axis=1)
 
        # d update
        d_grad = d_grads(gen, dis, z, images)
        optimizer.apply_gradients(zip(d_grad, dis.variables))
        d_loss = d_loss_fn(gen, dis, z, images)
        
        d_avg_loss += d_loss
    
        # g update
        g_grad = g_grads(gen, dis ,z)
        optimizer.apply_gradients(zip(g_grad, gen.variables))
        g_loss = g_loss_fn(gen, dis, z)
        
        g_avg_loss += g_loss
        
    d_avg_loss /= train_step
    g_avg_loss /= train_step
    
    print("#{}Epoch".format(epoch+1))
    print("d_avg_loss:{}\tg_avg_loss:{}".format(d_avg_loss, g_avg_loss))
    
    

Instructions for updating:
Colocations handled automatically by placer.


InternalError: Could not find valid device for node.
Node: {{node FusedBatchNorm}}
All kernels registered for op FusedBatchNorm :
  device='CPU'; T in [DT_FLOAT]
 [Op:FusedBatchNorm]

In [12]:
tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

[]