In [1]:
%pylab inline

import numpy as np
import tensorflow as tf


from keras.models import Input, Sequential, Model
from keras.layers import Dense, BatchNormalization, Layer, Activation, Reshape, Dropout, LeakyReLU
from keras.datasets.mnist import load_data
from keras.utils import np_utils
from keras.losses import categorical_crossentropy
from keras import backend as K

from sklearn.metrics import accuracy_score

from tqdm import tqdm

Populating the interactive namespace from numpy and matplotlib


Using TensorFlow backend.


In [2]:
# GPU options for this process: per_process_gpu_memory_fraction=0.4 with
# allow_growth=True.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4, allow_growth=True)
# Open a TensorFlow session configured with the options above.
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
# Hand the same session to the Keras backend so both APIs share it.
K.set_session(sess)

## Data

In [3]:
# Load the MNIST train/test splits through Keras.
(train_x, train_y), (test_x, test_y) = load_data()

# One 100-dimensional N(0, 1) noise vector per image; these are the inputs
# later fed to the generator. Train noise is drawn before test noise.
train_noise = np.random.normal(0, 1, size=(train_x.shape[0], 100))
test_noise = np.random.normal(0, 1, size=(test_x.shape[0], 100))

# Flatten every image to a 784-vector and divide by 255
# (presumably 8-bit pixel values, giving a [0, 1] range -- confirm).
train_x = train_x.reshape((-1, 784)) / 255.
test_x = test_x.reshape((-1, 784)) / 255.

# One-hot encode the digit labels.
train_y = np_utils.to_categorical(train_y)
test_y = np_utils.to_categorical(test_y)

# Report the shape of every array prepared above.
for _name, _values in (('train_x:', train_x),
                       ('train_y:', train_y),
                       ('train_noise:', train_noise),
                       ('test_x:', test_x),
                       ('test_y:', test_y),
                       ('test_noise:', test_noise)):
    print(_name, _values.shape)

train_x: (60000, 784)
train_y: (60000, 10)
train_noise: (60000, 100)
test_x: (10000, 784)
test_y: (10000, 10)
test_noise: (10000, 100)


## Tools

In [4]:
def make_trainable(model, trainable: bool):
    """Set the ``trainable`` flag on ``model`` and on each of its layers.

    Args:
        model: object exposing a ``trainable`` attribute and a ``layers``
            iterable whose items also accept a ``trainable`` attribute.
        trainable: value assigned to every flag.
    """
    # The model itself goes first, followed by its layers in order.
    for target in (model, *model.layers):
        target.trainable = trainable

def display_image(image):
    """Show ``image`` as a 28x28 grayscale picture in the notebook.

    Args:
        image: array that can be reshaped to (28, 28); its values are
            multiplied by 255 before plotting.
    """
    pixels = image.reshape((28, 28)) * 255
    display(imshow(pixels, cmap='gray'))
    
def _to_tensor(x, dtype):
    """Convert ``x`` to a TensorFlow tensor of the requested ``dtype``.

    Args:
        x: value accepted by ``tf.convert_to_tensor``.
        dtype: target TensorFlow dtype.

    Returns:
        The converted tensor, cast only when its dtype differs from ``dtype``.
    """
    tensor = tf.convert_to_tensor(x)
    return tensor if tensor.dtype == dtype else tf.cast(tensor, dtype)

## Model

### Generator Model

In [5]:
# Generator Model 
def create_generator_model(loss='categorical_crossentropy'):
    """Build the (uncompiled) generator: 100 -> 196 -> 289 -> 441 -> 784.

    Every Dense layer is followed by a sigmoid activation; no
    BatchNormalization is applied.

    Args:
        loss: not used by this function (the model is returned uncompiled);
            kept so existing calls remain valid.

    Returns:
        A Keras ``Sequential`` model named 'Generator'.
    """
    generator = Sequential(name='Generator')

    # The first stage fixes the noise input shape and carries an explicit name.
    generator.add(Dense(196, batch_input_shape=(None, 100), name='g_layer_01'))
    generator.add(Activation('sigmoid'))

    # The remaining stages differ only in width.
    for units in (289, 441, 784):
        generator.add(Dense(units))
        generator.add(Activation('sigmoid'))

    return generator

# Instantiate the generator and print its layer-by-layer summary.
generator = create_generator_model()
generator.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
g_layer_01 (Dense)           (None, 196)               19796     
_________________________________________________________________
activation_1 (Activation)    (None, 196)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 289)               56933     
_________________________________________________________________
activation_2 (Activation)    (None, 289)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 441)               127890    
_________________________________________________________________
activation_3 (Activation)    (None, 441)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 784)               346528    
__________

### Discriminator Model

In [6]:
def custom_binary_crossentropy(y_true, y_pred):
    """Element-wise binary cross-entropy with predictions clipped away from 0 and 1.

    Prints 'discriminator loss' each time the function is called.

    Args:
        y_true: target tensor.
        y_pred: predicted-probability tensor.

    Returns:
        Tensor ``-y_true*log(p) - (1-y_true)*log(1-p)`` with ``p`` the clipped
        prediction; no reduction is applied.
    """
    print('discriminator loss')
    # 10e-8 == 1e-7; clipping keeps both logarithms finite.
    eps = _to_tensor( 10e-8, y_pred.dtype.base_dtype)
    clipped = tf.clip_by_value(y_pred, eps, 1 - eps)
    log_p = K.log(clipped)
    log_not_p = K.log(1-clipped)
    return - y_true * log_p - (1 - y_true) * log_not_p

def create_discriminator_model(loss='binary_crossentropy'):
    """Build the (uncompiled) discriminator: 784 -> 784 -> 441 -> 289 -> 2.

    Hidden stages are Dense + LeakyReLU; the first two also apply
    BatchNormalization between the two. The two-unit output goes through a
    sigmoid.

    Args:
        loss: not used by this function (the model is returned uncompiled);
            kept so existing calls remain valid.

    Returns:
        Tuple ``(d_input, d_output, discriminator)``: the input tensor, the
        final output tensor, and the Keras ``Model`` connecting them.
    """
    d_input = Input(batch_shape=(None, 784), name='d_layer_01')

    # (units, apply batch normalisation?) for each hidden stage, in order.
    hidden_stages = ((784, True), (441, True), (289, False))

    hidden = d_input
    for units, with_batch_norm in hidden_stages:
        hidden = Dense(units)(hidden)
        if with_batch_norm:
            hidden = BatchNormalization()(hidden)
        hidden = LeakyReLU()(hidden)

    d_output = Activation('sigmoid')(Dense(2)(hidden))

    discriminator = Model(d_input, d_output)
    return d_input, d_output, discriminator

# Build the discriminator and print its summary. The loss passed here is not
# applied: create_discriminator_model returns the model uncompiled.
d_input, d_hidden, discriminator = create_discriminator_model(loss=custom_binary_crossentropy)
discriminator.summary()

discriminator loss
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
d_layer_01 (InputLayer)      (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 784)               615440    
_________________________________________________________________
batch_normalization_1 (Batch (None, 784)               3136      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 784)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 441)               346185    
_________________________________________________________________
batch_normalization_2 (Batch (None, 441)               1764      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 441)               0 

### GAN Model

gan model 변수는 generator를 training 하기 위한 모델입니다. <br>
generator를 거쳐 generated images가 만들어지면, discriminator가 해당 이미지의 진위 여부를 판별합니다.


In [7]:
def minigame_loss(y_true, y_pred):
    """Negated categorical cross-entropy between ``y_true`` and ``y_pred``.

    Minimising this value maximises the cross-entropy.
    """
    cross_entropy = categorical_crossentropy(y_true, y_pred)
    return -cross_entropy

def heuristic_loss(y_true, y_pred):
    """Element-wise ``-y_true * log(y_pred)`` with clipped predictions.

    In effect this is a cross-entropy term; no reduction is applied.

    Args:
        y_true: target tensor.
        y_pred: predicted-probability tensor.
    """
    # 10e-8 == 1e-7; clipping keeps the logarithm finite.
    eps = _to_tensor( 10e-8, y_pred.dtype.base_dtype)
    safe_pred = tf.clip_by_value(y_pred, eps, 1 - eps)
    log_pred = K.log(safe_pred)
    return - y_true * log_pred

def maximum_likelihood(y_true, y_pred):
    """Maximum-likelihood style generator cost: ``-y_true * exp(logit(y_pred))``.

    ``logit(p) = log(p) - log(1 - p)`` is the inverse of the sigmoid, so
    ``exp(logit(p))`` equals ``p / (1 - p)``.

    NOTE(review): the sign and the exp(sigmoid^-1(.)) form follow the
    maximum-likelihood generator cost described in Goodfellow's NIPS 2016 GAN
    tutorial -- confirm this is the formula intended here.

    Args:
        y_true: target tensor used as a per-class weight.
        y_pred: predicted-probability tensor.

    Returns:
        Element-wise loss tensor; no reduction is applied.
    """
    # Clip exactly as the sibling losses do so both logarithms stay finite.
    epsilon = _to_tensor( 10e-8, y_pred.dtype.base_dtype)
    y_pred = tf.clip_by_value(y_pred, epsilon, 1 - epsilon)
    logit = K.log(y_pred) - K.log(1 - y_pred)
    return - y_true * K.exp(logit)

def create_gan_model2(generator, d_input, d_hidden, discriminator, loss='categorical_crossentropy'):
    """Build raw-TensorFlow ops for the generator-training step of the GAN.

    A new (None, 100) noise input is passed through ``generator`` and then
    ``discriminator``. The loss is the mean categorical cross-entropy between
    a label placeholder and the discriminator's output.

    Args:
        generator: Keras model applied to the noise input.
        d_input: not used; kept so existing calls remain valid.
        d_hidden: not used; kept so existing calls remain valid.
        discriminator: Keras model applied to the generator's output.
        loss: not used; the categorical cross-entropy is hard-wired.

    Returns:
        Tuple ``(gan_labels, gan_loss, gan_optimizer)``: the (None, 2) float32
        label placeholder, the scalar loss tensor, and the Adam update op.

    NOTE(review): the noise input created here is not part of the return
    value, so a caller running ``gan_optimizer`` needs another way to feed it.
    """
    gan_input = Input(batch_shape=(None, 100), name='gan_input')
    g_output = generator(gan_input)
    gan_output = discriminator(g_output)

    gan_labels = tf.placeholder(tf.float32, shape=(None, 2))
    gan_loss = tf.reduce_mean(categorical_crossentropy(gan_labels, gan_output))

    # Without var_list, minimize() updates every trainable variable in the
    # graph, including the discriminator's. This step must only move the
    # generator's weights.
    gan_optimizer = tf.train.AdamOptimizer().minimize(
        gan_loss, var_list=generator.trainable_weights)

    return gan_labels, gan_loss, gan_optimizer
    
#     loss = tf.reduce_mean(categorical_crossentropy(labels, predicts))
#     train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
    

def create_gan_model(generator, discriminator, loss='categorical_crossentropy'):
    """Stack ``generator`` and a frozen ``discriminator`` into one compiled model.

    Side effect: ``discriminator`` and all of its layers are marked
    non-trainable.

    Args:
        generator: Keras model used as the first stage.
        discriminator: Keras model used as the second stage.
        loss: loss passed to ``compile``; the optimizer is always 'adam'.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Freeze the discriminator (model flag plus every layer) before compiling.
    discriminator.trainable = False
    make_trainable(discriminator, False)

    stacked = Sequential([generator, discriminator])
    stacked.compile(loss=loss, optimizer='adam')
    return stacked

# Build the generator-training ops with plain TensorFlow. Note that
# create_gan_model2 never reads its `loss` argument, so minigame_loss has no
# effect here.
gan_labels, gan_loss, gan_optimizer = create_gan_model2(generator, d_input, d_hidden, 
                                                        discriminator, loss=minigame_loss)
# gan.summary()

In [12]:
# Rebuild both networks from scratch; these names now refer to new models,
# replacing the ones created in the earlier cells.
generator = create_generator_model()
d_input, d_hidden, discriminator = create_discriminator_model()

# Recreate the generator-training ops on top of the new models
# (the `loss` argument is not read by create_gan_model2).
gan_labels, gan_loss, gan_optimizer = create_gan_model2(generator, d_input, d_hidden, 
                                                        discriminator, loss=minigame_loss)
# Compile each network on its own so the Keras predict/train_on_batch API can be used.
generator.compile(loss='categorical_crossentropy', optimizer='adam')
discriminator.compile(loss='binary_crossentropy', optimizer='adam')

In [13]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Return a copy of ``graph_def`` with large constant payloads stripped.

    Args:
        graph_def: a ``tf.GraphDef`` whose nodes are copied one by one.
        max_const_size: 'Const' nodes whose ``tensor_content`` is longer than
            this many bytes get the content replaced by a short placeholder.

    Returns:
        A new ``tf.GraphDef``; the input is not modified.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf bytes field: on Python 3 it
                # rejects str, so the placeholder has to be bytes.
                tensor.tensor_content = b"<stripped %d bytes>" % size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inside the notebook via an HTML iframe.

    Args:
        graph_def: a GraphDef, or any object with an ``as_graph_def()`` method.
        max_const_size: forwarded to ``strip_consts``.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    # Random suffix so each rendered graph gets its own element id.
    element_id = 'graph'+str(np.random.rand())

    widget_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    widget_html = widget_template.format(data=repr(str(stripped)), id=element_id)

    frame_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    # The widget markup is embedded in the srcdoc attribute, so its double
    # quotes are escaped first.
    frame_html = frame_template.format(widget_html.replace('"', '&quot;'))
    display(HTML(frame_html))
    
# Render the current default TensorFlow graph inline.
show_graph(tf.get_default_graph().as_graph_def())

## Train

In [14]:
def next_batch(x, y, noise, idx, batch_size=250):
    """Slice the same ``[idx, idx + batch_size)`` window out of three sequences.

    Args:
        x, y, noise: sliceable sequences.
        idx: start offset of the window.
        batch_size: window length.

    Returns:
        Tuple ``(x_batch, y_batch, noise_batch)``.
    """
    window = slice(idx, idx + batch_size)
    return x[window], y[window], noise[window]

def next_noise_batch(noise, idx, batch_size=250):
    """Return a window of noise vectors together with matching one-hot labels.

    Args:
        noise: array of noise vectors, one per row.
        idx: start offset of the window.
        batch_size: maximum number of rows to take.

    Returns:
        Tuple ``(noise_batch, labels)``. ``labels`` has one ``[0, 1]`` row per
        returned noise row (the same encoding ``concatenate_images`` uses for
        generated images).
    """
    noise_batch = noise[idx: idx+batch_size]
    # Size the labels from the slice actually obtained: near the end of
    # `noise` the window can hold fewer than `batch_size` rows.
    y = np.zeros((len(noise_batch), 2))
    y[:, 1] = 1
    return noise_batch, y

def shuffle(x, y, n):
    """Apply one shared random permutation to three row-aligned arrays.

    Args:
        x, y, n: arrays indexable by an integer index array; the permutation
            length is taken from ``x.shape[0]``.

    Returns:
        Tuple ``(x, y, n)`` reordered by the same permutation.
    """
    order = np.random.permutation(x.shape[0])
    return x[order], y[order], n[order]

def concatenate_images(true_images, generated_images):
    """Stack real and generated images and build their one-hot labels.

    Args:
        true_images: array of real images, one per row.
        generated_images: array of generated images, one per row.

    Returns:
        Tuple ``(combined_x, combined_y)``. ``combined_x`` holds the real rows
        followed by the generated rows; ``combined_y`` is a float array with
        ``[1, 0]`` for real rows and ``[0, 1]`` for generated rows.
    """
    samples = np.concatenate((true_images, generated_images))

    # Rows at or beyond the number of real images are the generated ones.
    is_generated = np.arange(samples.shape[0]) >= true_images.shape[0]
    labels = np.stack((~is_generated, is_generated), axis=1).astype(float)

    return samples, labels


In [15]:
# Training configuration, passed to train() in this cell.
BATCH_SIZE = 128  # samples per mini-batch
EPOCH = 300       # number of passes over the training data

def train(data_x, data_y, data_n, epochs=10, batch_size=128):
    """Alternate discriminator and generator updates over shuffled mini-batches.

    Relies on the notebook-level ``generator``, ``discriminator`` and ``gan``
    objects.
    NOTE(review): no visible cell assigns ``gan``; presumably it is a compiled
    model such as the one ``create_gan_model(generator, discriminator)``
    returns -- confirm before running on a fresh kernel.

    Args:
        data_x: real images, one per row.
        data_y: labels aligned with ``data_x`` (shuffled along with it but not
            otherwise used).
        data_n: noise vectors aligned with ``data_x``.
        epochs: number of passes over the data.
        batch_size: rows per mini-batch; only full batches are processed.

    Returns:
        Tuple ``(global_d_losses, global_g_losses)`` with one entry per
        training step across all epochs. The generator losses are also
        plotted.
    """
    # Size the loop from the data actually passed in, not from a global array.
    n_samples = data_x.shape[0]

    global_d_losses = []
    global_g_losses = []

    for epoch in range(epochs):
        data_x, data_y, data_n = shuffle(data_x, data_y, data_n)
        d_losses = []
        g_losses = []
        # Step with the batch_size argument; "+ 1" keeps the final full batch
        # when n_samples is an exact multiple of batch_size.
        for step in range(0, n_samples - batch_size + 1, batch_size):
            # sample_noise: Sample minibatch of m (batch_size) noise samples {z^1, ..., z^m} from noise prior p_g(z)
            # sample_x: Sample minibatch of m examples {x^1, ..., x^m} from data generating distribution
            sample_x, sample_y, sample_noise = next_batch(data_x, data_y, data_n, step, batch_size=batch_size)

            # Combine true images and generated images for training the discriminator
            generated_images = generator.predict(sample_noise)
            combined_x, combined_y = concatenate_images(sample_x, generated_images)

            # Update the discriminator by ascending its stochastic gradient.
            # train_on_batch is like fit, but for a single mini-batch.
            d_loss = discriminator.train_on_batch(combined_x, combined_y)

            # Sample minibatch of m noise samples {z^1, ..., z^m} from noise prior p_g(z)
            noise_x, noise_y = next_noise_batch(data_n, step, batch_size=batch_size)

            # Update the generator by descending its stochastic gradient
            g_loss = gan.train_on_batch(noise_x, noise_y)

            d_losses.append(d_loss)
            g_losses.append(g_loss)
            global_d_losses.append(d_loss)
            global_g_losses.append(g_loss)

        _dl = np.array(d_losses)
        _gl = np.array(g_losses)
        print(f'[{epoch}]', 'd loss:', _dl.mean(), ', g loss:', _gl.mean())

    # Only the generator curve is plotted; the discriminator curve is disabled.
#     plot(global_d_losses, label='dicriminator')
    plot(global_g_losses, label='generator')
    legend()

    return global_d_losses, global_g_losses
            
# Run the full training loop on the training split and keep the per-step losses.
d_losses, g_losses = train(train_x, train_y, train_noise, epochs=EPOCH, batch_size=BATCH_SIZE)


InvalidArgumentError: You must feed a value for placeholder tensor 'gan_input_2' with dtype float
	 [[Node: gan_input_2 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
	 [[Node: mul_85/_127 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_889_mul_85", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'gan_input_2', defined at:
  File "/usr/local/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-d983f8225a27>", line 5, in <module>
    discriminator, loss=minigame_loss)
  File "<ipython-input-7-978c2b474a3a>", line 14, in create_gan_model2
    gan_input = Input(batch_shape=(None, 100), name='gan_input')
  File "/usr/local/lib/python3.6/site-packages/keras/engine/topology.py", line 1388, in Input
    input_tensor=tensor)
  File "/usr/local/lib/python3.6/site-packages/keras/engine/topology.py", line 1299, in __init__
    name=self.name)
  File "/usr/local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 349, in placeholder
    x = tf.placeholder(dtype, shape=shape, name=name)
  File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1502, in placeholder
    name=name)
  File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder
    name=name)
  File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'gan_input_2' with dtype float
	 [[Node: gan_input_2 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
	 [[Node: mul_85/_127 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_889_mul_85", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


In [None]:
# Plot the generator loss per step, skipping the first 20 recorded values.
plot(g_losses[20:])

In [None]:
# Take a window of test noise (default batch size) starting at row 1.
noise_x, noise_y = next_noise_batch(test_noise, 1)

# Generate images from that noise and show one picked at random. The index
# bound comes from the batch itself, not from a hard-coded 250 that only
# matches the default batch size.
d = generator.predict(noise_x)
display_image(d[np.random.randint(d.shape[0])])

## Discriminator Accuracy

In [None]:
# Evaluation set: every real test image followed by one generated image per
# test noise vector.
N = test_x.shape[0]
generated_images = generator.predict(test_noise)
_combined_x = np.concatenate((test_x, generated_images))

# Class index 0 = real (first N rows), 1 = generated (remaining rows).
y_true = (np.arange(_combined_x.shape[0]) >= N).astype(np.intp)

# Predicted class = index of the larger of the two discriminator outputs.
y_pred = discriminator.predict(_combined_x).argmax(axis=1)

discriminator_accuracy = accuracy_score(y_true, y_pred)
print(f"Discriminator's Accuracy Score: {discriminator_accuracy}" )

In [None]:
# Discriminator's predicted class for the first 20 real test images.
print('REAL IMAGES')
real_scores = discriminator.predict(test_x[0:20])
print(np.argmax(real_scores, axis=1))
print()

# Generate 20 images from a window of test noise starting at a random offset below 1000.
noise_x, noise_y = next_noise_batch(test_noise, np.random.randint(1000))
d = generator.predict(noise_x[:20])

# Discriminator's predicted class for those generated images.
print('Generated Images')
generated_scores = discriminator.predict(d[0:20])
print(np.argmax(generated_scores, axis=1))