# Deep Convolutional Generative Adversarial Network

## Step 1: Setup

### Import Packages 

In [55]:
import os
# import glob
import sys
import datetime

# import imageio
# from PIL import Image
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, datasets, Sequential, optimizers

print("%d GPUs are available for tensorflow %s in current environment." % 
      (len(tf.config.experimental.list_physical_devices('GPU')), tf.__version__))

0 GPUs are available for tensorflow 2.0.0 in current environment.


### Set the Paths of the Outputs

In [56]:
# get the dictionary for the project
pwd = os.getcwd()
sys.path.append(pwd)
# set and create the path for log file for tesnorboard
log_dir = os.path.join(pwd, 'outputs', 'logs')
os.makedirs(log_dir, exist_ok = True)
# set and create the path for saving the images
image_dir = os.path.join(pwd, 'outputs', 'images')
os.makedirs(image_dir, exist_ok = True)
# set and create the path for saving the weights of the model
checkpoint_dir = os.path.join(pwd, 'outputs', 'checkpoints')
os.makedirs(checkpoint_dir, exist_ok = True)

In [57]:
# config the version of training, structure of model and usage of dataset
stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
structure = 'RAN32'
dataset = 'CIFAR10'
config = "%s-%s-%s" % (structure, dataset, stamp)
config

'RAN32-CIFAR10-20191209-111644'

### Load and Prepare the Dataset

In [58]:
# load cifer 10 data
(cifar10_train_x, cifar10_train_y), (cifar10_val_x, cifar10_val_y) = datasets.cifar10.load_data()
print("The shape of CIFAR10 data is: " + str(cifar10_train_x.shape))
# # load cifer 100 data
# (cifar100_train_x, cifar100_train_y), (cifar100_val_x, cifar100_val_y) = datasets.cifar100.load_data()
# print("The shape of CIFAR100 data is: " + str(cifar100_train_x.shape))

The shape of CIFAR10 data is: (50000, 32, 32, 3)


In [59]:
# normalize data
def preprocess_cifar10(x, y):        
    x = 2*tf.cast(x, dtype=tf.float32)/255 - 1
    y = tf.squeeze(tf.one_hot(y, depth=10, dtype=tf.int32))
    return x, y

def preprocess_cifar100(x, y):        
    x = 2*tf.cast(x, dtype=tf.float32)/255 - 1
    y = tf.one_hot(y, depth=100, dtype=tf.int32)
    return x, y

In [60]:
# preprocess the cifer10 dataset
cifar10_train_db = tf.data.Dataset.from_tensor_slices((cifar10_train_x, cifar10_train_y)).map(preprocess_cifar10).shuffle(60000)
cifar10_val_db = tf.data.Dataset.from_tensor_slices((cifar10_val_x, cifar10_val_y)).map(preprocess_cifar10)
# get one batch and check the dimension of this batch
cifar10_samples = next(iter(cifar10_val_db.batch(8)))
print("shape of one batch for CIFAR10 images is: %s and %s" % 
      (str(cifar10_samples[0].shape), str(cifar10_samples[1].shape)))

# # preprocess the cifer100 dataset
# cifar100_train_db = tf.data.Dataset.from_tensor_slices((cifar100_train_x, cifar100_train_y)).map(preprocess_cifar100).shuffle(60000)
# cifar100_val_db = tf.data.Dataset.from_tensor_slices((cifar100_val_x, cifar100_val_y)).map(preprocess_cifar100)
# # get one batch and check the dimension of this batch
# cifar100_samples = next(iter(cifar100_val_db.batch(8)))
# print("shape of one batch for CIFAR100 images is: %s and %s" % 
#       (str(cifar100_samples[0].shape), str(cifar100_samples[1].shape)))

shape of one batch for CIFAR10 images is: (8, 32, 32, 3) and (8, 10)


### Testing

In [83]:
# %load_ext autoreload
# %autoreload 2
from utils.residual_unit import *
from utils.attention_module import *
from utils.models import *

In [84]:
x, y = cifar10_samples
x.shape

TensorShape([8, 32, 32, 3])

In [85]:
down = DownSampleUnit(2, [16, 32], [2, 2], stride_pool=8)
down(x).shape

TensorShape([8, 1, 1, 32])

In [86]:
down = DownSampleUnit(2, [16,32], [2,2], stride_pool=2)
down(x).shape

TensorShape([8, 4, 4, 32])

In [87]:
up = UpSampleUnit(2, [16, 8], [1, 1], up_size=2)
up(x).shape

TensorShape([8, 64, 64, 8])

In [88]:
x.shape

TensorShape([8, 32, 32, 3])

In [89]:
model = Attention56()
model.build(input_shape=(None, 32, 32, 3))

In [90]:
model.summary()

Model: "attention56_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
attention_module_25 (Attenti multiple                  37719632  
_________________________________________________________________
residual_unit_identity_30 (R multiple                  1182848   
_________________________________________________________________
attention_module_26 (Attenti multiple                  42173696  
_________________________________________________________________
residual_unit_identity_32 (R multiple                  4724992   
_________________________________________________________________
attention_module_27 (Attenti multiple                  58964992  
_________________________________________________________________
residual_unit_193 (ResidualU multiple                  19149824  
_________________________________________________________________
batch_normalization_696 (Bat multiple                

In [79]:
logits = model(x)
print(logits.shape)

(8, 10)


In [80]:
model.summary()

Model: "attention56_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
attention_module_19 (Attenti multiple                  37719632  
_________________________________________________________________
residual_unit_identity_20 (R multiple                  1182848   
_________________________________________________________________
attention_module_20 (Attenti multiple                  42173696  
_________________________________________________________________
residual_unit_identity_22 (R multiple                  4724992   
_________________________________________________________________
attention_module_21 (Attenti multiple                  58964992  
_________________________________________________________________
residual_unit_145 (ResidualU multiple                  19149824  
_________________________________________________________________
batch_normalization_516 (Bat multiple                

- len(down) == s

In [13]:
# 16*16*256 -> 8*8*512
down_3 = [{'filters_residual':[512], 'strides_residual':[1], 'stride_pool':2}]
up_3 = [{'filters_residual':[512], 'strides_residual':[1], 'up_size':2}]

In [33]:
stage_3 = AttentionModule(
    filter_side=[256], stride_side=[2],                   # 16*16*256 -> 8*8*256
    filters_trunk=[256, 512], strides_trunk=[1, 1],       # 8*8*256 -> 8*8*256 -> 8*8*512
    filter_mask=512, s=1, down=down_3, up=up_3,           # 8*8*256 -> 8*8*512
    p = 1, t = 2, r=1)

In [38]:
# images = tf.random.normal([1, 16, 16, 256])
# stage_3(images).shape

In [40]:
# 32*32*128 -> 16*16*256
down_2 = [
    {'filters_residual':[256], 'strides_residual':[1], 'stride_pool':2},
    {'filters_residual':[512], 'strides_residual':[1], 'stride_pool':2}
]
up_2 = [
    {'filters_residual':[512], 'strides_residual':[1], 'up_size':2},
    {'filters_residual':[256], 'strides_residual':[1], 'up_size':2}
]

In [41]:
stage_2 = AttentionModule(
    filter_side=[128], stride_side=[2],                   # 32*32*128 -> 32*32*128
    filters_trunk=[128, 256], strides_trunk=[1, 1],       # 32*32*128 -> 32*32*128 -> 32*32*256
    filter_mask=256, s=2, down=down_2, up=up_2,           # 32*32*128 -> 32*32*256
    p = 1, t = 2, r=1)

In [44]:
# images = tf.random.normal([1, 32, 32, 3])
# stage_3(images).shape

In [46]:
# 32*32*3 -> 32*32*128
down_1 = [
    {'filters_residual':[128], 'strides_residual':[1], 'stride_pool':2},
    {'filters_residual':[256], 'strides_residual':[1], 'stride_pool':2},
    {'filters_residual':[512], 'strides_residual':[1], 'stride_pool':2}
]
up_1 = [
    {'filters_residual':[256], 'strides_residual':[1], 'up_size':2},
    {'filters_residual':[128], 'strides_residual':[1], 'up_size':2},
    {'filters_residual':[128], 'strides_residual':[1], 'up_size':2}
]

In [47]:
# 32*32*3 -> 32*32*128
stage_1 = AttentionModule(
    filter_side=[32], stride_side=[1],                # 32*32*3 -> 32*32*32
    filters_trunk=[56, 128], strides_trunk=[1, 1],    # 32*32*32 -> 32*32*56 -> 32*32*128
    filter_mask=128, s=3, down=down_1, up=up_1,
    p=1, t=2, r=1)

In [48]:
images = tf.random.normal([1, 32, 32, 3])
stage_3(images).shape

InvalidArgumentError: input and filter must have the same depth: 3 vs 256 [Op:Conv2D]

In [None]:
r = 2
s = 3

In [None]:
down = [
    {'filters_residual':[16, 32], 'strides_residual':[1, 1], 'stride_pool':2},
    {'filters_residual':[32, 64], 'strides_residual':[1, 1], 'stride_pool':2},
    {'filters_residual':[64, 128], 'strides_residual':[1, 2], 'stride_pool':2}
]

In [None]:
up = [
    {'filters_residual':[128, 64], 'strides_residual':[1, 1], 'up_size':4},
    {'filters_residual':[64, 32], 'strides_residual':[1, 2], 'up_size':2},
    {'filters_residual':[32, 16], 'strides_residual':[2, 2], 'up_size':2}
]

In [None]:
stage_1 = AttentionModule(filter_side=[256],
    stride_side=[1],
    filters_trunk=[256, 512],
    strides_trunk=[1, 2],
    filter_mask=[512],
    s=1,
    down=down_3,
    up=up_3,
    p=1,
    t=2,
    r=1)

In [None]:
AttentionModule(filter_side=[256],
    stride_side=[1],
    filters_trunk=[256, 512],
    strides_trunk=[1, 2],
    filter_mask=[512],
    s=1,
    down=down_3,
    up=up_3,
    p=1,
    t=2,
    r=1)

In [None]:
maskbranch = MaskBranch(4, s, r, down, up)

In [None]:
mask = maskbranch(x)

In [None]:
mask.shape

In [None]:
a = layers.MaxPool2D(pool_size=2, strides=2)

In [None]:
a.strides

In [None]:
trunk = TrunkBranch([4,4],[2,1],2)
trunk(x).shape

In [None]:
down = [
    {'filters_residual':[16, 32], 'strides_residual':[1, 2], 'stride_pool':2},
    {'filters_residual':[32, 64], 'strides_residual':[2, 2], 'stride_pool':2}
]
up = [
    {'filters_residual':[64, 32], 'strides_residual':[2, 2], 'up_size':4},
    {'filters_residual':[32, 16], 'strides_residual':[1, 1], 'up_size':4}
]
#maskbranch = MaskBranch(4, 2, 2, down, up)
#maskbranch(x).shape


filter_pre = [8]
strides_pre = [1]
filter_trunk = [4,4]
strides_trunk = [2,1]
p = 1
t = 2
r = 2
num_sampling = 2
filter_mask = 4
filter_post = [4]
strides_post = [1]
attmodule = AttModule0(filter_pre, strides_pre,
           filter_trunk, strides_trunk,
           filter_mask, num_sampling, down, up,
           filter_post, strides_post,
           p, t, r)

In [None]:
out = attmodule(x)
out.shape

In [None]:
a = [10, 15]
b = [11, 18]
for i, (x, y) in enumerate(zip(a, b)):
    print(i, x, y)

In [None]:
list(zip(a, b))

In [None]:
layers.Conv2D()