### 1. GAN from numpy

Content derived from this reference blog:

https://towardsdatascience.com/only-numpy-implementing-gan-general-adversarial-networks-and-adam-optimizer-using-numpy-with-2a7e4e032021

In [1]:
## import libs
import os
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.utils import shuffle
import tensorflow as tf
random_numer = int(12345)
np.random.seed(random_numer)


### 2. helper functions

In [2]:
## relu
def ReLu(x):
    mask = (x>0) * 1.0
    return mask *x
def d_ReLu(x):
    mask = (x>0) * 1.0
    return mask 

## arctan
def arctan(x):
    return np.arctan(x)
def d_arctan(x):
    return 1 / (1 + x ** 2)

## tanh
def tanh(x):
    return np.tanh(x)
def d_tanh(x):
    return 1 - np.tanh(x) ** 2

## sigmoid
def log(x):
    return 1 / ( 1+ np.exp(-1*x))
def d_log(x):
    return log(x) * (1 - log(x))

## make weight and bias tensor
def makeWeight(d1, d2, factor = 0.002):
    '''
    d1: 1st Dimension
    d2: 2nd Dimension
    factor: the factor to control the value range [-factor, factor]
    '''
    return np.random.normal(size = (d1, d2), scale = 1/np.sqrt(d1/2)) * factor
   
def makeBias(d, factor = 0.002, flag = 'zeros'):
    if flag == 'zeros':
        return np.zeros(d)
    elif flag == 'normal':
        return np.random.normal(size = (d), scale = 1/np.sqrt(d/2)) * factor
    else:
        raise Exception("Bias should be initialized with only 'zeros' or 'normal' flags!")

In [3]:
def plot(samples):
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
    return fig

### 3. Load Data and declare hyper

In [4]:
print('-------- Load Data --------------------------------')
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
images, labels = X_test.reshape(-1, 28*28), y_test ## use test set
images, labels = shuffle(np.asarray(images),np.asarray(labels)) ## shuffle together
print('-------- Data shape:', images.shape, ', Labels shape"', labels.shape)
print('-------- Data Loaded --------------------------------')

-------- Load Data --------------------------------
-------- Data shape: (10000, 784) , Labels shape" (10000,)
-------- Data Loaded --------------------------------


In [5]:
## define parameters
G_input = 100
hidden_input1 = 128
hidden_input2 = 256 
hidden_input3 = 346
hidden_input4 = 480
hidden_input5 = 560
hidden_input6 = 686
hidden_input7 = 784 ## back to original

In [6]:
## Build Descriminator
print('--------- Descriminator Network ----------')
print('Arch: 784 x 128 x 1')
# 2. Declare Weights
D_W1 = makeWeight(784, hidden_input1)
D_W2 = makeWeight(hidden_input1, 1)
D_b1 = makeBias(hidden_input1)
D_b2 = makeBias(1)

--------- Descriminator Network ----------
Arch: 784 x 128 x 1


In [7]:
## Build Generator
print('--------- Generator Network ----------')
print('Arch: 100 x 128 x 256 x 346 x 480 x 560 x 686 x 784')
G_W1 = makeWeight(G_input, hidden_input1)
G_W2 = makeWeight(hidden_input1,hidden_input2)
G_W3 = makeWeight(hidden_input2,hidden_input3)
G_W4 = makeWeight(hidden_input3,hidden_input4)
G_W5 = makeWeight(hidden_input4,hidden_input5)
G_W6 = makeWeight(hidden_input5,hidden_input6)
G_W7 = makeWeight(hidden_input6,hidden_input7)

G_b1 = makeBias(hidden_input1)
G_b2 = makeBias(hidden_input2)
G_b3 = makeBias(hidden_input3)
G_b4 = makeBias(hidden_input4)
G_b5 = makeBias(hidden_input5)
G_b6 = makeBias(hidden_input6)
G_b7 = makeBias(hidden_input7)

--------- Generator Network ----------
Arch: 100 x 128 x 256 x 346 x 480 x 560 x 686 x 784


In [8]:
## make a tuple of dictions
D_params = (dict({'W':D_W1,'dW':D_W1 * 0, 'b': D_b1, 'db': D_b1 * 0, 'a': ReLu, 'da': d_ReLu}), 
            dict({'W':D_W2,'dW':D_W2 * 0, 'b': D_b2, 'db': D_b2 * 0, 'a': log , 'da': d_log})) 


G_params = (dict({'W':G_W1, 'dW':G_W1 * 0, 'b': G_b1, 'db': G_b1 * 0, 'a': arctan, 'da': d_arctan}), 
            dict({'W':G_W2, 'dW':G_W2 * 0, 'b': G_b2, 'db': G_b2 * 0, 'a': ReLu  , 'da': d_ReLu}), 
            dict({'W':G_W3, 'dW':G_W3 * 0, 'b': G_b3, 'db': G_b3 * 0, 'a': arctan, 'da': d_arctan}), 
            dict({'W':G_W4, 'dW':G_W4 * 0, 'b': G_b4, 'db': G_b4 * 0, 'a': ReLu  , 'da': d_ReLu}), 
            dict({'W':G_W5, 'dW':G_W5 * 0, 'b': G_b5, 'db': G_b5 * 0, 'a': tanh  , 'da': d_tanh}), 
            dict({'W':G_W6, 'dW':G_W6 * 0, 'b': G_b6, 'db': G_b6 * 0, 'a': ReLu  , 'da': d_ReLu}), 
            dict({'W':G_W7, 'dW':G_W7 * 0, 'b': G_b7, 'db': G_b7 * 0, 'a': log   , 'da': d_log})) 

In [9]:
def forwardPass(Input, Params):
    Output = Input.copy()
    for layer in Params:
        Output = Output.dot(layer['W']) + layer['b']
        if layer['a']:
            Output = layer['a'](Output)
    return Output

In [10]:
def updateVMP(V, M, Params, lr, eps):
    # ---- Update Gradient G ----
    assert len(V) == len(M)
    
    ## update V, M
    for idx in range(len(V)):
        if idx % 2 == 0:
            M[idx] = beta_1 * M[idx] + (1 - beta_1) * Params[idx//2]['dW']
            V[idx] = beta_2 * V[idx] + (1 - beta_2) * Params[idx//2]['dW'] ** 2            
        else:
            M[idx] = beta_1 * M[idx] + (1 - beta_1) * Params[idx//2]['db']
            V[idx] = beta_2 * V[idx] + (1 - beta_2) * Params[idx//2]['db'] ** 2  

    ## update Params
    for idx, layer in enumerate(Params):
        layer['W'] = layer['W'] - (lr/(np.sqrt(V[idx*2]  /(1-beta_2)) + eps)) * (M[idx*2]/(1-beta_1))
        layer['b'] = layer['b'] - (lr/(np.sqrt(V[idx*2+1]/(1-beta_2)) + eps)) * (M[idx*2 + 1]/(1-beta_1))
    
    return V, M, Params


In [11]:
# 3. For Adam Optimzier
VD = [0] * 4
MD = [0] * 4
VG = [0] * 14
MG = [0] * 14

beta_1 = 0.99
beta_2 = 0.999
num_epoch = 10
learing_rate = 1e-4
eps    = 1e-7

In [12]:
print('--------- Started Training ----------')
for iteration in range(num_epoch):

    ## random sampling an image
    random_int = np.random.randint(len(images) - 5)
    current_image = np.expand_dims(images[random_int],axis=0) ## to tensor: (1, 784)

    # Func G: Generate The first Fake Data
    Z   = np.random.uniform(-1., 1., size=[1, G_input])
    current_fake_data = forwardPass(Z, G_params)

    # Func D: Forward Feed for Real and Fake data, calculate the cost
    Dl2_rA = forwardPass(current_image, D_params)
    Dl2_fA = forwardPass(current_fake_data, D_params)
    D_cost = - np.log(Dl2_rA) + np.log(1.0- Dl2_fA) # Func: Cost D (GAN Loss)

    # Func Back D: Gradient
    grad_f_w2_part_1 =  1/(1.0- Dl2_fA)
    grad_f_w2_part_2 =  d_log(Dl2_f)
    grad_f_w2_part_3 =  Dl1_fA
    grad_f_w2 = grad_f_w2_part_3.T.dot(grad_f_w2_part_1 * grad_f_w2_part_2) 
    grad_f_b2 = grad_f_w2_part_1 * grad_f_w2_part_2

    grad_f_w1_part_1 =  (grad_f_w2_part_1 * grad_f_w2_part_2).dot(D_W2.T)
    grad_f_w1_part_2 =  d_ReLu(Dl1_f)
    grad_f_w1_part_3 =   current_fake_data
    grad_f_w1 =       grad_f_w1_part_3.T.dot(grad_f_w1_part_1 * grad_f_w1_part_2) 
    grad_f_b1 =      grad_f_w1_part_1 * grad_f_w1_part_2

    grad_r_w2_part_1 =  - 1/Dl2_rA
    grad_r_w2_part_2 =  d_log(Dl2_r)
    grad_r_w2_part_3 =   Dl1_rA
    grad_r_w2 =       grad_r_w2_part_3.T.dot(grad_r_w2_part_1 * grad_r_w2_part_2) 
    grad_r_b2 =       grad_r_w2_part_1 * grad_r_w2_part_2

    grad_r_w1_part_1 =  (grad_r_w2_part_1 * grad_r_w2_part_2).dot(D_W2.T)
    grad_r_w1_part_2 =  d_ReLu(Dl1_r)
    grad_r_w1_part_3 =   current_image
    grad_r_w1 =       grad_r_w1_part_3.T.dot(grad_r_w1_part_1 * grad_r_w1_part_2) 
    grad_r_b1 =       grad_r_w1_part_1 * grad_r_w1_part_2

    grad_w1 = D_params[0]['dW'] + grad_r_w1
    grad_b1 = D_params[0]['db'] + grad_r_b1    
    grad_w2 = D_params[1]['dW'] + grad_r_w2
    grad_b2 = D_params[1]['db'] + grad_r_b2
    
    ## to container
    D_params[0]['dW'] = grad_w1
    D_params[0]['db'] = grad_b1
    D_params[1]['dW'] = grad_w2
    D_params[1]['db'] = grad_w2    

    # ---- Update Gradient D ----
    VD, MD, D_params = updateVMP(VD, MD, D_params, learing_rate, eps)
    D_W1 = D_params[0]['W']
    D_b1 = D_params[0]['b']
    D_W2 = D_params[1]['W']
    D_b2 = D_params[1]['b']

    # Func G: Forward Feed for G
    Z = np.random.uniform(-1., 1., size=[1, G_input])
    Gl1 = Z.dot(G_W1) + G_b1
    Gl1A = arctan(Gl1)
    Gl2 = Gl1A.dot(G_W2) + G_b2
    Gl2A = ReLu(Gl2)
    Gl3 = Gl2A.dot(G_W3) + G_b3
    Gl3A = arctan(Gl3)

    Gl4 = Gl3A.dot(G_W4) + G_b4
    Gl4A = ReLu(Gl4)
    Gl5 = Gl4A.dot(G_W5) + G_b5
    Gl5A = tanh(Gl5)
    Gl6 = Gl5A.dot(G_W6) + G_b6
    Gl6A = ReLu(Gl6)
    Gl7 = Gl6A.dot(G_W7) + G_b7
    
    current_fake_data = log(Gl7)

    Dl1 = current_fake_data.dot(D_W1) + D_b1
    Dl1_A = ReLu(Dl1)
    Dl2 = Dl1_A.dot(D_W2) + D_b2
    Dl2_A = log(Dl2)

    # Func: Cost G
    G_cost = -np.log(Dl2_A)

    # Func: Gradient
    grad_G_w7_part_1 = ((-1/Dl2_A) * d_log(Dl2).dot(D_W2.T) * (d_ReLu(Dl1))).dot(D_W1.T)
    grad_G_w7_part_2 = d_log(Gl7)
    grad_G_w7_part_3 = Gl6A
    grad_G_w7 = grad_G_w7_part_3.T.dot(grad_G_w7_part_1 * grad_G_w7_part_1)
    grad_G_b7 = grad_G_w7_part_1 * grad_G_w7_part_2

    grad_G_w6_part_1 = (grad_G_w7_part_1 * grad_G_w7_part_2).dot(G_W7.T)
    grad_G_w6_part_2 = d_ReLu(Gl6)
    grad_G_w6_part_3 = Gl5A
    grad_G_w6 = grad_G_w6_part_3.T.dot(grad_G_w6_part_1 * grad_G_w6_part_2)
    grad_G_b6 = (grad_G_w6_part_1 * grad_G_w6_part_2)

    grad_G_w5_part_1 = (grad_G_w6_part_1 * grad_G_w6_part_2).dot(G_W6.T)
    grad_G_w5_part_2 = d_tanh(Gl5)
    grad_G_w5_part_3 = Gl4A
    grad_G_w5 = grad_G_w5_part_3.T.dot(grad_G_w5_part_1 * grad_G_w5_part_2)
    grad_G_b5 = (grad_G_w5_part_1 * grad_G_w5_part_2)

    grad_G_w4_part_1 = (grad_G_w5_part_1 * grad_G_w5_part_2).dot(G_W5.T)
    grad_G_w4_part_2 = d_ReLu(Gl4)
    grad_G_w4_part_3 = Gl3A
    grad_G_w4 = grad_G_w4_part_3.T.dot(grad_G_w4_part_1 * grad_G_w4_part_2)
    grad_G_b4 = (grad_G_w4_part_1 * grad_G_w4_part_2)

    grad_G_w3_part_1 = (grad_G_w4_part_1 * grad_G_w4_part_2).dot(G_W4.T)
    grad_G_w3_part_2 = d_arctan(Gl3)
    grad_G_w3_part_3 = Gl2A
    grad_G_w3 = grad_G_w3_part_3.T.dot(grad_G_w3_part_1 * grad_G_w3_part_2)
    grad_G_b3 = (grad_G_w3_part_1 * grad_G_w3_part_2)

    grad_G_w2_part_1 = (grad_G_w3_part_1 * grad_G_w3_part_2).dot(G_W3.T)
    grad_G_w2_part_2 = d_ReLu(Gl2)
    grad_G_w2_part_3 = Gl1A
    grad_G_w2 = grad_G_w2_part_3.T.dot(grad_G_w2_part_1 * grad_G_w2_part_2)
    grad_G_b2 = (grad_G_w2_part_1 * grad_G_w2_part_2)

    grad_G_w1_part_1 = (grad_G_w2_part_1 * grad_G_w2_part_2).dot(G_W2.T)
    grad_G_w1_part_2 = d_arctan(Gl1)
    grad_G_w1_part_3 = Z
    grad_G_w1 = grad_G_w1_part_3.T.dot(grad_G_w1_part_1 * grad_G_w1_part_2)
    grad_G_b1 = grad_G_w1_part_1 * grad_G_w1_part_2

    # ---- Update Gradient G ---- 
    VG, MG, G_params = updateVMP(VG, MG, G_params, learing_rate, eps)
    G_W1 = G_params[0]['W']
    G_b1 = G_params[0]['b']
    
    G_W2 = G_params[1]['W']
    G_b2 = G_params[1]['b']
    
    G_W3 = G_params[2]['W']
    G_b3 = G_params[2]['b']
    
    G_W4 = G_params[3]['W']
    G_b4 = G_params[3]['b']
    
    G_W5 = G_params[4]['W']
    G_b5 = G_params[4]['b']
    
    G_W6 = G_params[5]['W']
    G_b6 = G_params[5]['b']
    
    G_W7 = G_params[6]['W']
    G_b7 = G_params[6]['b']
    
    # --- Print Error ----
    print("Current Iter: ",iteration, " Current D cost:",D_cost, " Current G cost: ", G_cost,end='\r')
    

--------- Started Training ----------


NameError: name 'Dl2_f' is not defined