In [1]:
%pylab inline
%matplotlib inline

import numpy as np
import math

from tqdm import trange
from matplotlib import pyplot as plt
from IPython.display import clear_output
from time import sleep

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Data import 

def fetch(url):
    """Return the gunzipped content of ``url`` as a writable uint8 array.

    The raw (still gzip-compressed) response body is cached in the current
    working directory under the MD5 hex digest of the URL, so later calls for
    the same URL read the file instead of downloading again.

    Args:
        url: Address of a gzip-compressed resource.

    Returns:
        1-D ``numpy.uint8`` array with the decompressed bytes. It is a copy,
        so callers may modify it.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    import gzip, hashlib, os
    import numpy

    # MD5 only derives a filesystem-safe cache name here; it is not a security measure.
    path = os.path.join(os.getcwd(), hashlib.md5(url.encode('utf-8')).hexdigest())
    if os.path.isfile(path):
        with open(path, 'rb') as f:
            dat = f.read()
    else:
        # Imported lazily so a cache hit needs neither requests nor network access.
        import requests
        response = requests.get(url, timeout=60)
        # Fail loudly instead of caching an error page as if it were data.
        response.raise_for_status()
        dat = response.content
        # Write under a temporary name and rename afterwards, so an interrupted
        # write never leaves a truncated file that later calls would trust.
        tmp_path = path + '.part'
        with open(tmp_path, 'wb') as f:
            f.write(dat)
        os.replace(tmp_path, path)
    return numpy.frombuffer(gzip.decompress(dat), dtype=numpy.uint8).copy()

# Load the four MNIST archives through fetch(), which returns a flat uint8 array.
# The leading bytes of each array are sliced off before use: 0x10 (16) bytes for
# the image files and 8 bytes for the label files.
# NOTE(review): these offsets presumably correspond to the IDX file headers
# (magic number + dimension sizes) — confirm against the format description.
# Image arrays are reshaped to (n_samples, 28, 28); label arrays stay 1-D.
X_train = fetch('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz')[0x10:].reshape((-1, 28, 28))
Y_train = fetch('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz')[8:]
X_test = fetch('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz')[0x10:].reshape((-1, 28, 28))
Y_test = fetch('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz')[8:]

# GAN Implementation

In [5]:
# Layer function definition (from MNIST Classifier)

import abc

class LayerFunction(abc.ABC):
    """Abstract base for callable layer components.

    Calling an instance forwards to :meth:`evaluate`. Subclasses must implement
    both :meth:`evaluate` and :meth:`backprop`; a class that leaves either one
    abstract cannot be instantiated.
    """

    def __call__(self, *args, **kwargs):
        """Forward every argument to ``evaluate`` and return its result."""
        return self.evaluate(*args, **kwargs)

    @abc.abstractmethod
    def evaluate(self, *args, **kwargs):
        """Compute the forward value of the function."""

    @abc.abstractmethod
    def backprop(self, *args, **kwargs):
        """Compute the gradient propagated backwards through the function."""


# Activation function definition

class ActivationFunction(LayerFunction):
    """Shared base for activation layers.

    Stores the ``axis`` setting (default 1) and initialises ``grad`` to 0.
    """

    def __init__(self, axis=1):
        self.axis, self.grad = axis, 0
    

class ReLU(ActivationFunction):
    """Rectified linear unit, ``max(x, 0)`` element-wise."""

    def evaluate(self, x):
        """Return ``x`` with negative entries replaced by zero."""
        return np.maximum(x, 0)

    def backprop(self, dD, x):
        """Propagate the upstream gradient through the rectifier.

        Args:
            dD: Gradient with respect to the activation output.
            x: Pre-activation input that was passed to ``evaluate``.

        Returns:
            ``dD`` where ``x > 0`` and zero elsewhere.
        """
        # Multiply by the boolean mask directly. Casting the mask to int first
        # produces an int64 array, which promotes float32 gradients to float64.
        return dD * (x > 0)


class LogSoftmax(ActivationFunction):
    """Log-softmax taken along ``self.axis`` (default 1)."""

    def evaluate(self, x):
        """Return ``x - logsumexp(x)`` along ``self.axis``.

        The maximum along the axis is subtracted first so the exponentials
        cannot overflow.
        """
        # keepdims keeps the reduced axis so the result broadcasts against x
        # for any axis and rank, not only for 2-D input reduced along axis 1.
        shifted = x - x.max(axis=self.axis, keepdims=True)
        log_norm = np.log(np.exp(shifted).sum(axis=self.axis, keepdims=True))
        return shifted - log_norm

    def backprop(self, dD, x):
        """Propagate the upstream gradient ``dD`` through the log-softmax.

        Args:
            dD: Gradient with respect to the log-softmax output.
            x: Input that was passed to ``evaluate``.

        Returns:
            ``dD - softmax(x) * sum(dD)`` with the sum taken along ``self.axis``.
        """
        softmax = np.exp(self.evaluate(x))
        return dD - softmax * dD.sum(axis=self.axis, keepdims=True)
    

class Tanh(ActivationFunction):
    """Hyperbolic tangent activation."""

    def evaluate(self, x):
        """Return ``tanh(x)`` element-wise."""
        # np.tanh saturates to +/-1 for large |x|; the explicit
        # (e^x - e^-x) / (e^x + e^-x) form evaluates to inf/inf = NaN there.
        return np.tanh(x)

    def backprop(self, dD, x):
        """Propagate the upstream gradient through tanh.

        Args:
            dD: Gradient with respect to the activation output.
            x: Pre-activation input that was passed to ``evaluate``.

        Returns:
            ``dD / cosh(x)**2``, which is ``dD`` times the derivative of tanh.
        """
        # Invert cosh before squaring so the square cannot overflow. Where cosh
        # itself overflows to inf, the reciprocal is 0, the correct limit.
        with np.errstate(over='ignore'):
            sech = 1.0 / np.cosh(x)
        return dD * sech * sech
        

# Loss function definition

class LossFunction(LayerFunction):
    """Shared base for loss layers; stores the ``axis`` setting (default 0)."""

    def __init__(self, axis=0):
        self.axis = axis
    

class NLLLoss(LossFunction):
    """Negative log-likelihood loss over a batch of per-class scores.

    ``x`` is indexed as ``x[i, label_i]``, so it needs one row per sample and
    one column per class. Only ``axis == 0`` is implemented.
    """

    def _require_supported_axis(self):
        """Raise instead of silently returning None / an all-zero gradient."""
        if self.axis != 0:
            raise NotImplementedError(
                "NLLLoss only supports axis=0, got axis={!r}".format(self.axis))

    def evaluate(self, x, labels):
        """Return the mean of ``-x[i, labels[i]]`` over all rows ``i``.

        Raises:
            NotImplementedError: if ``self.axis`` is not 0.
        """
        self._require_supported_axis()
        # Flatten so row- and column-vector labels index one entry per row,
        # matching what backprop accepts.
        labels = np.asarray(labels).reshape(-1)
        return -np.mean(x[np.arange(x.shape[0]), labels])

    def backprop(self, x, y):
        """Return the gradient of the loss with respect to ``x``.

        The result has the shape and dtype of ``x``; it holds ``-1/len(y)`` at
        each position ``(i, y[i])`` and zero everywhere else.

        Raises:
            NotImplementedError: if ``self.axis`` is not 0.
        """
        self._require_supported_axis()
        y = np.asarray(y).reshape(-1)
        dx = np.zeros(x.shape, dtype=x.dtype)
        dx[np.arange(x.shape[0]), y] = -1 / len(y)
        return dx

In [None]:
# Generator class

class Generator:
    """Fully connected generator: three ReLU layers followed by a Tanh layer."""

    def __init__(self, l0=128, l1=256, l2=256*2, l3=256*4, l4=28*28, dtype=np.float32):
        """Create the four weight matrices and their activations.

        Args:
            l0: Width of the input vectors.
            l1, l2, l3: Hidden layer widths.
            l4: Output width.
            dtype: dtype of the weight matrices.
        """
        # Hidden widths default to 512 and 1024. Written as powers (256**2,
        # 256**4) they would require a 65536 x 4294967296 weight matrix, which
        # cannot be allocated.
        # NOTE(review): 256*2 / 256*4 are assumed to be the intended widths — confirm.
        widths = (l0, l1, l2, l3, l4)
        self.w1, self.w2, self.w3, self.w4 = [
            self._init_weight(n_in, n_out, dtype)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]

        self.act1 = ReLU()
        self.act2 = ReLU()
        self.act3 = ReLU()
        self.act4 = Tanh()

    @staticmethod
    def _init_weight(n_in, n_out, dtype):
        """Return a uniform(-1, 1) matrix of shape (n_in, n_out) scaled by 1/sqrt(n_in*n_out)."""
        weight = np.random.uniform(-1., 1., size=(n_in, n_out)) / np.sqrt(n_in * n_out)
        # Cast the finished matrix; casting only the scale factor would leave
        # the weights in float64.
        return weight.astype(dtype)

    def forward(self, x):
        """Map an input batch ``x`` to generated samples."""
        x = x.dot(self.w1)
        x = self.act1(x)
        x = x.dot(self.w2)
        x = self.act2(x)
        x = x.dot(self.w3)
        x = self.act3(x)
        x = x.dot(self.w4)
        x = self.act4(x)
        return x

    def forward_backward(self, a0, D, loss_function, y=None):
        """Compute the loss on ``D``'s score of the generated batch and the generator gradients.

        The batch ``a0`` is pushed through the generator, the generated samples
        are scored by ``D``, and the loss gradient is propagated back through
        ``D`` (whose weights are only read, never updated) into the generator
        weights.

        Args:
            a0: Input batch; its second dimension must match ``w1``'s first.
            D: Object exposing ``w1``..``w3`` and ``act1``..``act3``, laid out
                like the Discriminator class in this file.
            loss_function: Callable ``loss_function(pred, target)`` that also
                provides ``backprop(pred, target)`` returning d(loss)/d(pred).
            y: Optional target handed to ``loss_function``. Defaults to class
                index 0 for every sample.
                NOTE(review): index 0 is the only valid column of a one-output
                discriminator under NLLLoss; confirm the intended target
                encoding for generator training.

        Returns:
            Tuple ``(loss, accuracy, grad)``. ``accuracy`` is the fraction of
            samples whose arg-max discriminator output equals ``y``; ``grad``
            maps ``'w1'``..``'w4'`` to the gradients of the generator weights.
        """
        # Generator forward pass, keeping pre-activations for the backward pass
        z1 = a0.dot(self.w1)
        a1 = self.act1(z1)
        z2 = a1.dot(self.w2)
        a2 = self.act2(z2)
        z3 = a2.dot(self.w3)
        a3 = self.act3(z3)
        z4 = a3.dot(self.w4)
        a4 = self.act4(z4)

        # Discriminator forward pass on the generated samples
        s1 = a4.dot(D.w1)
        h1 = D.act1(s1)
        s2 = h1.dot(D.w2)
        h2 = D.act2(s2)
        s3 = h2.dot(D.w3)
        d_out = D.act3(s3)

        if y is None:
            y = np.zeros(a0.shape[0], dtype=np.int64)

        # Loss and accuracy are measured on the discriminator's output
        loss = loss_function(d_out, y)
        accuracy = (np.argmax(d_out, axis=1) == y).mean()

        # Backward pass through the discriminator down to the generated samples
        dd_out = loss_function.backprop(d_out, y)
        ds3 = D.act3.backprop(dd_out, s3)
        dh2 = ds3.dot(D.w3.T)
        ds2 = D.act2.backprop(dh2, s2)
        dh1 = ds2.dot(D.w2.T)
        ds1 = D.act1.backprop(dh1, s1)
        da4 = ds1.dot(D.w1.T)

        # Backward pass through the generator
        dz4 = self.act4.backprop(da4, z4)
        dw4 = a3.T.dot(dz4)
        da3 = dz4.dot(self.w4.T)
        dz3 = self.act3.backprop(da3, z3)
        dw3 = a2.T.dot(dz3)
        da2 = dz3.dot(self.w3.T)
        dz2 = self.act2.backprop(da2, z2)
        dw2 = a1.T.dot(dz2)
        da1 = dz2.dot(self.w2.T)
        dz1 = self.act1.backprop(da1, z1)
        dw1 = a0.T.dot(dz1)

        # Return loss and gradients
        grad = {'w1': dw1, 'w2': dw2, 'w3': dw3, 'w4': dw4}
        return loss, accuracy, grad

    
# Discriminator class

class Discriminator:
    """Fully connected discriminator: two ReLU layers followed by a Tanh layer."""

    def __init__(self, l1=28*28, l2=1000, l3=256, l4=1, dtype=np.float32):
        """Create the three weight matrices and their activations.

        Each matrix is drawn from uniform(-1, 1) and divided by
        ``sqrt(n_in * n_out)``.

        Args:
            l1: Width of the input vectors.
            l2, l3: Hidden layer widths.
            l4: Output width.
            dtype: dtype of the weight matrices.
        """
        # `size` is the keyword numpy.random.uniform accepts for the output
        # shape. The cast is applied to the finished matrix; casting only the
        # scale factor would leave the weights in float64.
        self.w1 = (np.random.uniform(-1., 1., size=(l1, l2))/np.sqrt(l1*l2)).astype(dtype)
        self.w2 = (np.random.uniform(-1., 1., size=(l2, l3))/np.sqrt(l2*l3)).astype(dtype)
        self.w3 = (np.random.uniform(-1., 1., size=(l3, l4))/np.sqrt(l3*l4)).astype(dtype)

        self.act1 = ReLU()
        self.act2 = ReLU()
        self.act3 = Tanh()

    def forward(self, x):
        """Map an input batch ``x`` to the discriminator output."""
        x = x.dot(self.w1)
        x = self.act1(x)
        x = x.dot(self.w2)
        x = self.act2(x)
        x = x.dot(self.w3)
        x = self.act3(x)
        return x

    def forward_backward(self, a0, loss_function):
        """Placeholder for the discriminator training step.

        Raises:
            NotImplementedError: always; the training step has not been written.
        """
        # NOTE(review): the signature carries no target/label argument, so the
        # intended loss computation cannot be reconstructed here — implement
        # once the training scheme is decided.
        raise NotImplementedError(
            "Discriminator.forward_backward is not implemented yet")
        

# GAN class

class GAN:
    """Holds a default-constructed Generator (``G``) and Discriminator (``D``)."""

    def __init__(self):
        self.G, self.D = Generator(), Discriminator()