# Sliced Wasserstein Auto-Encoder (SWAE) on the quark/gluon dataset

This notebook implements Sliced Wasserstein Auto-Encoders (SWAE).

To run this notebook you'll require the following packages:

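* Numpy
* Matplotlib
* tensorflow
* Keras
* h5py
* scikit-learn
* IPython
* cirq and tensorflow_quantum (imported in the first cell)
* hep_VQAE (imported in the first cell)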


In [1]:
import numpy as np
import keras.utils
from keras.layers import Input,Dense, Flatten
from keras.models import load_model, Model
from keras.layers import Conv2D, UpSampling2D, AveragePooling2D
from keras.layers import LeakyReLU,Reshape
from keras.datasets import mnist
from keras.models import save_model
from keras import backend as K
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython import display
import time
import numpy as np
import matplotlib.pyplot as plt
from cirq.contrib.svg import SVGCircuit
import tensorflow as tf
import tensorflow_quantum as tfq
from hep_VQAE import data_preprocessing as dp
from hep_VQAE import CAE as cae
import h5py
import keras
import numpy as np
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model
from hep_VQAE import utils as ut

2022-08-12 15:40:50.919614: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-12 15:40:50.919646: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-08-12 15:40:53.148552: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-08-12 15:40:53.148575: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-08-12 15:40:53.148589: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (tomskopfbahnhof): /proc/driver/nvidia/version does not exist
2022-08-12 15:40:53.148782: I tensorflow/core/platform/cpu_fe

## Define three helper functions
 * generateTheta(L,endim) -> Generates $L$ random samples from the unit sphere $\mathbb{S}^{endim-1}$
 * generateZ(batchsize,endim) -> Generates 'batchsize' samples of dimension 'endim' from $q_Z$
 * stitchImages(I,axis=0) -> Helps us with visualization

In [2]:
def generateTheta(L,endim):
    """Draw L random directions on the unit sphere in R^endim.

    Each direction is a standard-normal vector rescaled to unit Euclidean
    norm.

    Args:
        L: number of directions to draw (rows of the result).
        endim: dimension of the embedding space (columns of the result).

    Returns:
        np.ndarray of shape (L, endim) whose rows have unit L2 norm.
        For L == 0 the result is an empty array of shape (0, endim).
    """
    w = np.random.normal(size=(L, endim))
    # keepdims gives an (L, 1) column of norms that broadcasts over each row,
    # so all rows are normalised in one vectorised step.
    return w / np.sqrt((w**2).sum(axis=1, keepdims=True))
def generateZ(batchsize, endim):
    """Sample the prior q_Z as i.i.d. uniform values on [0, 1).

    Args:
        batchsize: number of samples to draw (rows of the result).
        endim: dimension of each sample (columns of the result).

    Returns:
        np.ndarray of shape (batchsize, endim).
    """
    sample_shape = (batchsize, endim)
    return np.random.uniform(low=0.0, high=1.0, size=sample_shape)
def stitchImages(I,axis=0):
    """Tile a stack of images into one image along a single axis.

    Args:
        I: array of shape (n, N, M, K) - n images of N rows, M columns and
            K channels.
        axis: 0 stacks the images top-to-bottom; any other value places them
            side by side, left-to-right.

    Returns:
        float64 array of shape (n*N, M, K) for axis == 0, otherwise
        (N, n*M, K).
    """
    count, height, width, channels = I.shape
    if axis == 0:
        # Images are already laid out one after another along the rows.
        tiled = I.reshape(count * height, width, channels)
    else:
        # Bring the image index next to the column index before merging them.
        tiled = I.transpose(1, 0, 2, 3).reshape(height, count * width, channels)
    return tiled.astype(np.float64)

## Defining the Encoder/Decoder as Keras graphs

In this section we define our encoder-decoder architectures and the corresponding loss function for the SWAE. 

In [3]:
# --- Hyper-parameters -------------------------------------------------------
batchsize = 500   # samples per training step
L = 50            # number of random projections (slices)
depth = 16        # base number of conv filters; a width multiplier, not the network depth
interdim = 128    # width of the dense layer between the conv stack and the embedding
endim = 30        # dimension of the embedding (latent) space

# --- Keras graph inputs -----------------------------------------------------
img = Input((40, 40, 1))    # encoder input: single-channel 40x40 image
embedd = Input((endim,))    # decoder input: one embedding vector

### Define Encoder

In [4]:

# Encoder: three stages of (Conv -> LeakyReLU) x 2 followed by 2x2 average
# pooling, using depth*1, depth*2 and depth*4 filters, then two dense layers.
x = img
for width_mult in (1, 2, 4):
    for repeat in range(2):
        x = Conv2D(depth * width_mult, (3, 3), padding='same')(x)
        x = LeakyReLU(alpha=0.2)(x)
    x = AveragePooling2D((2, 2), padding='same')(x)

x = Flatten()(x)
x = Dense(interdim, activation='relu')(x)
encoded = Dense(endim)(x)   # linear embedding (no activation)

encoder = Model(inputs=[img], outputs=[encoded])
encoder.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 40, 40, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 40, 40, 16)        160       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 40, 40, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 40, 40, 16)        2320      
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 40, 40, 16)        0         
                                                                 
 average_pooling2d (AverageP  (None, 20, 20, 16)       0         
 ooling2D)                                                       
                                                             

### Define Decoder

In [5]:
# Decoder: two dense layers, reshape to a 5x5 feature map, then three stages
# of 2x upsampling followed by (Conv -> LeakyReLU) x 2, and a final sigmoid
# convolution producing a single-channel image.
x = Dense(interdim)(embedd)
x = Dense(depth * 100, activation='relu')(x)   # depth*100 == 5*5*(4*depth)
x = Reshape((5, 5, 4 * depth))(x)

for width_mult in (4, 4, 2):
    x = UpSampling2D((2, 2))(x)
    for repeat in range(2):
        x = Conv2D(depth * width_mult, (3, 3), padding='same')(x)
        x = LeakyReLU(alpha=0.2)(x)

decoded = Conv2D(1, (3, 3), padding='same', activation='sigmoid')(x)

decoder = Model(inputs=[embedd], outputs=[decoded])
decoder.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 30)]              0         
                                                                 
 dense_2 (Dense)             (None, 128)               3968      
                                                                 
 dense_3 (Dense)             (None, 1600)              206400    
                                                                 
 reshape (Reshape)           (None, 5, 5, 64)          0         
                                                                 
 up_sampling2d (UpSampling2D  (None, 10, 10, 64)       0         
 )                                                               
                                                                 
 conv2d_6 (Conv2D)           (None, 10, 10, 64)        36928     
                                                           

Here we define Keras variables for $\theta$ and sample $z$s.

In [6]:
# Backend variables: the training loop overwrites both with K.set_value on
# every step, so they are created once here with initial draws.
theta = K.variable(value=generateTheta(L, endim))    # (L, endim) projection directions
z = K.variable(value=generateZ(batchsize, endim))    # (batchsize, endim) samples of q_Z

Put encoder and decoder together to get the autoencoder

In [7]:
# Chain encoder and decoder on the shared image input to form the autoencoder.
aencoded = encoder(img)    # embedding of the input image
ae = decoder(aencoded)     # reconstruction of the input image

autoencoder = Model(inputs=[img], outputs=[ae])
autoencoder.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 40, 40, 1)]       0         
                                                                 
 model (Functional)          (None, 30)                280590    
                                                                 
 model_1 (Functional)        (None, 40, 40, 1)         386081    
                                                                 
Total params: 666,671
Trainable params: 666,671
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Sliced-Wasserstein term: project the encoded batch and the prior samples
# onto every direction in theta, sort each 1-D projection, and take the
# squared difference of the sorted values.
#
# NOTE(review): the recorded output of this cell is a MatMul error with
# In[0] of shape [30,500]. As written, z is created with shape
# (batchsize, endim) = (500, 30), so that output presumably comes from an
# earlier version of the notebook - confirm by re-running on a fresh kernel.
# NOTE(review): under TF2 eager execution K.transpose(theta) and the whole
# projz expression presumably evaluate immediately on the current variable
# values; if so, later K.set_value(theta, ...) / K.set_value(z, ...) calls in
# the training loop would not reach the loss - verify.

# projae: projections of the encoded samples, one column per direction
projae=K.dot(aencoded,K.transpose(theta))
# projz: projections of the $q_Z$ samples, one column per direction
projz=K.dot(z,K.transpose(theta))
# top_k with k=batchsize returns each row fully sorted (descending), so the
# transposes put one projection direction per row before sorting. This
# requires every batch to contain exactly `batchsize` samples.
W2=(tf.nn.top_k(tf.transpose(projae),k=batchsize).values-
    tf.nn.top_k(tf.transpose(projz),k=batchsize).values)**2

InvalidArgumentError: Matrix size-incompatible: In[0]: [30,500], In[1]: [30,50] [Op:MatMul]

In [None]:
# Weight of the sliced-Wasserstein term; kept as a backend variable so the
# training loop can change it with K.set_value.
w2weight = K.variable(10.0)

# Reconstruction terms are computed on flattened input / output images.
flat_img = K.flatten(img)
flat_ae = K.flatten(ae)
crossEntropyLoss = (1.0) * K.mean(K.binary_crossentropy(flat_img, flat_ae))
L1Loss = (1.0) * K.mean(K.abs(flat_img - flat_ae))

# Distribution-matching term.
W2Loss = w2weight * K.mean(W2)

# Total objective: L1 + cross-entropy reconstruction, plus weighted W2.
vae_Loss = L1Loss + crossEntropyLoss + W2Loss
# autoencoder.add_loss(vae_Loss) is left disabled here, as in the original cell.

In [None]:
# Attach the custom objective and compile the model.
# vae_Loss is a tensor built from the model's own input and outputs, not a
# loss(y_true, y_pred) callable, so it cannot be passed as `loss=` to
# compile(). It is registered with add_loss instead and compile() receives
# only the optimizer; the training loop accordingly passes y=None.
# NOTE: run this cell once per model build - re-running it registers the
# same loss a second time.
autoencoder.add_loss(vae_Loss)
autoencoder.compile(optimizer='rmsprop')

### Load the quark/gluon dataset

In [None]:
# Read both datasets fully into memory inside the context manager so the
# HDF5 file handle is closed as soon as the arrays have been copied out.
# Indexing with f['X'] raises a KeyError if a dataset is missing, whereas
# f.get('X') would silently return None and fail later.
with h5py.File("small_quark_gluon_candr", "r") as f:
    x_all = f['X'][:]
    y_all = f['y'][:]

# 80/20 shuffled split into training and validation sets.
x_train, x_val, y_train, y_val = train_test_split(x_all, y_all, test_size=0.2, shuffle=True)

In [None]:
def _select_and_normalise(images, labels, label):
    """Pick the images carrying `label`, add a channel axis, and scale each by its own maximum.

    Args:
        images: image array indexed by sample along the first axis; the
            reshape below assumes each sample is 2-D (rows, columns).
        labels: 1-D label array aligned with the first axis of `images`.
        label: label value to keep.

    Returns:
        Array of shape (n_selected, rows, columns, 1) where every image has
        been divided by its maximum pixel value.
    """
    selected = images[labels == label]
    selected = selected.reshape(selected.shape + (1,))
    peak = np.max(selected, axis=(1, 2)).reshape((selected.shape[0], 1, 1, 1))
    peak[peak == 0] = 1   # all-zero images stay zero instead of dividing by zero
    return selected / peak


# The label-1 subsets must be extracted before x_train / x_val are overwritten
# with their label-0 subsets.
# NOTE: x_train / x_val are replaced in place while y_train / y_val keep all
# labels, so this cell must not be run twice without reloading the data.
x_train_ones = _select_and_normalise(x_train, y_train, 1)
x_train = _select_and_normalise(x_train, y_train, 0)

x_val_ones = _select_and_normalise(x_val, y_val, 1)
x_val = _select_and_normalise(x_val, y_val, 0)

# The validation split doubles as the test set.
x_test = x_val
x_test_ones = x_val_ones

## Optimize the Loss

In [None]:
loss = []                                    # one entry per training step
fig1 = plt.figure()
n_batches = x_train.shape[0] // batchsize    # a trailing partial batch is dropped

for epoch in range(30):
    # New random sample order every epoch.
    ind = np.random.permutation(x_train.shape[0])
    # After epoch 10 the weight of the W2 term grows by 10% per epoch.
    if epoch > 10:
        K.set_value(w2weight, 1.1 * K.eval(w2weight))

    for i in range(n_batches):
        start = i * batchsize
        Xtr = x_train[ind[start:start + batchsize], ...]
        # Fresh projection directions and prior samples for every step.
        theta_ = generateTheta(L, endim)
        z_ = generateZ(batchsize, endim)
        K.set_value(z, z_)
        K.set_value(theta, theta_)
        loss.append(autoencoder.train_on_batch(x=Xtr, y=None))

    # Redraw the running loss curve in place of the previous output.
    plt.plot(np.asarray(loss))
    display.clear_output(wait=True)
    display.display(plt.gcf())
    time.sleep(1e-3)

## Encode and decode x_train

In [None]:
# Round-trip the training images through the two halves of the autoencoder.
en = encoder.predict(x_train)    # embeddings of the training images
dec = decoder.predict(en)        # reconstructions from those embeddings

## Visualize the encoding space

In [None]:
# Distribution of the encoded samples (first two embedding coordinates).
# `en` was computed from x_train, which by now holds only the label-0 images,
# while y_train still holds the labels of the full training split. Select the
# matching labels so the colour array has one entry per plotted point.
en_labels = y_train[y_train == 0]
plt.figure(figsize=(10,10))
plt.scatter(en[:,0],-en[:,1],c=10*en_labels, cmap=plt.cm.Spectral)
plt.xlim([-1.5,1.5])
plt.ylim([-1.5,1.5])
plt.show()

### Sample a grid in the encoding space and decode it to visualize this space

In [None]:
# Sample the latent space on an Nsample x Nsample grid.
Nsample=25
hiddenv=np.meshgrid(np.linspace(-1,1,Nsample),np.linspace(-1,1,Nsample))
# The decoder takes endim-dimensional codes, so a bare 2-column grid is
# rejected when endim != 2. Sweep the first two coordinates over the grid and
# hold every remaining coordinate at 0.5, the midpoint of the uniform [0, 1)
# prior drawn by generateZ. For endim == 2 this equals the plain 2-D grid.
v=np.full((Nsample**2,endim),0.5)
v[:,0]=hiddenv[0].flatten()
v[:,1]=hiddenv[1].flatten()
# Decode the grid
decodeimg=np.squeeze(decoder.predict(v))

In [None]:
# Assemble the decoded grid into one mosaic image.
# The tile size is read from the decoder output instead of being hard-coded
# to 28x28, which does not match the 40x40 images this model produces.
# NOTE: this rebinds `img`, which earlier named the Keras input tensor; the
# model- and loss-definition cells must be re-run before reusing that tensor.
count=0
tile_h,tile_w=decodeimg.shape[1:3]
img=np.zeros((Nsample*tile_h,Nsample*tile_w))
for i in range(Nsample):
    for j in range(Nsample):
        img[i*tile_h:(i+1)*tile_h,j*tile_w:(j+1)*tile_w]=decodeimg[count,...]
        count+=1

In [None]:
# Display the decoded latent grid as a single grey-scale mosaic.
fig = plt.figure(figsize=(10, 10))
plt.imshow(img, cmap='gray')
plt.show()

In [None]:
# Visualize the first two coordinates of samples drawn from the prior q_Z.
# generateZ requires the embedding dimension as its second argument.
plt.figure(figsize=(10,10))
Z=generateZ(10000,endim)
plt.scatter(Z[:,0],Z[:,1])
plt.xlim([-1.5,1.5])
plt.ylim([-1.5,1.5])
plt.show()

### Save the trained models! 

In [None]:
# Persist the three models, each to its own HDF5 file.
for trained_model, target_path in (
    (encoder, 'MNIST_circle_encoder.h5'),
    (decoder, 'MNIST_circle_decoder.h5'),
    (autoencoder, 'MNIST_circle_autoencoder.h5'),
):
    save_model(trained_model, filepath=target_path)

## Generate random samples with respect to $q_Z$.

In [None]:
# Draw Nsample**2 codes from the prior and decode them.
# generateZ requires the embedding dimension as its second argument; the
# decoder expects codes of exactly that dimension.
randomSamples=generateZ(Nsample**2,endim)
randomdecodeimg=np.squeeze(decoder.predict(randomSamples))

In [None]:
# Assemble the decoded random samples into one mosaic image.
# The tile size is read from the decoder output instead of being hard-coded
# to 28x28, which does not match the 40x40 images this model produces.
tile_h,tile_w=randomdecodeimg.shape[1:3]
imgRandom=np.zeros((Nsample*tile_h,Nsample*tile_w))
count=0
for i in range(Nsample):
    for j in range(Nsample):
        imgRandom[i*tile_h:(i+1)*tile_h,j*tile_w:(j+1)*tile_w]=randomdecodeimg[count,...]
        count+=1

In [None]:
# Display the decoded random samples as a single grey-scale mosaic.
fig = plt.figure(figsize=(10, 10))
plt.imshow(imgRandom, cmap='gray')
plt.show()