# Data Preprocessor for the RNN
The RNN takes z, the encoding of an image, and predicts z', the encoding of the image that follows it. To build training data for the RNN, we therefore need pairs (z, z') made from consecutive images in our pictorial data.

## Loading stuff into the Kernel

In [1]:
# imports
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape, MaxPooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import save_model
from tensorflow.keras.datasets import mnist
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import joblib
# added so that cv2 gets installed in the kernel
# import sys
# !{sys.executable} -m pip install opencv-python
# the install lines above are commented out: cv2 started importing without them (reason unknown)
# if `import cv2` fails, try uncommenting them
import cv2
import random
import glob

In [2]:
# Load the image stack written by the Data Processing Script.
train_data = joblib.load("images/train_data.z")
print(train_data.shape[2])

# Append an explicit single-channel axis so the images match the Conv2D input layout.
# NOTE(review): width is read from axis 1 and height from axis 2. The images
#   printed below are square (128x128) so the order has no effect here, but
#   confirm the axis order before feeding non-square images.
num_channels = 1
img_width, img_height = train_data.shape[1:3]

input_shape = (img_height, img_width, num_channels)
x_train = train_data.reshape((train_data.shape[0],) + input_shape)
print(input_shape)

128
(128, 128, 1)


In [3]:
# Rebuild the VAE architecture so that the trained weights can be loaded into
# it further down. The layers below (types, sizes and creation order) must
# match the code in the Data Prepper/VAE Trainer that produced the checkpoint.


# ====== Encoder ======
# Size of the latent vector. The Dense layers below are sized from it
# (latent_dim*2 and latent_dim units), so their parameter count grows with it.
latent_dim = 2048

# Convolutional feature extractor; `x` always holds the most recent layer output.
input_img = Input(shape=input_shape, name='encoder_input')
x = Conv2D(64, 3, padding='same', activation='relu')(input_img)
x = MaxPooling2D((2,2), padding = 'same')(x)
x = Dropout(0.2)(x)
x = Conv2D(128, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2,2), padding = 'same')(x)
x = Dropout(0.2)(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2,2), padding = 'same')(x)
x = Dropout(0.2)(x)
x = Conv2D(32, 3, padding='same', activation='relu')(x)

conv_shape = K.int_shape(x) # static shape of the last conv output; the decoder reshapes back to it
# Flatten the feature maps and pass them through one hidden Dense layer
x = Flatten()(x)
x = Dense(latent_dim*2, activation='relu')(x)

# Two linear heads of size latent_dim that parameterize the latent distribution.
# z_sigma is treated as a log-variance by the code that consumes it
# (sample_z uses exp(z_sigma / 2), the KL term uses exp(z_sigma)).
z_mu = Dense(latent_dim, name='latent_mu')(x)   # mean of the encoded input
z_sigma = Dense(latent_dim, name='latent_sigma')(x)  # log-variance of the encoded input (despite the name)

# REPARAMETERIZATION TRICK
# Sampling is written as a deterministic function of (z_mu, z_sigma) plus
# independent standard-normal noise, following the formulation the original
# author attributes to Gunderson and Huang:
#     z = mu + exp(log_var / 2) * eps
# so gradients can flow back through z_mu and z_sigma during training.
def sample_z(args):
    """Draw one latent sample per batch row from (mean, log-variance).

    args: a pair ``(z_mu, z_sigma)`` of backend tensors; ``z_sigma`` is used
        as a log-variance, i.e. the standard deviation is ``exp(z_sigma / 2)``.
    Returns a tensor computed as ``z_mu + exp(z_sigma / 2) * eps`` where
    ``eps`` is standard-normal noise with shape (batch, latent size).
    """
    mean, log_var = args
    # dynamic batch size, static latent size
    batch_rows = K.shape(mean)[0]
    latent_size = K.int_shape(mean)[1]
    noise = K.random_normal(shape=(batch_rows, latent_size))
    std_dev = K.exp(log_var / 2)
    return mean + std_dev * noise

# Wire the sampling step into the graph as a Lambda layer named 'z'; it takes
# the two Dense heads (z_mu, z_sigma) and emits one latent vector per image.
z = Lambda(
    sample_z,
    output_shape=(latent_dim,),
    name='z',
)([z_mu, z_sigma])

# The sampled z is the last layer of the encoder; the model exposes all three
# tensors (z_mu, z_sigma and z).
encoder = Model(inputs=input_img, outputs=[z_mu, z_sigma, z], name='encoder')
# Prints the layer table (print() additionally echoes summary()'s return value).
print(encoder.summary())

# ==== Decoder ====
# Mirrors the encoder: maps a latent vector back to an image. Layer types,
# sizes and order must stay in sync with the trained checkpoint.

# decoder takes the latent vector as input
decoder_input = Input(shape=(latent_dim, ), name='decoder_input')

# Start from a Dense layer with exactly as many units as the encoder's last
# conv output has elements, so it can be reshaped into that feature-map shape
# and then upsampled to the original image size.
x = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3], activation='relu')(decoder_input)
# reshape to the shape of the last conv layer in the encoder so the transposed
# convolutions can operate on feature maps
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)
# Upscale back towards the original shape: the three strides=(2, 2) layers
# undo the encoder's three 2x2 poolings; the stride-1 layers in between only
# refine the feature maps.
x = Conv2DTranspose(32, 3, padding='same', activation='relu',strides=(2, 2))(x)
x = Conv2DTranspose(32, 3, padding='same', activation='relu')(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu',strides=(2, 2))(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu')(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu',strides=(2, 2))(x)
# More Conv2DTranspose layers can be added here if desired (the checkpoint
# would then have to be retrained to match).
# Final layer: one filter per image channel, sigmoid keeps outputs in (0, 1)
x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)

# Define and summarize decoder model
decoder = Model(decoder_input, x, name='decoder')
decoder.summary()

# apply the decoder to the latent sample produced by the encoder's 'z' layer
z_decoded = decoder(z)

# ===== Loss Function =====

class CustomLayer(keras.layers.Layer):
    """Pass-through layer that attaches the VAE loss to the model.

    Called on ``[input_image, reconstruction]``; it registers the loss via
    ``add_loss`` and returns the input image unchanged.
    """

    def vae_loss(self, x, z_decoded):
        """Return the mean of reconstruction loss plus weighted KL term.

        x: original image tensor.
        z_decoded: decoder output for the sampled latent vector.
        Reads the module-level ``z_mu`` and ``z_sigma`` tensors for the KL term.
        """
        flat_target = K.flatten(x)
        flat_recon = K.flatten(z_decoded)

        # Reconstruction term: binary cross-entropy fits the sigmoid decoder output.
        recon_loss = keras.metrics.binary_crossentropy(flat_target, flat_recon)

        # KL divergence term, scaled by 5e-4 (z_sigma is used as a log-variance).
        kl_inner = 1 + z_sigma - K.square(z_mu) - K.exp(z_sigma)
        kl_loss = -5e-4 * K.mean(kl_inner, axis=-1)

        return K.mean(recon_loss + kl_loss)

    def call(self, inputs):
        """Register the VAE loss for ``inputs`` and return ``inputs[0]``."""
        x, z_decoded = inputs[0], inputs[1]
        self.add_loss(self.vae_loss(x, z_decoded), inputs=inputs)
        return x

# Apply the custom loss layer to the input images and the decoded latent sample.
y = CustomLayer()([input_img, z_decoded])
# CustomLayer.call returns its first input unchanged, so y carries the input
# image through the graph while the layer registers the VAE loss (which
# depends on the encode -> sample z -> decode path) via add_loss.
# y is used as the output tensor of the full vae model.

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 128, 128, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 128, 128, 64) 640         encoder_input[0][0]              
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 64, 64, 64)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
dropout (Dropout)               (None, 64, 64, 64)   0           max_pooling2d[0][0]              
____________________________________________________________________________________________

In [4]:
# Assemble the full VAE from the tensors defined above, restore its trained
# weights, then carve out the sub-model that maps an image to a latent vector.
vae = Model(input_img, y, name = 'vae')
# Build the checkpoint path with os.path.join so it is valid on every OS
# (on Windows this yields the same "<cwd>\models\vae" string as before).
vae.load_weights(os.path.join(os.getcwd(), "models", "vae"))
# NOTE(review): index 15 is presumably the 'z' Lambda layer (the sampled
#   latent vector) — confirm with vae.summary() if the architecture changes.
encoder = Model(vae.input, vae.layers[15].output)

## Data Preprocessor

In [5]:
# Preprocesses the images as before, but appends a "|" marker after each
# folder so that images from different folders can be kept apart when pairing.

os.chdir("images")

# Remember whether the RNN training file already exists so that the rebuild
# below can be skipped on later runs.
data_exists = os.path.exists("train_data_rnn.z")
# constant for sizing
IMG_SIZE = 128

# if the data has not been made yet, make it
if not data_exists:
    data = []
    path = os.getcwd()
    print(path)

    def create_data():
        """Fill the module-level ``data`` list with grayscale, resized images.

        Walks every sub-directory of ``path`` in sorted order, loads each file
        whose name contains ".NEF", converts it to grayscale, resizes it to
        IMG_SIZE x IMG_SIZE and appends it to ``data``. A "|" string is
        appended after each folder as a separator. Unreadable images are
        reported and skipped rather than silently ignored.
        """
        count = 0
        # os.listdir returns entries in arbitrary order; sort so that the
        # folder/frame sequence (which determines the pairs) is deterministic.
        for folder in sorted(os.listdir(path)):
            folder_path = os.path.join(path, folder)
            # Only sub-directories hold images; this also skips plain files
            # such as train_data.z that live next to the image folders.
            if not os.path.isdir(folder_path):
                continue
            print("FOLDER: ",folder)
            for filename in sorted(os.listdir(folder_path)):
                # images are saved as NEF (may need to change back to JPG in future)
                if ".NEF" not in filename:
                    continue
                temp_path = os.path.join(folder_path, filename)
                count += 1
                # cv2.imread signals failure by returning None, not by raising
                img_array = cv2.imread(temp_path)
                if img_array is None:
                    print("skipped unreadable image: " + temp_path)
                    continue
                try:
                    img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
                    img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                except cv2.error as e:
                    # stay best-effort, but say which file was dropped and why
                    print("skipped image " + temp_path + ": " + str(e))
                    continue
                data.append(img_array)
                print("image processed..." + str(count) , end="\r")
            # folder boundary marker
            data.append("|")

    create_data()
# else, don't
else:
    print("train_data_rnn.z already exists :), you shouldn't need to run this script again unless the data's changed (in which case, delete the current one)")

C:\Users\mattp\Documents\CoEmergenceLab\WorldModel-selfmade\images
FOLDER:  2021-02-27
FOLDER:  2021-03-010
FOLDER:  2021-03-030
FOLDER:  2021-03-041
FOLDER:  2021-03-0836
FOLDER:  2021-03-0957
FOLDER:  2021-03-1080
FOLDER:  2021-06-2141
FOLDER:  2021-06-2304
FOLDER:  2021-06-2515
FOLDER:  2021-06-2835
FOLDER:  2021-06-3045
FOLDER:  2021-07-0146
FOLDER:  2021-07-0243
image processed...825

In [6]:
# Derive the network input shape from the loaded image stack and add the
# single channel axis.
# NOTE(review): this reshapes `train_data`, not the marker-separated `data`
#   list built by the preprocessing cell — confirm that is intended, since
#   the "|" folder markers never reach x_train this way.
print(train_data.shape[2])

num_channels = 1
img_width = train_data.shape[1]
img_height = train_data.shape[2]

input_shape = (img_height, img_width, num_channels)
x_train = train_data.reshape((train_data.shape[0], *input_shape))
print(input_shape)

128
(128, 128, 1)


In [7]:
# Goes through the (optionally marker-separated) frame sequence and pairs each
# frame with the frame that follows it. Pairs that touch a "|" folder marker
# are dropped so that frames from different folders are never paired.
def _is_marker(item):
    """Return True if *item* is the "|" folder separator rather than an image."""
    # Check the type first: comparing a numpy image array with a string is
    # elementwise on recent NumPy and cannot be used directly in an `if`.
    return isinstance(item, str) and item == "|"


paired_data = [
    [current_frame, next_frame]
    for current_frame, next_frame in zip(x_train[:-1], x_train[1:])
    if not (_is_marker(current_frame) or _is_marker(next_frame))
]

  """


In [12]:
# sanity check: shape of the first frame of the first pair
print(paired_data[0][0].shape)

(128, 128, 1)


In [16]:
# Runs both frames of every pair through the encoder to get the latent
# vectors (z, z') the RNN is trained on.
z_vals = []

for current_frame, next_frame in paired_data:
    # add a leading batch axis of size 1, as encoder.predict expects a batch
    input1 = current_frame[None,:,:,:]
    # the second input must be the *following* frame (pair[1]); encoding
    # pair[0] twice would make every (z, z') pair identical up to sampling noise
    input2 = next_frame[None,:,:,:]
    z1 = encoder.predict(input1)
    z2 = encoder.predict(input2)
    z_vals.append([z1, z2])

In [18]:
# Finished: persist the list of [z, z'] latent pairs. The relative path is
# resolved against the current working directory, which the preprocessing
# cell changed to images/ via os.chdir — the same location its
# os.path.exists("train_data_rnn.z") check looks at.
joblib.dump(z_vals, "train_data_rnn.z")

['train_data_rnn.z']