# Data Preprocessor for the Controller
The controller receives the z produced by the encoder (from the VAE) and the z' produced by the RNN's prediction to determine an action.
The processor below takes a collection of image data and the actions performed, and preprocesses them. It finds z and z' by putting each image through the encoder and then putting z through the RNN.
In the form I left it in, we did not have a good dataset of images and corresponding actions. Right now, it just sets every action to 22 (no action).

## Loading stuff into Kernel

In [1]:
# imports
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape, MaxPooling2D, Dropout, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.models import save_model
from tensorflow.keras.datasets import mnist
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import joblib
# added so that cv2 gets installed in kernal
# import sys
# !{sys.executable} -m pip install opencv-python
# commented the above code, it started working, idk why
# if code not working try uncommenting the above
import cv2
import random
import glob

In [2]:
# data (preprocessed from Data Processing Script)

train_data = joblib.load("images/train_data.z")
print(train_data.shape[2])

# Reshape: only append a channel axis, keeping axis 1 and axis 2 in their
# stored order. The previous version read the two sizes from swapped axes,
# which is harmless for square images but would scramble pixels (a reshape
# is not a transpose) for any non-square input.
# assumes train_data is laid out as (sample, row, column) - TODO confirm
img_height = train_data.shape[1]
img_width = train_data.shape[2]
num_channels = 1
x_train = train_data.reshape(train_data.shape[0], img_height, img_width, num_channels)

# shape of a single image as expected by the encoder's Input layer
input_shape = (img_height, img_width, num_channels)
print(input_shape)

128
(128, 128, 1)


In [3]:
# load the vae (have to make the architecture again, make sure the code below
#   matches the code in the Data Prepper/VAE Trainer)


# ====== Encoder ======
# Size of the latent vector z. The dense layers below scale with this value
# (the latent_mu / latent_sigma weight matrices grow quadratically with it).
latent_dim = 2048

# the model (the running tensor is kept in x)
# NOTE: keep the order in which layers are created unchanged - later code
# selects a layer by position and loads weights saved from this architecture.
input_img = Input(shape=input_shape, name='encoder_input')
x = Conv2D(64, 3, padding='same', activation='relu')(input_img)
x = MaxPooling2D((2,2), padding = 'same')(x)
x = Dropout(0.2)(x)
x = Conv2D(128, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2,2), padding = 'same')(x)
x = Dropout(0.2)(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2,2), padding = 'same')(x)
x = Dropout(0.2)(x)
x = Conv2D(32, 3, padding='same', activation='relu')(x)

conv_shape = K.int_shape(x) # Static shape of the last conv output; the decoder reshapes back to it
# Flatten the feature maps and pass them through a dense layer
x = Flatten()(x)
x = Dense(latent_dim*2, activation='relu')(x)

# Two outputs, for latent mean and latent log-variance.
#  Use these to sample random variables in latent space to which inputs are mapped.
z_mu = Dense(latent_dim, name='latent_mu')(x)   # Mean of the encoded input
z_sigma = Dense(latent_dim, name='latent_sigma')(x)  # Used as log-variance: sample_z takes exp(z_sigma / 2), the loss takes exp(z_sigma)

#REPARAMETERIZATION TRICK
# Define sampling function to sample from the distribution
# Reparameterize sample based on the process defined by Gunderson and Huang
# into the shape of: mu + sigma x eps, where sigma = exp(log variance / 2)
# This lets gradients flow through the sampling step during gradient descent.
def sample_z(args):
    """Draw a latent sample using the reparameterization trick.

    args is a pair (z_mu, z_sigma) of tensors. The second tensor is used as
    a log-variance: it is halved and exponentiated to obtain the scale that
    multiplies the standard-normal noise.

    Returns z_mu + exp(z_sigma / 2) * eps, with eps drawn from a standard
    normal of shape (dynamic batch size, static latent size).
    """
    mean, log_var = args
    batch_size = K.shape(mean)[0]      # dynamic: only known at run time
    latent_size = K.int_shape(mean)[1]  # static: fixed by the layer definition
    noise = K.random_normal(shape=(batch_size, latent_size))
    scale = K.exp(log_var / 2)
    return mean + scale * noise

# Sample a vector from the latent distribution.
# z is a Lambda layer wrapping sample_z, so the sampling step (driven by the
# mean z_mu and the log-variance z_sigma) is part of the differentiable graph.
z = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([z_mu, z_sigma])

# z (the Lambda layer) is the last layer of the encoder.
# Define and summarize the encoder model; it exposes the mean, the
# log-variance and the sampled latent vector as its three outputs.
encoder = Model(input_img, [z_mu, z_sigma, z], name='encoder')
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line; call it directly like the decoder.
encoder.summary()

# ==== Decoder ====

# decoder takes the latent vector as input
decoder_input = Input(shape=(latent_dim, ), name='decoder_input')

# Need to start with a shape that can be remapped to the original image shape,
# as we want the final output to have the same shape as the original input.
# So, add a dense layer whose size can be reshaped to the encoder's last conv output.
x = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3], activation='relu')(decoder_input)
# reshape to the shape of the last conv layer in the encoder, so we can
# apply transposed convolutions to it
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)
# upscale (Conv2DTranspose) back to the original shape:
# the three stride-2 layers each double height and width, undoing the
# encoder's three 2x2 max-pooling layers
x = Conv2DTranspose(32, 3, padding='same', activation='relu',strides=(2, 2))(x)
x = Conv2DTranspose(32, 3, padding='same', activation='relu')(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu',strides=(2, 2))(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu')(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu',strides=(2, 2))(x)
# Can add more Conv2DTranspose layers, if desired.
# Final layer maps back to num_channels with a sigmoid activation
x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)

# Define and summarize decoder model
decoder = Model(decoder_input, x, name='decoder')
decoder.summary()

# apply the decoder to the latent sample
z_decoded = decoder(z)

# ===== Loss Function =====

class CustomLayer(keras.layers.Layer):
    """Pass-through layer that attaches the VAE loss to the model.

    The layer is called with [original image, decoded image]. It registers
    the combined reconstruction + KL loss via add_loss and returns the
    original image unchanged.
    """

    def vae_loss(self, x, z_decoded):
        """Return the scalar VAE loss for input x and reconstruction z_decoded.

        The KL term reads the module-level tensors z_mu and z_sigma
        (z_sigma being used as a log-variance) rather than taking them
        as arguments.
        """
        flat_target = K.flatten(x)
        flat_recon = K.flatten(z_decoded)

        # Reconstruction term: binary cross-entropy (decoder output is sigmoid)
        recon_loss = keras.metrics.binary_crossentropy(flat_target, flat_recon)

        # KL divergence term, scaled by 5e-4
        kl_inner = 1 + z_sigma - K.square(z_mu) - K.exp(z_sigma)
        kl_loss = -5e-4 * K.mean(kl_inner, axis=-1)

        return K.mean(recon_loss + kl_loss)

    def call(self, inputs):
        """Register the loss for inputs = [x, z_decoded] and return x."""
        x, z_decoded = inputs[0], inputs[1]
        self.add_loss(self.vae_loss(x, z_decoded), inputs=inputs)
        return x

# apply the custom loss to the input images and the decoded latent distribution sample
y = CustomLayer()([input_img, z_decoded])
# CustomLayer.call returns its first input unchanged, so y is the original
# input image; wiring it through the layer is what attaches the VAE loss
# (which depends on the encode -> sample z -> decode path) to the graph.
# y is used as the output of the vae model.

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 128, 128, 1  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 128, 128, 64  640         ['encoder_input[0][0]']          
                                )                                                                 
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 64, 64, 64)   0           ['conv2d[0][0]']                 
                                                                                            

In [4]:
# load encoder
# Rebuild the full VAE so the saved weights can be restored into it.
vae = Model(input_img, y, name = 'vae')
# os.path.join keeps the path portable instead of hard-coding "\\" separators.
vae.load_weights(os.path.join(os.getcwd(), "models", "vae.h5"))
# Cut the VAE at the sampling layer. It is looked up by the name given to the
# Lambda layer ('z') rather than by a positional index, so the lookup keeps
# working if layers are added to or removed from the encoder.
encoder = Model(vae.input, vae.get_layer('z').output)

In [5]:
# Load the data that was preprocessed for the RNN; only the length of a
# latent vector (z_len) is needed further down.
data = joblib.load("images/train_data_rnn.z")
train_data = np.array([np.array(pair[0]) for pair in data])
# NOTE(review): answers is built from the same element (index 0) as
# train_data, so the two arrays are identical - confirm whether a different
# element of each entry was intended.
answers = np.array([np.array(pair[0]) for pair in data])
print(train_data[0])
print(train_data[0].shape)
# the last axis of one sample is the latent vector length
z_len = train_data[0].shape[-1]
print(z_len)

[[ 0.0104093  -0.7574463   0.45002717 ... -1.2421685  -0.7610112
  -1.6004298 ]]
(1, 2048)
2048


In [6]:
# Rebuild RNN architecture (make sure this matches architecture in RNN Trainer)
# Input: sequences of length 1 whose single step is a latent vector of size z_len
input_to_rnn = Input(shape=(1,z_len))

# return_sequences=True keeps the time axis, so every following layer is
# applied per time step and the output keeps a sequence dimension
x = LSTM(2048, return_sequences=True)(input_to_rnn)
x = Dropout(0.2)(x)
x = Dense(2048)(x)
x = Dropout(0.2)(x)

# NOTE(review): the sigmoid bounds predictions to (0, 1) while the encoder's
# sampled z values are not bounded - confirm this matches the RNN Trainer
output = Dense(2048, activation='sigmoid')(x)

In [7]:
# Wrap the rebuilt layers in a model and restore the trained RNN weights.
rnn = Model(input_to_rnn, output, name = 'rnn')
# os.path.join keeps the path portable instead of hard-coding "\\" separators.
rnn.load_weights(os.path.join(os.getcwd(), "models", "rnn.h5"))

## Data Processor

In [8]:
# Loads every raw image found in the sub-folders of images/ into `data` as a
# grayscale IMG_SIZE x IMG_SIZE array.
# The working directory is changed to images/ and stays there: the final cell
# saves its output relative to it.
os.chdir("images")

# constant for sizing
IMG_SIZE = 128

if os.path.exists("train_data_controller.z"):
    print("train_data_controller.z already exists, if this notebook is run to completion the old data will be replaced.")

# put images here
data = []

# loop for loading images
path = os.getcwd()
print(path)
count = 0
skipped = 0  # images that could not be read or converted
for folder in os.listdir(path):
    if "train_data" in folder:  # skips any train data files
        continue
    folder_path = os.path.join(path, folder)
    # any other stray file would make os.listdir below raise, so skip it
    if not os.path.isdir(folder_path):
        continue
    print("FOLDER: ",folder)
    for filename in os.listdir(folder_path):
        # images are saved as NEF (may need to change back to JPG in future)
        if ".NEF" not in filename:
            continue
        temp_path = os.path.join(folder_path, filename)
        # cv2.imread signals an unreadable file by returning None, not by raising
        img_array = cv2.imread(temp_path)
        if img_array is None:
            skipped += 1
            print("could not read image, skipping:", temp_path)
            continue
        try:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
            img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        except cv2.error as e:
            # keep the best-effort behaviour, but report the failure
            skipped += 1
            print("could not process image, skipping:", temp_path, "-", e)
            continue
        data.append(img_array)
        count += 1
print("images processed:", count)
if skipped:
    print("images skipped:", skipped)

train_data_controller.z already exists, if this notebook is run to completion the old data will be replaced.
C:\Users\mattp\Documents\CoemergenceLab\WorldModel_self\images
FOLDER:  2021-02-27
FOLDER:  2021-03-01
FOLDER:  2021-03-03
FOLDER:  2021-03-04
FOLDER:  2021-03-08
FOLDER:  2021-03-09
FOLDER:  2021-03-10
FOLDER:  2021-06-21
FOLDER:  2021-06-23
FOLDER:  2021-06-25
FOLDER:  2021-06-28
FOLDER:  2021-06-30
FOLDER:  2021-07-01
FOLDER:  2021-07-02
images processed: 754


In [9]:
# some reshaping so the images can be encoded

# Stack the loaded images into (num_images, IMG_SIZE, IMG_SIZE, 1) and scale
# the pixel values by 1/255.
train_data = np.array(data).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
train_data = train_data/255.0
print(train_data.shape[2])

# Reshape: axis 1 holds image rows (height) and axis 2 holds columns (width).
# The previous version read the two sizes from swapped axes, which is harmless
# while images are square but would scramble non-square ones.
img_height = train_data.shape[1]
img_width = train_data.shape[2]
num_channels = 1
x_train = train_data.reshape(train_data.shape[0], img_height, img_width, num_channels)

# shape of a single image as expected by the encoder
input_shape = (img_height, img_width, num_channels)
print(input_shape)

128
(128, 128, 1)


In [10]:
# gets the z vectors by running train_data (the images) through the encoder
# All images are encoded with a single batched predict call instead of one
# call per image. Each result row is given back its leading axis of size 1 so
# z_vals stays a list with one array per image, exactly as the per-image
# calls produced.
if len(train_data) > 0:
    z_vals = [row[None, ...] for row in encoder.predict(train_data)]
else:
    # nothing was loaded: keep the empty list the per-image loop would give
    z_vals = []

  updates=self.state_updates,


In [11]:
# gets the z' vectors by running z_vals (the latent vectors) through the RNN
# The latent vectors are stacked into one batch and predicted in a single
# call instead of one call per vector. Every batch entry is an independent
# sequence, and each result row gets its leading axis of size 1 back so
# zprime_vals keeps the per-item shape the one-at-a-time calls produced.
if z_vals:
    rnn_batch = np.stack(z_vals)
    zprime_vals = [row[None, ...] for row in rnn.predict(rnn_batch)]
else:
    # np.stack rejects an empty list; keep the empty result of the old loop
    zprime_vals = []

In [12]:
# Action labels, one per (z, z') pair.
# Placeholder: every pair is labelled [22] so the skeleton of the controller
#    can be built. Will need to update this to properly consider actions in
#    the future, see readme for notes.
actions = [[22] for _ in zip(z_vals, zprime_vals)]

In [13]:
# Training set for the controller: a list with one [z, z', action] entry
# per image.
final_data = [
    [latent, predicted, label]
    for latent, predicted, label in zip(z_vals, zprime_vals, actions)
]
print(final_data[0])

[array([[-0.34742382,  1.4214687 ,  0.29589504, ...,  0.7067528 ,
         0.725762  , -0.3477292 ]], dtype=float32), array([[[1.1670291e-03, 5.1805377e-04, 9.6488714e-02, ...,
         2.3236513e-02, 9.7199380e-01, 1.3059974e-03]]], dtype=float32), [22]]


In [14]:
# Finished: save the assembled [z, z', action] entries. The file name is
# relative, so it is written to the current working directory printed below.
print(os.getcwd())
joblib.dump(value=final_data, filename="train_data_controller.z")

C:\Users\mattp\Documents\CoemergenceLab\WorldModel_self\images


['train_data_controller.z']