# Preparing data and training our memory on it

In [1]:
import os
import sys
# Put the parent directory on sys.path so the notebook can import the
# project's local packages (e.g. `vision`); skipped when already present.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
from vision.vAE import VAE
from memory import Memory
import numpy as np
import tensorflow as tf

## Loading random rollout data & preprocessing it

In [3]:
# Load random rollout datasets from the sibling `data` directory.
# The directory is resolved from the parent of the current working directory
# instead of slicing a fixed number of characters off the path, so it no
# longer depends on the length of the notebook folder's name.
path = os.path.join(os.path.dirname(os.getcwd()), 'data')
# np.load on an .npz archive keeps the file open; the context managers close
# it again once the array has been read into memory.
with np.load(os.path.join(path, 'rr_data_state.npz')) as state_file:
    state_data = state_file['arr_0']
with np.load(os.path.join(path, 'rr_data_action.npz')) as action_file:
    action_data = action_file['arr_0']
state_data.shape

(258, 1000, 1, 64, 64, 1)


In [4]:
# Sanity check: display the shape of the loaded action array
action_data.shape

(258, 1000, 3)

In [5]:
# Keep only the first 250 entries of both arrays so we work with a round number
state_data, action_data = state_data[:250], action_data[:250]
len(state_data)

250

In [6]:
# Load up trained VAE
v = VAE()
# Call the model once on a single frame before loading; the result is
# discarded (presumably this builds the model's variables — TODO confirm).
_ = v(state_data[1,1,:,:,:,:])
# Resolve the sibling `vision` directory from the parent of the working
# directory rather than slicing a fixed number of characters off the path.
v.load(os.path.join(os.path.dirname(os.getcwd()), 'vision'))

loading model from /Users/floyd/Documents/Studium/Coxi6/DRL/World-Model-LunarLanderContinuous-v2-with-PPO/world_model/vision/models


In [8]:
# Reshape to feed into vae: merge the first two axes into one and keep the
# remaining axes as they are. The flattened length is derived from the array
# itself instead of a hard-coded 1000*250.
state_data_b = state_data.reshape((-1,) + state_data.shape[2:])
state_data_b.shape

(250000, 1, 64, 64, 1)

## Turn states into latent-space z-values

In [15]:
# Run every entry of state_data_b through the VAE's gen_z, one at a time,
# and stack the results into a single array
z_values = np.asarray([v.gen_z(frame) for frame in state_data_b])
z_values.shape

(250000, 1, 32)

## Build matching sequence input & target data for our memory

In [16]:
# Restore the (rollout, step, latent) sequence layout
z = z_values.reshape(250, 1000, 32)
z.shape

(250, 1000, 32)

In [18]:
# Memory input: z-values and actions joined along the last axis, with the
# final step of every sequence dropped
x = tf.concat([z[:, :-1], action_data[:, :-1]], axis=-1)
x.shape

TensorShape([250, 999, 35])

In [19]:
# Memory target: the z-values shifted by one step (first step dropped),
# converted to a tensor
y = tf.convert_to_tensor(z[:, 1:])
y.shape

TensorShape([250, 999, 32])

In [39]:
# Nice and neat in a dataset: slicing along the first axis yields one
# (input, target) pair per sequence
ds = tf.data.Dataset.from_tensor_slices((x, y))
ds

<TensorSliceDataset shapes: ((999, 35), (999, 32)), types: (tf.float32, tf.float32)>

In [22]:
# Saving the current z-values, because loading and preprocessing takes time.
# vae160 = we use z-values generated from a vae with loss::160
# Written GZIP-compressed to saved_data/z_vae160 under the current working directory.
tf.data.experimental.save(ds, os.getcwd() + '/saved_data/z_vae160', compression='GZIP')

In [40]:
# Batching the dataset
# Small batches, bc we only have 250 sequences
# A shuffle buffer of 1 leaves the order unchanged, so the buffer is sized to
# the whole dataset to actually shuffle the sequences before batching.
ds = ds.shuffle(len(ds)).batch(4).prefetch(tf.data.experimental.AUTOTUNE)

In [56]:
# Number of batches the batched dataset yields per pass
len(ds)

63

## Training...

In [50]:
import time

def train(model, data, epochs, shuffle_buffer=None):
    """Train `model` on `data` for `epochs` epochs, then save it.

    Args:
        model: Object exposing `lstm.get_zero_hidden_state(x)`,
            `train_op(x, y, state)` (returns the batch loss) and `save(path)`.
        data: Iterable of `(x, y)` batches that offers `shuffle(buffer_size)`,
            e.g. a batched `tf.data.Dataset`.
        epochs: Number of passes over `data`.
        shuffle_buffer: Buffer size handed to `data.shuffle`. Defaults to the
            number of batches when `len(data)` is available, otherwise 1.

    Returns:
        List with the mean batch loss of every epoch, in order. The list is
        empty when `epochs` is 0.
    """
    if shuffle_buffer is None:
        try:
            # A buffer of 1 does not shuffle at all; cover every batch instead.
            shuffle_buffer = max(len(data), 1)
        except TypeError:
            # Length not available for this dataset; fall back to the old buffer.
            shuffle_buffer = 1

    # Attach the shuffle stage once. Re-assigning `data = data.shuffle(...)`
    # inside the loop stacked one more shuffle stage on top every epoch.
    shuffled = data.shuffle(shuffle_buffer)

    epoch_losses = []
    for e in range(epochs):
        t = time.time()
        batch_losses = []

        for (x, y) in shuffled:
            # Every batch starts from a fresh zero hidden state.
            state = model.lstm.get_zero_hidden_state(x)
            batch_losses.append(model.train_op(x, y, state))

        epoch_losses.append(np.mean(batch_losses))
        print(f"Epoch: {e} done - Loss: {epoch_losses[-1]} - Time: {time.time()-t}")

    model.save(os.getcwd() + '/160model')

    # Return the per-epoch history; the previous version returned only the
    # batch losses of the last epoch and failed with NameError for epochs == 0.
    return epoch_losses

In [46]:
# Fresh Memory model plus an empty list that collects the results of train()
mem, loss = Memory(), []

In [55]:
# Train for 42 epochs and append what train() returns to the loss tracker
loss.append(train(mem, ds, 42))

Epoch: 0 done - Loss: 1.3459980487823486 - Time: 192.0172119140625
Epoch: 1 done - Loss: 1.3411744832992554 - Time: 192.66047596931458
Epoch: 2 done - Loss: 1.3365046977996826 - Time: 193.72682666778564
Epoch: 3 done - Loss: 1.3317500352859497 - Time: 191.1217441558838
Epoch: 4 done - Loss: 1.3277541399002075 - Time: 194.53779578208923
Epoch: 5 done - Loss: 1.323643684387207 - Time: 210.03446984291077
Epoch: 6 done - Loss: 1.3203871250152588 - Time: 197.75387692451477
Epoch: 7 done - Loss: 1.3172882795333862 - Time: 195.4755642414093
Epoch: 8 done - Loss: 1.313510775566101 - Time: 196.7855122089386
Epoch: 9 done - Loss: 1.3100488185882568 - Time: 195.58611416816711
Epoch: 10 done - Loss: 1.3070094585418701 - Time: 195.1967670917511
Epoch: 11 done - Loss: 1.304121971130371 - Time: 194.9245388507843
Epoch: 12 done - Loss: 1.3003976345062256 - Time: 195.34611320495605
Epoch: 13 done - Loss: 1.2980632781982422 - Time: 195.06553602218628
Epoch: 14 done - Loss: 1.295081615447998 - Time: 195.

In [52]:
import mdrnn.Memory as PiasMemory

In [54]:
# NOTE(review): in the recorded run this cell ended with
# AttributeError: 'Memory' object has no attribute 'mdn' — confirm that
# PiasMemory provides everything train() uses before re-running.
pmem = PiasMemory()
ploss = train(pmem, ds, 20)

AttributeError: 'Memory' object has no attribute 'mdn'