In [1]:
# Author: Suhan Shetty | suhan.n.shetty@gmail.com
# This is an implementation of behavior cloning for different agents available in OpenAI Gym environments.
# The expert policy is already available and the dataset-[observations, actions] is included as expert_data
#  for different environments.

#Ref:http://rail.eecs.berkeley.edu/deeprlcourse/

# Jupyter-notebook shortcuts (Press Esc first):
# Cmd + Shift + P - pops up keyboard shortcuts
# Shift+L - toggles line numbering
# Ctrl+Enter - Run the current Cell
# Shift+Tab - indent / de-indent
# D + D - delete the current cell

In [2]:
# Import the data from an expert policy: [observations, actions]
import numpy as np
import math
import pickle

# read the dataset
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load expert data files that come from a trusted source.
agent = "expert_data/Humanoid-v2.pkl"
with open(agent, "rb") as expert_file:  # the context manager closes the file handle
    expert_policy = pickle.load(expert_file)

# separate dataset into input and output
observations_data = expert_policy['observations']
actions_data = expert_policy['actions']
print("-------------------------------------------------------")
print("Shape of Input: ", observations_data.shape)
print("Shape of Output: ", actions_data.shape)

# squeeze the output matrix to a 2D array by dropping the singleton axis 1
actions_data = np.squeeze(actions_data, axis=1)
print("Reshaped Output: ",actions_data.shape)

# verify the shape of the data: one action row per observation row,
# and both arrays must be 2D
assert observations_data.shape[0] == actions_data.shape[0]
assert observations_data.ndim == 2 and actions_data.ndim == 2
print("-------------------------------------------------------")

-------------------------------------------------------
Shape of Input:  (19152, 376)
Shape of Output:  (19152, 1, 17)
Reshaped Output:  (19152, 17)
-------------------------------------------------------


In [3]:
# Separate dataset into training and test data (one tenth is held out for testing).
# NOTE: this cell overwrites observations_data / actions_data with the training
# subset, so re-running it without re-running the loading cell shrinks the
# training set again.
data_size = observations_data.shape[0]
test_size = data_size // 10

# Use a dedicated, seeded generator so the same rows are held out on every run,
# without touching NumPy's global random state used elsewhere.
split_rng = np.random.RandomState(0)
index = split_rng.choice(data_size, size=test_size, replace=False)
test_observations_data = observations_data[index,:]
test_actions_data = actions_data[index,:]

# exclude the test data from training data
observations_data = np.delete(observations_data, index, axis=0)
actions_data = np.delete(actions_data, index, axis=0)

# sanity check: training and test rows together account for every original row
assert observations_data.shape[0] + test_observations_data.shape[0] == data_size
assert actions_data.shape[0] + test_actions_data.shape[0] == data_size

print("-------------------------------------------------------")
print("Size of input data, training: ", observations_data.shape)
print("Size of output data, training: ", actions_data.shape)
print("Size of input data, testing: ", test_observations_data.shape)
print("Size of output data, testing: ", test_actions_data.shape)
print("-------------------------------------------------------")

-------------------------------------------------------
Size of input data, training:  (17237, 376)
Size of output data, training:  (17237, 17)
Size of input data, testing:  (1915, 376)
Size of output data, testing:  (1915, 17)
-------------------------------------------------------


In [4]:
# Setup tensorflow
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os

import tensorflow as tf
# tf.__version__ reports the installed TensorFlow version; the tf.VERSION
# alias used previously is deprecated and no longer exists in TensorFlow 2.x.
print("tf version: ", tf.__version__)

tf version:  1.11.0


In [5]:
# Start a tf session
# The default graph is reset first, so the model cells below build into a
# fresh graph; `sess` is the session the later cells run their ops in.
tf.reset_default_graph()
sess = tf.Session()

In [6]:
# Tip: if you run into problems with TensorBoard
# clear the contents of this directory, re-run this script
# then restart TensorBoard to see the result
# Directory that receives the TensorBoard event files.
LOGDIR = './graphs'

# Create the log directory on the first run.
if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)


### Tensor Board Setup
# NOTE(review): no model ops have been created yet at this point, so the graph
# written here looks empty; the populated graph is added later through
# train_writer. Confirm whether this writer is still needed.
writer = tf.summary.FileWriter(LOGDIR)
writer.add_graph(tf.get_default_graph())

# To inspect the logs, launch TensorBoard from a shell, e.g.:
#!tensorboard --logdir .

In [7]:
# DNN Architecture:
# Ref: https://github.com/tensorflow/workshops/blob/master/extras/archive/03_deep_neural_network_low_level.ipynb

In [8]:
# Hyper-parameters
LAYER_SIZE=64  # number of units in each hidden dense layer
LEARNING_RATE = 0.001  # learning rate passed to the Adam optimizer
TRAIN_STEPS = 100000  # number of mini-batch updates performed by the training loop
BATCH_SIZE = 256  # number of training rows sampled for each update

In [9]:
# Placeholders through which batches are fed into the graph.
# The leading dimension is None so that any number of rows can be fed;
# the trailing dimension is taken from the corresponding data array.
with tf.name_scope('input'):
    observations = tf.placeholder(
        dtype=tf.float32,
        shape=[None, observations_data.shape[1]],
        name="observations",
    )
    actions = tf.placeholder(
        dtype=tf.float32,
        shape=[None, actions_data.shape[1]],
        name="actions",
    )

print("observations tensor: ", observations)
print("actions tensor: ", actions)

observations tensor:  Tensor("input/observations:0", shape=(?, 376), dtype=float32)
actions tensor:  Tensor("input/actions:0", shape=(?, 17), dtype=float32)


In [10]:
# Ref:https://www.tensorflow.org/api_docs/python/tf/layers/dense


# Hidden stack: three ReLU dense layers of LAYER_SIZE units each, with dropout
# applied after the first and the second layer.
with tf.name_scope('layers'):
    # The dropout keep probability is a feedable scalar rather than a constant
    # baked into the graph. When nothing is fed it evaluates to 0.9, which is
    # the value used for training; feed 1.0 for this tensor to disable dropout
    # when evaluating the policy or running a restored model.
    keep_prob = tf.placeholder_with_default(0.9, shape=(), name='keep_prob')

    fc1 = tf.layers.dense(inputs=observations, activation=tf.nn.relu, units=LAYER_SIZE, name='fc1')
    dropped_1 = tf.nn.dropout(fc1, keep_prob=keep_prob)
    fc2 = tf.layers.dense(inputs=dropped_1, activation=tf.nn.relu, units=LAYER_SIZE, name='fc2')
    dropped_2 = tf.nn.dropout(fc2, keep_prob=keep_prob)
    fc_last = tf.layers.dense(inputs=dropped_2, activation=tf.nn.relu, units=LAYER_SIZE, name='fc_last_hidden')

In [11]:
# output layer
# Linear dense layer (no activation) with one unit per action dimension.
with tf.name_scope('output'):
    final_output = tf.layers.dense(inputs=fc_last,  activation=None, units=actions_data.shape[1], name='final_output')
    # tf.identity re-exposes the layer output under the op name 'actions_pred'
    # (inside the 'output' name scope) -- presumably so the prediction tensor
    # can be looked up by name later; confirm against the code that loads the model.
    actions_pred = tf.identity(final_output, name='actions_pred')

In [12]:
# Define loss and an optimizer
with tf.name_scope("loss"):
    # Mean squared error between the predicted and the expert actions,
    # averaged over every element of the batch (all rows and action dimensions).
    loss = tf.reduce_mean(tf.square(actions_pred - actions))
    # Register the loss as a scalar summary so that it can be logged for TensorBoard.
    tf.summary.scalar('loss', loss)

with tf.name_scope("optimizer"):
    # Running `train` performs one Adam update that minimizes `loss`.
    train = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

In [13]:
# Set up logging.
# Two FileWriters write into separate sub-directories of LOGDIR ("train" and
# "test"), so the two series can be displayed side by side in TensorBoard.
train_writer = tf.summary.FileWriter(os.path.join(LOGDIR, "train"))
# Attach the session's graph to the training log.
train_writer.add_graph(sess.graph)
test_writer = tf.summary.FileWriter(os.path.join(LOGDIR, "test"))
# Single op that evaluates all summaries registered so far (here: the 'loss' scalar).
summary_op = tf.summary.merge_all()

In [14]:
# Initialize every global variable in the graph before the first training step.
sess.run(tf.global_variables_initializer())

In [15]:
# Function to sample a batch of data from training set
def sample_data(observations_data, actions_data, batch_size):
    """Draw a random batch of matching observation/action rows.

    Row indices are drawn without replacement from the rows of
    ``observations_data``, so a batch never contains the same row twice and
    ``batch_size`` must not exceed the number of rows. The same indices are
    applied to both arrays, which keeps each observation paired with its action.

    Returns:
        A tuple ``(sample_observations, sample_actions)`` holding the selected
        rows of each array, in the order they were drawn.
    """
    num_rows = observations_data.shape[0]
    picked = np.random.choice(num_rows, size=batch_size, replace=False)
    return observations_data[picked, :], actions_data[picked, :]

In [16]:
# training

# Create a saver object which will save all the variables
saver = tf.train.Saver()
agent_name = "humanoid_2"
export_path = "./saved_model_"+agent_name
if not os.path.exists(export_path):
    os.makedirs(export_path)
checkpoint_prefix = export_path+"/"+agent_name

# How often (in training steps) to log the held-out loss, print progress
# and write a checkpoint.
EVAL_EVERY = 1000

try:
    for step in range(TRAIN_STEPS):
        # one optimizer update on a freshly sampled mini-batch
        batch_obs, batch_actions = sample_data(observations_data, actions_data, BATCH_SIZE)
        summary_result, mse_run, _ = sess.run(
            [summary_op, loss, train],
            feed_dict={observations: batch_obs, actions: batch_actions})
        train_writer.add_summary(summary_result, step)

        if step % EVAL_EVERY == 0:
            # Log the loss on the held-out test set. The whole test set is fed
            # at once, so this is slower than a training step.
            # NOTE(review): the dropout layers are still applied during this
            # pass (nothing here disables them), so the logged test loss
            # includes dropout noise.
            test_summary = sess.run(
                summary_op,
                feed_dict={observations: test_observations_data,
                           actions: test_actions_data})
            test_writer.add_summary(test_summary, step)

            # mse_run is the loss of the current training mini-batch
            print('train_step: {0:04d} mse: {1:.3f}'.format(step, mse_run))
            # save the current variables
            saver.save(sess, checkpoint_prefix)

    # The periodic checkpoint above does not cover the last steps of the run,
    # so save once more after the final update.
    saver.save(sess, checkpoint_prefix)
finally:
    # close the writers even if training is interrupted, so that buffered
    # events are written to disk
    train_writer.close()
    test_writer.close()

train_step: 0000 mse: 1913.418
train_step: 1000 mse: 0.532
train_step: 2000 mse: 0.447
train_step: 3000 mse: 0.386
train_step: 4000 mse: 0.332
train_step: 5000 mse: 0.317
train_step: 6000 mse: 0.269
train_step: 7000 mse: 0.258
train_step: 8000 mse: 0.212
train_step: 9000 mse: 0.169
train_step: 10000 mse: 0.151
train_step: 11000 mse: 0.150
train_step: 12000 mse: 0.119
train_step: 13000 mse: 0.118
train_step: 14000 mse: 0.115
train_step: 15000 mse: 0.107
train_step: 16000 mse: 0.094
train_step: 17000 mse: 0.100
train_step: 18000 mse: 0.094
train_step: 19000 mse: 0.089
train_step: 20000 mse: 0.094
train_step: 21000 mse: 0.089
train_step: 22000 mse: 0.087
train_step: 23000 mse: 0.087
train_step: 24000 mse: 0.085
train_step: 25000 mse: 0.079
train_step: 26000 mse: 0.081
train_step: 27000 mse: 0.074
train_step: 28000 mse: 0.074
train_step: 29000 mse: 0.075
train_step: 30000 mse: 0.076
train_step: 31000 mse: 0.071
train_step: 32000 mse: 0.085
train_step: 33000 mse: 0.066
train_step: 34000 mse