In [13]:
# % matplotlib inline

import numpy as np
import pandas as pd
import json
import sys
import os
import matplotlib
#matplotlib.use('Agg') 
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import seaborn as sns
import pdb
#import ipdb
import h5py
import importlib
import pickle
import math

# Add root folder to sys path
sys.path.append("../")

from load_expert_traj import recursively_save_dict_contents_to_group

# %pylab inline
# inline doesn't give interactive plots
# %matplotlib inline 
# %matplotlib notebook
# Apply seaborn's default theme first. sns.set() rewrites matplotlib rcParams
# (its style settings include 'image.cmap'), so the notebook-specific
# overrides below must come after it or they would be silently replaced.
sns.set()

plt.rcParams['figure.figsize'] = (6.0, 6.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'Blues'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# %load_ext autoreload
# %autoreload 2

# Import own modules
# Need to use importlib since module starts with a numerical value.

In [14]:
import copy
def draw_trajectory_for_paralel_first(begin_posi=(2,2), goal_posi=(8,12), plot=False):
    """Build a unit-step, axis-aligned grid path from ``begin_posi`` to ``goal_posi``.

    The path consists of five legs, walked in this order:

    1. step right until x reaches 3 (skipped when x is already >= 3),
    2. step vertically until y == 7,
    3. step horizontally along y == 7 until x == 7,
    4. step vertically until y equals the goal's y,
    5. step horizontally until x equals the goal's x.

    Legs 2-5 step in whichever direction reaches their target, so a start to
    the right of x == 7 or a goal to the left of x == 7 terminates instead of
    looping forever.

    Args:
        begin_posi: ``(x, y)`` start cell (integer coordinates).
        goal_posi: ``(x, y)`` goal cell (integer coordinates).
        plot: Accepted for call compatibility; not used by this function.

    Returns:
        ``(x, y)``: two equally long lists holding the x and y coordinate of
        every visited cell, start and goal included.

    Side effects:
        Prints both coordinate lists to stdout.
    """
    # Layout constants of the path: the horizontal leg runs along y == 7,
    # entered at x == 3 and left at x == 7.
    corridor_x_start = 3
    corridor_x_end = 7
    corridor_y = 7

    x = [begin_posi[0]]
    y = [begin_posi[1]]

    def _walk(moving, fixed, target):
        # Append one point per unit step: `moving` advances toward `target`
        # while `fixed` repeats its last value.
        while moving[-1] != target:
            moving.append(moving[-1] + 1 if moving[-1] < target else moving[-1] - 1)
            fixed.append(fixed[-1])

    # Leg 1: only ever moves right; a start at x >= 3 stays where it is.
    while x[-1] < corridor_x_start:
        x.append(x[-1] + 1)
        y.append(y[-1])

    _walk(y, x, corridor_y)       # leg 2: vertical, onto the corridor row
    _walk(x, y, corridor_x_end)   # leg 3: along the corridor
    _walk(y, x, goal_posi[1])     # leg 4: vertical, to the goal row
    _walk(x, y, goal_posi[0])     # leg 5: horizontal, to the goal column

    print('x: ', x)
    print('y: ', y)
    return x, y

In [15]:
def get_state_action_from_positions(pos_x_list, pos_y_list):
    """Convert a path of grid positions into parallel state and action lists.

    States are ``(x, y)`` tuples. Each action is an integer code describing
    the move from one position to the next:

        0 -- x unchanged and y did not decrease by exactly one ("up")
        1 -- x unchanged and y decreased by one ("down")
        2 -- x decreased by one ("left")
        3 -- any other change in x ("right")

    Args:
        pos_x_list: x coordinate of every visited position.
        pos_y_list: y coordinate of every visited position.

    Returns:
        ``(states, actions)`` where the final position is dropped from
        ``states`` so that both lists have one entry per move.
    """
    states = [(pos_x_list[k], pos_y_list[k]) for k in range(len(pos_x_list))]

    actions = []
    # Walk over consecutive (previous, current) position pairs.
    for (prev_x, prev_y), (cur_x, cur_y) in zip(states, states[1:]):
        if prev_x != cur_x:
            # Horizontal move: left when x dropped by one, otherwise right.
            a = 2 if prev_x == cur_x + 1 else 3
        elif prev_y == cur_y + 1:
            # Vertical move with y dropping by one: down.
            a = 1
        else:
            # Remaining case with x unchanged: up.
            a = 0
        actions.append(a)

    # The last position has no outgoing move, so it is not a state.
    return states[:-1], actions

def save_expert_traj_dict_to_h5(traj_data_dict, save_dir,
                                h5_filename='expert_traj.h5'):
    """Write ``traj_data_dict`` to an HDF5 file inside ``save_dir``.

    Args:
        traj_data_dict: Dictionary handed to
            ``recursively_save_dict_contents_to_group`` for serialisation.
        save_dir: Directory that receives the file.
        h5_filename: Name of the HDF5 file; any existing file with this name
            is overwritten because the file is opened in ``'w'`` mode.

    Side effects:
        Creates/overwrites the file and prints its path once it is written.
    """
    h5_path = os.path.join(save_dir, h5_filename)
    # The context manager closes the file even when the save helper raises,
    # so a failed write no longer leaks an open HDF5 handle.
    with h5py.File(h5_path, 'w') as h5_f:
        recursively_save_dict_contents_to_group(h5_f, '/', traj_data_dict)
        h5_f.flush()
    print("Did save data to {}".format(h5_path))



### make demonstrations

In [16]:
import random
from tqdm import trange
from itertools import product
# Sample a (start, goal) cell pair for each expert trajectory.
num_traj = 50

# Candidate start cells and goal cells; each draw picks one uniformly.
begin_posi_center = [(1,3), (2, 3), (3, 2), (3, 1), (1, 11), (2, 11), (3, 12), (3, 13)]
goal_posi_center = [(7,2), (7,1), (8,3), (9, 3), (7,12), (7,13), (8,11), (9,11)]
# Smaller alternative that was used before:
# begin_posi_center = [(2,2), (2, 12)]
# goal_posi_center = [(8,2), (8, 12)]

begin_posi_list, goal_posi_list = [], []
for _ in range(num_traj):
    # Start cell. The drawn candidate is used verbatim; an earlier variant
    # that jittered it by +/-1 around the centre is disabled.
    center_begin = random.sample(begin_posi_center, 1)
    x_begin, y_begin = center_begin[0]
    begin_posi_list.append((x_begin, y_begin))
    print('begin:', (x_begin, y_begin))

    # Goal cell, drawn the same way (the jitter variant is disabled too).
    center_goal = random.sample(goal_posi_center, 1)
    x_goal, y_goal = center_goal[0]
    goal_posi_list.append((x_goal, y_goal))
    print('->goal:',(x_goal,y_goal))


# Environment metadata stored next to the trajectories.
env_data_dict = dict(num_goals=4, num_actions=4)

# One entry per trajectory, keyed by the trajectory index as a string.
expert_data_dict = {}
for i in trange(num_traj):
    begin_posi, goal_posi = begin_posi_list[i], goal_posi_list[i]
    states_x, states_y = draw_trajectory_for_paralel_first(
        begin_posi=begin_posi, goal_posi=goal_posi, plot=False)
    states, actions = get_state_action_from_positions(states_x, states_y)

    key = str(i)
    expert_data_dict[key] = {
        'state': np.array(states),
        'action': np.array(actions),
        # One value per state, all equal to the trajectory index i.
        'goal': np.ones(len(states)) * i,
    }


from grid_world import create_obstacles

# Grid dimensions passed to the obstacle generator.
grid_width, grid_height = 11, 15
obstacles, rooms, room_centres = create_obstacles(
    width=grid_width, height=grid_height, env_name='room', room_size=3)

# Every (x, y) cell of the grid that does not appear in `obstacles`.
set_diff = list(
    set(product(range(grid_width), range(grid_height))) - set(obstacles)
)

# Everything that gets written to the HDF5 file.
data_to_save = dict(
    expert_traj=expert_data_dict,
    obstacles=obstacles,
    set_diff=set_diff,
    env_data=env_data_dict,
)

# Set to False to build the data without writing anything to disk.
SAVE_DATA = True
if SAVE_DATA:
    save_dir = '../h5_trajs/room_trajs/traj_len_16'
    # exist_ok=True creates the directory tree when missing and is a no-op
    # when it already exists, avoiding the check-then-create race.
    os.makedirs(save_dir, exist_ok=True)
    save_expert_traj_dict_to_h5(data_to_save, save_dir)

begin: (1, 11)
->goal: (7, 12)
begin: (3, 12)
->goal: (7, 1)
begin: (3, 1)
->goal: (7, 12)
begin: (3, 13)
->goal: (8, 11)
begin: (1, 3)
->goal: (8, 11)
begin: (1, 3)
->goal: (7, 12)
begin: (3, 13)
->goal: (8, 3)
begin: (3, 1)
->goal: (7, 2)
begin: (3, 12)
->goal: (7, 12)
begin: (2, 11)
->goal: (8, 11)
begin: (3, 1)
->goal: (7, 1)
begin: (1, 3)
->goal: (7, 1)
begin: (3, 12)
->goal: (7, 13)
begin: (1, 11)
->goal: (7, 1)
begin: (3, 1)
->goal: (9, 11)
begin: (3, 12)
->goal: (7, 13)
begin: (3, 12)
->goal: (7, 12)
begin: (3, 12)
->goal: (7, 2)
begin: (3, 12)
->goal: (7, 2)
begin: (3, 2)
->goal: (8, 3)
begin: (1, 3)
->goal: (8, 11)
begin: (3, 1)
->goal: (7, 1)
begin: (3, 12)
->goal: (8, 3)
begin: (3, 13)
->goal: (7, 2)
begin: (1, 3)
->goal: (9, 3)
begin: (1, 11)
->goal: (8, 3)
begin: (1, 3)
->goal: (9, 11)
begin: (3, 12)
->goal: (7, 12)
begin: (2, 3)
->goal: (8, 3)
begin: (1, 11)
->goal: (7, 1)
begin: (3, 12)
->goal: (8, 3)
begin: (3, 12)
->goal: (8, 11)
begin: (3, 12)
->goal: (7, 12)
begin: 

100%|██████████| 50/50 [00:00<00:00, 12192.04it/s]

x:  [1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7]
y:  [11, 11, 11, 10, 9, 8, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12]
x:  [3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7]
y:  [12, 11, 10, 9, 8, 7, 7, 7, 7, 7, 6, 5, 4, 3, 2, 1]
x:  [3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7]
y:  [1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12]
x:  [3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 8]
y:  [13, 12, 11, 10, 9, 8, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11]
x:  [1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 8]
y:  [3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11]
x:  [1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7]
y:  [3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12]
x:  [3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 8]
y:  [13, 12, 11, 10, 9, 8, 7, 7, 7, 7, 7, 6, 5, 4, 3, 3]
x:  [3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7]
y:  [1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 6, 5, 4, 3, 2]
x:  [3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7]
y:  [12, 11, 10, 9, 8, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12]
x:  [2, 3, 3, 3,


