In [1]:
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from random import randrange
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# custom modules
from utils     import Options, rgb2gray
from simulator import Simulator
from model import ConvNet

In [2]:
# 0. initialization
# Load the experiment options and build the simulator from them.
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)

# Build the network from the hyper-parameters stored in the options object.
# NOTE(review): the keyword is spelled `pub_siz` while the option is `pob_siz`
# -- confirm this matches the ConvNet constructor signature.
NeuralPlanner = ConvNet(
    cub_siz=opt.cub_siz,
    pub_siz=opt.pob_siz,
    hist_len=opt.hist_len,
    logits_units=opt.act_num,
    num_filt1=opt.num_filt1,
    kernel_size1=opt.kernel_size1,
    num_filt2=opt.num_filt2,
    kernel_size2=opt.kernel_size2,
    pool_size=opt.pool_size,
    dense_units=opt.dense_units,
    dropout_rate=opt.dropout_rate,
)

saver = tfe.Saver(NeuralPlanner.variables)

# tf.train.latest_checkpoint returns None when the directory contains no
# checkpoint; fail with a clear message instead of passing None to restore().
checkpoint_path = tf.train.latest_checkpoint(opt.checkpoint_dir_save)
if checkpoint_path is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}'.format(opt.checkpoint_dir_save))
saver.restore(checkpoint_path)

INFO:tensorflow:Restoring parameters from \tmp\tensorflow\NeuralPlanner\checkpoints\save\ckpt-5000


In [6]:
# 2. evaluation state: first game, observation history, counters, display
# Begin the first game at the configured target and start from an all-zero
# observation history of opt.hist_len rows.
state = sim.newGame(opt.tgt_y, opt.tgt_x)
full_state = np.zeros(shape=(opt.hist_len, opt.state_siz))

# Bookkeeping for the evaluation run:
#   epi_step         - number of steps taken in the current episode
#   nepisodes        - total number of episodes executed
#   nepisodes_solved - number of executed episodes counted as solved
epi_step = nepisodes = nepisodes_solved = 0

# Action to take, as given by the network; 0 until the first prediction.
action = 0

# Image handles for the live display; only created when display is enabled.
if opt.disp_on:
    win_all = win_pob = None

In [7]:
# 1. control loop
# Runs the restored network in the simulator for opt.eval_steps iterations,
# counting how many episodes finish and how many of those reach a terminal
# state.
for step in range(opt.eval_steps):
    # check if episode ended
    if state.terminal or epi_step >= opt.early_stop:
        epi_step = 0
        nepisodes += 1
        if state.terminal:
            nepisodes_solved += 1
        # start a new game with an empty observation history
        state = sim.newGame(opt.tgt_y, opt.tgt_x)
        full_state = np.zeros((opt.hist_len, opt.state_siz))
    else:
        # Push the newest grayscale observation onto the history and keep
        # only the most recent opt.hist_len rows (drops the oldest one).
        frame = rgb2gray(state.pob).reshape(1, opt.state_siz)
        full_state = np.concatenate((full_state, frame), axis=0)[-opt.hist_len:]

        # Flatten the history into a single float32 row for the network.
        full_state_flat = np.asarray(
            full_state.reshape(1, opt.hist_len * opt.state_siz), dtype=np.float32)
        # The network is fed through a one-element dataset. The label is a
        # placeholder that is never read; it is sized by opt.act_num instead
        # of a hard-coded 5.
        eval_dataset = tf.data.Dataset.from_tensor_slices(
            (full_state_flat, np.zeros((1, opt.act_num))))
        for image, label in tfe.Iterator(eval_dataset):
            logits = NeuralPlanner.predict(image, training=False)
        # Pick the index of the largest network output as the action.
        action = np.argmax(logits)

        state = sim.step(action)

        epi_step += 1

    if state.terminal or epi_step >= opt.early_stop:
        epi_step = 0
        nepisodes += 1
        if state.terminal:
            nepisodes_solved += 1
        # start a new game; the history is cleared too, so frames from the
        # finished episode are not fed to the network during the next one
        state = sim.newGame(opt.tgt_y, opt.tgt_x)
        full_state = np.zeros((opt.hist_len, opt.state_siz))

    # periodic progress output
    if step % opt.prog_freq == 0:
        print(step)

    if opt.disp_on:
        if win_all is None:
            # first frame: create the two image panels (full screen, local view)
            plt.subplot(121)
            win_all = plt.imshow(state.screen)
            plt.subplot(122)
            win_pob = plt.imshow(state.pob)
        else:
            # later frames: only swap the pixel data of the existing panels
            win_all.set_data(state.screen)
            win_pob.set_data(state.pob)
        plt.pause(opt.disp_interval)
        plt.draw()

# 2. calculate statistics
# Fraction of finished episodes that ended in a terminal state. Guarded so a
# run in which no episode finished does not raise ZeroDivisionError.
if nepisodes > 0:
    print(float(nepisodes_solved) / float(nepisodes))
else:
    print('no episode finished within {} evaluation steps'.format(opt.eval_steps))
# 3. TODO perhaps  do some additional analysis

0
500
1000
1500
2000
1.0
