# Basic Usage Guide for Obstacle Tower Gym Interface

In [None]:
from obstacle_tower_env import ObstacleTowerEnv, ObstacleTowerEvaluation
%matplotlib inline
from matplotlib import pyplot as plt
from IPython.display import display, clear_output
import numpy as np

# import matplotlib.pyplot as plt
# import matplotlib.animation as animation

## Launching the environment
Ensure that the Obstacle Tower binary has been downloaded (https://github.com/Unity-Technologies/obstacle-tower-env#download-the-environment), and placed in the correct sub-folder. Here we use the `examples/ObstacleTower` sub-folder.

In [None]:
# Realtime mode determines whether the environment window will render the scene,
# as well as whether the environment will run at realtime speed. Set this to `True`
# to visualize the agent behavior as you would in player mode.

# Create the environment from the binary at ./ObstacleTower/obstacletower,
# in non-retro mode with realtime mode switched on.
env = ObstacleTowerEnv('./ObstacleTower/obstacletower', retro=False, realtime_mode=True)

## Environment information
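The cell below prints the action and observation spaces exposed by the environment. (We can also set the random seed used to generate the environment, as well as choose a starting floor; both are shown further down.)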

In [None]:
# Print the action space.
# The environment provided has a MultiDiscrete action space, where the 4 dimensions are:

# 0. Movement (No-Op/Forward/Back)
# 1. Camera Rotation (No-Op/Counter-Clockwise/Clockwise)
# 2. Jump (No-Op/Jump)
# 3. Sideways movement (No-Op/Right/Left)
print(env.action_space)


# Print the observation space.
# The observation space provided includes a 168x168 image (the camera from the simulation)
# as well as the number of keys held by the agent (0-5) and the amount of time remaining.
print(env.observation_space)

## Interacting with the environment

In [None]:
# Fix the seed and starting floor, reset the environment, and show the first frame.
# numpy is only referenced by the commented-out random-seed alternative below.
import numpy as np

seed = 5       # seed = np.random.randint(100)
env.seed(seed) # Seeds can be chosen from range of 0-100.
env.floor(0)   # Floors can be chosen from range of 0-100.
obs = env.reset()

# Display the first element of the observation as an image
# (the camera frame, going by the observation-space notes in this notebook).
plt.imshow(obs[0])

In [None]:
def run_episode(env, seed=5, floor=0, action=None):
    """Run one episode that alternates a random action with a fixed action.

    The environment is seeded, pointed at the starting floor and reset, then
    stepped until it reports `done`. Each loop iteration takes one randomly
    sampled action and, if the episode is still running, one fixed `action`.
    The reward of *every* step is added to the return.

    Args:
        env: Environment exposing `seed`, `floor`, `reset`, `step` and
            `action_space.sample()`; `step` must return a 4-tuple
            `(obs, reward, done, info)`.
        seed: Value passed to `env.seed` before the reset.
        floor: Value passed to `env.floor` before the reset.
        action: Fixed action taken after each random step. Defaults to
            `[1, 0, 0, 0]` (presumably "move forward only", given the
            MultiDiscrete layout movement/camera/jump/sideways).

    Returns:
        The sum of the rewards of all steps taken in the episode.
    """
    if action is None:
        action = [1, 0, 0, 0]

    env.seed(seed)
    env.floor(floor)
    env.reset()

    episode_return = 0.0
    done = False
    while not done:
        # Random exploration step; count its reward right away so that it is
        # not overwritten by the follow-up step below.
        _, reward, done, _ = env.step(env.action_space.sample())
        episode_return += reward
        if not done:
            _, reward, done, _ = env.step(action)
            episode_return += reward
    return episode_return

In [None]:
##### Run a fixed number of steps #####
# Steps the already-reset `env` for 50 loop iterations. Each iteration takes
# one random action and, if the episode is still running, one fixed action.
# `r` accumulates the reward of every step taken.

action = [1, 0, 0, 0]
r = 0

### img ###
# `fig` and `ims` are only consumed by the commented-out animation code below.
fig = plt.figure()
ims = []


# observation = [camera, key, time, floor]
# 1 env.step = 50 ms (the original note read "50mms" -- TODO confirm the unit)
for i in range(0, 50):
    obs, reward, done, info = env.step(env.action_space.sample())
    # Count the random step's reward immediately; otherwise it would be
    # overwritten by the follow-up step before being added to `r`.
    r += reward
    if not done:
        obs, reward, done, info = env.step(action)
        r += reward

    # im = plt.imshow(obs[0], animated =False)
    # ims.append([im])
    # clear_output(True)
    # plt.show()

    if r > 0:
        print("Reward: %.2f" % r)

    if done:
        # Start a new episode so the remaining iterations keep stepping.
        obs = env.reset()
        print("Result Reward: %.2f" % r)

    ## test ##

    # ims is a list of lists, each row is a list of artists to draw in the
    # current frame; here we are just animating one artist, the image, in
    # each frame

# ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True, repeat = False, repeat_delay=None)

# To save the animation, use e.g.
# from matplotlib.animation import FFMpegWriter
# ani.save("movie.mp4")
# writer = FFMpegWriter(fps=15, metadata=dict(artist='Me'), bitrate=1800)
# ani.save("movie.mp4", writer=writer)
# plt.show()
##########

In [None]:
# Dump the observation currently held in `obs` (last assigned by the stepping loop).
print(obs)

In [None]:
# Shut the environment down.
# NOTE(review): the next code cell calls run_episode(env) while its lines that
# re-create `env` are commented out -- confirm the intended run order.
env.close()

In [None]:
##### Run until done ##### 
# Runs one full episode with run_episode and prints the returned total reward.
# NOTE(review): the preceding code cell calls env.close(); when the notebook is
# run top-to-bottom, `env` presumably has to be re-created (see the
# commented-out lines below) before this cell can work -- TODO confirm.

#env = ObstacleTowerEnv('./ObstacleTower/obstacletower', retro=False, realtime_mode=True)
#env = ObstacleTowerEvaluation(env, eval_seeds)


    
print("Total Reward: ",run_episode(env))

In [None]:
# Close the environment used for the full-episode run.
env.close()

## Setting environment parameters
We can also set the random seed used to generate the environment, as well as choose a starting floor.

In [None]:
# Seeds can be chosen from range of 0-100.
# NOTE(review): the preceding code cell calls env.close(); when run
# top-to-bottom this operates on a closed env -- confirm whether `env`
# should be re-created first.
env.seed(5)

In [None]:
# Floors can be chosen from range of 0-100.
# Select floor 15 as the starting floor.
env.floor(15)

In [None]:
# Additional reset parameters can be set using a config dictionary
# Here we set the agent perspective to first-person mode.
# `config` is handed to env.reset(config=...) in the following cell.
config = {'agent-perspective': 1}

In [None]:
# These parameters won't take effect until the next reset.
obs = env.reset(config=config)
# Show the first element of the returned observation as an image.
plt.imshow(obs[0])

## Evaluation

In [None]:
from obstacle_tower_env import ObstacleTowerEnv, ObstacleTowerEvaluation
%matplotlib inline
from matplotlib import pyplot as plt

def run_episode(env):
    """Step `env` with randomly sampled actions until it reports done.

    The environment is not reset here; stepping starts from whatever state
    `env` is currently in.

    Args:
        env: Object exposing `action_space.sample()` and a `step(action)`
            method that returns `(obs, reward, done, info)`.

    Returns:
        The sum of all step rewards, starting from 0.0.
    """
    total = 0.0
    while True:
        sampled = env.action_space.sample()
        _obs, gained, finished, _info = env.step(sampled)
        total += gained
        if finished:
            return total

In [None]:
# Evaluation driver: builds the environment, wraps it for evaluation, runs
# episodes until the wrapper reports completion, then prints the results.
if __name__ == '__main__':
    # In this example we use the seeds used for evaluating submissions
    # to the Obstacle Tower Challenge.
    # The full five-seed list is kept commented out; only one seed is run here.
    #eval_seeds = [1001, 1002, 1003, 1004, 1005]
    eval_seeds = [1001]

    # Create the ObstacleTowerEnv gym and launch ObstacleTower
    env = ObstacleTowerEnv('./ObstacleTower/obstacletower', realtime_mode=False)

    # Wrap the environment with the ObstacleTowerEvaluation wrapper
    # and provide evaluation seeds. `env` is rebound to the wrapper.
    env = ObstacleTowerEvaluation(env, eval_seeds)

    # We can run episodes (in this case with a random policy) until
    # the "evaluation_complete" flag is True.  Attempting to step or reset after
    # all of the evaluation seeds have completed will result in an exception.
    while not env.evaluation_complete:
        # The per-episode return is stored but not used further in this cell.
        episode_rew = run_episode(env)

    # Finally the evaluation results can be fetched as a dictionary from the
    # environment wrapper.
    print(env.results)



In [None]:
# Close the evaluation-wrapped environment.
env.close()

## Closing the environment

In [None]:
# NOTE(review): the preceding code cell already calls env.close(); a second
# close on the same env may raise -- TODO confirm and drop one of the two.
env.close()

## Launching the environment (retro mode)
We also provide a `retro mode` which uses observation and action spaces similar to those found in the Arcade Learning Environment (ALE).

In [None]:
# Re-create the environment with retro=True; realtime_mode is left at its default.
env = ObstacleTowerEnv('./ObstacleTower/obstacletower', retro=True)

In [None]:
# In retro mode, the observation is an 84x84 image with the time remaining and key count visually embedded.
# Bare expression: as the last line of a notebook cell its value is echoed.
env.observation_space

## Interacting with the environment (retro mode)

In [None]:
# Reset and print the observation's shape. Here `obs` is used directly as an
# array (it has `.shape`) rather than being indexed as `obs[0]`.
obs = env.reset()
print(obs.shape)

In [None]:
# Take one randomly sampled action and display the resulting observation as an image.
obs, reward, done, info = env.step(env.action_space.sample())
plt.imshow(obs)

## Closing the environment

In [None]:
# Close the retro-mode environment.
env.close()