# Reproduction: Planning in Dynamic Environments with Conditional Autoregressive Models

**Abhinav Madahar (<abhinav.madahar@rutgers.edu>) &middot; Sungjin Ahn**

In [1]:
import math
import random
from collections import namedtuple
from enum import Enum

# Seed the module-level generator so every run draws the same
# pseudorandom sequence (and therefore the same positions and velocities).
random.seed(0)

## Making the Environment

We first make the environment.
These are some, but not all, of the parts I need to finish to make the environment.

- [ ] It's a videogame with a goal and moving obstacles.
- [ ] The obstacles move horizontally, like the cars in *Frogger*.
- [x] Movement about the environment is continuous, but collision and goal checking are quantized to the nearest pixel.
- [x] The agent (controlled by the model) is a $1 \times 1$ object and the goal is $2 \times 2$.
- [x] Both are randomly placed.
- [x] The goal is initialized with a velocity with random direction and fixed speed.
- [x] The agent must reach the goal in a set time without hitting an obstacle.
- [x] The agent can have 1 of 2 fixed speeds, 0.5 pixels/timestep or 1 pixel/timestep.
- [ ] At each timestep the agent has the choice of 8 actions. These actions indicate one of 8 equally spaced angles and a constant speed.
- [x] The goal also reflects off world boundaries, like in billiards.
- [x] The environment has configurable height and width.

I want the `Environment` class to be completely deterministic, assuming that the model is deterministic.
The randomness, like for placing the goal and agent, is separated into other functions.
We also create an enum to keep track of whether the current game is running, has been won, or has been lost.

In [2]:
class Vector2D:
    """A mutable pair of planar coordinates ``(x, y)``.

    Supports component-wise addition (``u + v``), scaling by a number on the
    right (``v * a``), and equality by component value.

    Arithmetic returns an instance of the left operand's own class, so a
    subclass keeps its type through ``+`` and ``*``.  Subclasses are therefore
    expected to keep the ``(x, y)`` constructor signature.
    """

    # Defining __eq__ already makes instances unhashable; state it explicitly
    # because the coordinates are mutable and must not be used as dict keys.
    __hash__ = None

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __add__(self, v):
        """Return the component-wise sum of ``self`` and ``v``."""
        return type(self)(self.x + v.x, self.y + v.y)

    def __mul__(self, a):
        """Return ``self`` with both components scaled by the number ``a``."""
        return type(self)(a * self.x, a * self.y)

    def __repr__(self):
        return '{}({}, {})'.format(type(self).__name__, self.x, self.y)

    def __eq__(self, v):
        """Compare by components; defer to Python for objects without x/y."""
        try:
            return self.x == v.x and self.y == v.y
        except AttributeError:
            # Not vector-like: let Python fall back to its default comparison
            # instead of crashing on e.g. ``vector == None``.
            return NotImplemented

class Position(Vector2D):
    """A location in the world, stored as a 2D vector."""

class Velocity(Vector2D):
    """A displacement applied once per timestep, stored as a 2D vector."""
        
def random_position(height, width):
    """Generate a pseudorandom integer position inside a height-by-width world.

    The x coordinate runs along the width and the y coordinate along the
    height, which is the convention the Environment class uses when it
    bounds-checks positions.

    Args:
        height: int, the extent of the world in y; must be positive.
        width: int, the extent of the world in x; must be positive.

    Returns:
        A Position with integer coordinates where 0 <= x < width and
        0 <= y < height.

    Raises:
        ValueError: if height or width is not positive (from random.randrange).
    """
    return Position(random.randrange(width), random.randrange(height))

def random_velocity(speed):
    """Return a Velocity of magnitude `speed` pointing in a pseudorandom direction.

    The heading is drawn uniformly from the range [-pi, pi] radians.
    """
    angle = math.pi * random.uniform(-1, 1)
    unit_x = math.cos(angle)
    unit_y = math.sin(angle)
    return Velocity(speed * unit_x, speed * unit_y)

class GameState(Enum):
    """Outcome of a game so far, as reported by Environment.move()."""

    ONGOING = 1  # neither won nor lost yet
    WON = 2      # the agent's cell is inside the goal's hitbox
    LOST = 3     # the time budget ran out before the goal was reached

We can now make the `Environment` class.

In [3]:
class Environment:
    """A rectangular world holding one agent and one moving 2x2 goal.

    Coordinates are continuous.  ``x`` runs along the width and ``y`` along
    the height; both objects are kept inside ``[0, width] x [0, height]`` by
    reflecting off the walls.  Win checking is done on positions rounded to
    the nearest integer cell.
    """

    # Size of the goal in cells, as [length in x, height in y].
    GOAL_SHAPE = (2, 2)

    def __init__(self, height, width, agent_pos, goal_pos, agent_speed, goal_vel, max_time):
        """
            Initialize the environment.

            Args:
                height: int, the height of the environment.
                width: int, the width of the environment.
                agent_pos: Position, the initial position of the agent.
                goal_pos: Position, the initial position of the goal; the goal is 2x2, so the goal_pos is its bottom-left corner.
                agent_speed: float, the speed (not velocity) of the agent. The model controls the direction.
                goal_vel: Velocity, the velocity of the goal at the start of the experiment. This can change if it hits a wall.
                max_time: int, the maximum number of timesteps which can elapse before an automatic loss.

            Raises:
                AssertionError: if the agent or the goal starts outside the allowed range.
        """

        self.height = height
        self.width = width

        assert 0 <= agent_pos.x <= width, 'agent_pos.x must lie in [0, width]'
        assert 0 <= agent_pos.y <= height, 'agent_pos.y must lie in [0, height]'
        # NOTE(review): the goal's corner is held one unit further inside the
        # far walls than the agent, presumably because the goal is larger than
        # one cell; confirm whether `width - 2` / `height - 2` was intended.
        assert 0 <= goal_pos.x <= width - 1, 'goal_pos.x must lie in [0, width - 1]'
        assert 0 <= goal_pos.y <= height - 1, 'goal_pos.y must lie in [0, height - 1]'
        self.agent_pos = agent_pos
        self.goal_pos = goal_pos

        self.agent_speed = agent_speed
        self.goal_vel = goal_vel

        self.max_time = max_time
        self.time = 0

    def __repr__(self):
        return (
            'Environment(height={}, width={}, agent_pos={}, goal_pos={}, '
            'agent_speed={}, goal_vel={}, max_time={})'
        ).format(
            self.height, self.width, self.agent_pos, self.goal_pos,
            self.agent_speed, self.goal_vel, self.max_time)

    def __str__(self):
        """An ASCII diagram of the environment.

        Each text row is one value of y and each column one value of x.  Row
        y = 0 is printed first, so y increases down the page.  The agent is
        drawn as 'a' and every cell of the goal as 'g'; the agent wins ties.
        """

        # Both lookups are the same for every cell, so compute them once.
        agent_cell = self.rounded(self.agent_pos)
        goal_cells = self.hitbox(self.goal_pos, self.GOAL_SHAPE)

        border = '|' + '-' * self.width + '|'
        lines = [border]
        for row in range(self.height):
            cells = []
            for col in range(self.width):
                cell = Position(col, row)  # x is the column, y is the row
                if cell == agent_cell:
                    cells.append('a')
                elif cell in goal_cells:
                    cells.append('g')
                else:
                    cells.append(' ')
            lines.append('|' + ''.join(cells) + '|')
        lines.append(border)

        return '\n'.join(lines)

    @staticmethod
    def _reflect(coord, delta, upper):
        """Advance one coordinate by `delta`, bouncing off walls at 0 and `upper`.

        Returns the new coordinate and the (possibly sign-flipped) per-step
        displacement.  Because the walls are axis-aligned, mirroring the
        overshoot on each axis independently is the same as moving up to the
        wall and spending the rest of the timestep moving away from it.
        """
        if upper <= 0:
            # Degenerate world with no room to move along this axis.
            return 0, delta

        coord += delta
        # Loop rather than branch once so that a step longer than the world
        # is still folded back inside.
        while coord < 0 or coord > upper:
            coord = -coord if coord < 0 else 2 * upper - coord
            delta = -delta
        return coord, delta

    def _step(self, pos, vel):
        """Return the (Position, Velocity) of an object after one timestep."""
        x, vel_x = self._reflect(pos.x, vel.x, self.width)
        y, vel_y = self._reflect(pos.y, vel.y, self.height)
        return Position(x, y), Velocity(vel_x, vel_y)

    def move(self):
        """Makes all the objects move in a single timestep and returns the game state.

        Returns:
            GameState.WON if the agent's rounded position is inside the goal's
            hitbox after moving, otherwise GameState.LOST once `max_time`
            timesteps have elapsed, otherwise GameState.ONGOING.
        """

        # NOTE: the agent's heading is not controllable yet; it is advanced by
        # a fixed Velocity(1, 1) every step and `agent_speed` is not consulted.
        self.agent_pos, _ = self._step(self.agent_pos, Velocity(1, 1))
        # The goal keeps whatever direction it has after bouncing.
        self.goal_pos, self.goal_vel = self._step(self.goal_pos, self.goal_vel)

        self.time += 1

        # Winning is checked first, so reaching the goal on the final
        # timestep still counts as a win.
        if self.rounded(self.agent_pos) in self.hitbox(self.goal_pos, self.GOAL_SHAPE):
            return GameState.WON

        if self.time >= self.max_time:
            return GameState.LOST

        return GameState.ONGOING

    def hitbox(self, position, shape):
        """
        Returns an array of all the positions which are in this element's hitbox.
        Note that shape = [length in x, height in y].

        `position` is rounded to the nearest cell first and is treated as the
        corner with the smallest x and y of the hitbox.
        """

        corner = self.rounded(position)
        return [
            Position(corner.x + offset_x, corner.y + offset_y)
            for offset_x in range(shape[0])
            for offset_y in range(shape[1])
        ]

    def rounded(self, position):
        """Return `position` snapped to integer coordinates with the built-in round()."""
        return Position(round(position.x), round(position.y))

In [4]:
# Build one randomly initialised 10x10 environment and advance it a single step.
height = 10
width = 10
# The original cell left `max_time` unassigned (a NameError).  The recorded
# output below is LOST after one step, which move() only returns once
# time >= max_time, so the budget must have been at most one timestep.
max_time = 1
goal_speed = 1
agent_pos = random_position(height, width)
# Draw the goal from a world one unit smaller in each direction so that its
# corner satisfies the tighter bounds the Environment constructor checks.
goal_pos = random_position(height-1, width-1)
agent_speed = 1
goal_vel = random_velocity(goal_speed)
environment = Environment(height, width, agent_pos, goal_pos, agent_speed, goal_vel, max_time)
environment.move()

<GameState.LOST: 3>