In [1]:
from codecs import mbcs_decode
from ctypes.wintypes import WORD
from pickle import TUPLE
from platform import python_branch
import gym
from gym import Env
import numpy as np
import pygame
from gym import spaces
from tensorflow import keras
from keras import layers
from keras.optimizers import Adam
from keras import Sequential
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

In [28]:
class Game(Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 3}

    def __init__(self, render_mode=None, size=7):
        self.size = size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window
        self.human = 'human'
        self.rgb = 'rgb_array'
        
        self.observation_space = spaces.Box(np.array([0,0]), np.array([size-1,size-1]), shape=(2,),dtype=int)
        print(self.observation_space)
        
        #Moveset of the Agent (Left, Right, Up, Down, Stay)
        self.action_space = spaces.Discrete(4)
        self._action_to_direction = {
            0: np.array([1, 0],dtype=int),
            1: np.array([0, 1],dtype=int),
            2: np.array([-1, 0],dtype=int),
            3: np.array([0, -1],dtype=int),
            #4: np.array([0,0])
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        """
        If human-rendering is used, `self.window` will be a reference
        to the window that we draw to. `self.clock` will be a clock that is used
        to ensure that the environment is rendered at the correct framerate in
        human-mode. They will remain `None` until human-mode is used for the
        first time.
        """
        self.window = None
        self.clock = None


    def _get_Agent(self):
        # if(self._target_location[0] and self._target_location[1]  in np.array( [self._agent_location + [1,1],
        #                                         self._agent_location +  [1,-1],
        #                                         self._agent_location +  [-1,-1],
        #                                         self._agent_location +  [-1,1],
        #                                         self._agent_location +  [2,0],
        #                                         self._agent_location +  [0,2],
        #                                         self._agent_location +  [-2,0],
        #                                         self._agent_location +  [0,-2],
        #                                         self._agent_location +  [1,0],
        #                                         self._agent_location +  [-1,0],
        #                                         self._agent_location +  [0,1],
        #                                         self._agent_location +  [0,-1]], dtype=int)):
            #pygame.quit()
        # distance = np.linalg.norm(self._agent_location - self._target_location, ord=1)
        # if distance <= 2:
        #     #return {"target":self._target_location,"agent": self._agent_location }
        #     return self._target_location,self._agent_location

        # else:
        #     #return {"agent": self._agent_location}
        #     return self._agent_location
        return self._agent_location
    def _get_Target(self):
        #return {"target":self._target_location}
        return self._target_location



    def reset(self, seed=2, options=None):
        # We need the following line to seed self.np_random
        super().reset(seed=seed)
        self.steps = 0
        self.reward = 0
        # Choose the agent's location uniformly at random
        self._agent_location= self.np_random.integers(
                 self.size-1, self.size, size=2, dtype=int
            ) 
        
        self._obstacle_location = np.array([2,2])

        # We will sample the target's location randomly until it does not coincide with the agent's location
        self._target_location = self._agent_location
        while np.array_equal(self._target_location, self._agent_location) or np.array_equal(self._target_location, self._obstacle_location):
            self._target_location = self.np_random.integers(
                 0, self.size, size=2, dtype=int
            )

        if self.render_mode == self.human:
            self._render_frame()
        
        self.observation = self._get_Agent()
        
        self.info = self._get_Target()
        return self.observation, self.info

    def step(self, action):
            # Map the action (element of {0,1,2,3}) to the direction we walk in
        self.reward_gain = 1
        self.reward_loss = 0.01
        self.steps += 1
        direction = self._action_to_direction[action]

        if(self._agent_location + direction ==self._obstacle_location).all():
            self._agent_location = self._agent_location -direction
        # We use `np.clip` to make sure we don't leave the grid
        self._agent_location = np.clip(
            self._agent_location + direction, 0, self.size - 1,
            
        )
        # An episode is done if the agent has reached the target
        self.terminated = np.array_equal(self._agent_location, self._target_location)
        self.reward += self.reward_gain if self.terminated else - self.reward_loss  # Binary sparse rewards

        if self.render_mode == self.human:
            self._render_frame()
        self.observation = self._get_Agent()
        self.info = self._get_Target()
        return (self.observation, self.reward, self.terminated, False, self.steps, self.info)

    def render(self):
        if self.render_mode == self.rgb:
            return self._render_frame()

    def _render_frame(self):
        if self.window is None and self.render_mode == self.human:
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.window_size, self.window_size))
        if self.clock is None and self.render_mode == self.human:
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        pix_square_size = (
            self.window_size / self.size
        )  # The size of a single grid square in pixels

        # First we draw the target
        pygame.draw.rect(
            canvas,
            (255, 0, 0),
            pygame.Rect(
                pix_square_size * self._target_location,
                (pix_square_size, pix_square_size),
            ),
        )
        pygame.draw.rect(
            canvas,
            (0, 0, 0),
            pygame.Rect(
                pix_square_size * self._obstacle_location,
                (pix_square_size, pix_square_size),
            ),
        )
        # Now we draw the agent
        pygame.draw.circle(
            canvas,
            (0, 0, 255),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )
        
        
       
        # Finally, add some gridlines
        for x in range(self.size + 1):
            pygame.draw.line(
                canvas,
                0,
                (0, pix_square_size * x),
                (self.window_size, pix_square_size * x),
                width=2,
            )
            pygame.draw.line(
                canvas,
                0,
                (pix_square_size * x, 0),
                (pix_square_size * x, self.window_size),
                width=2,
            )

        if self.render_mode == self.human:
            # The following line copies our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # We need to ensure that human-rendering occurs at the predefined framerate.
            # The following line will automatically add a delay to keep the framerate stable.
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )

    def close(self):
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()


In [29]:
env = Game()


Box(0, 6, (2,), int32)


In [30]:
env.observation_space.sample()

array([5, 0])

In [31]:
human = 'human'
rgb = 'rgb_array'
env = Game(render_mode=rgb)

Box(0, 6, (2,), int32)


In [32]:
env.reset()
episodes = 20
score = 0

for episode in range(1, episodes+1):
    steps = 0
    done = False
    observation = env.reset()
    score = 0 
    while not done:
        action = env.action_space.sample()
        observation = env.step(action)
        env.render()
        steps +=1
        if(np.array_equal(env._agent_location, env._target_location)):
            score += env.reward
            done = True
            
    print('Episode:{} Score:{}'.format(episode,score), "steps:{} ".format(steps))
env.close()

AttributeError: module 'numpy' has no attribute 'flatten'

In [20]:
states = env.observation_space.shape
actions = env.action_space.n

In [21]:
print(actions)
print(states)

4
(2,)


In [22]:
def build_model(states, actions):
    model = Sequential()    
    model.add(layers.Dense(24, activation='relu', input_shape=(1,2)))
    #model.add(layers.Dense(24, activation='relu', input_shape=(1,4)))
    model.add(layers.Flatten()) 
    model.add(layers.Dense(24, activation='relu'))
    model.add(layers.Dense(actions, activation='linear'))
    
    return model

In [None]:
del model 

In [23]:
model = build_model(states, actions)
print(model.output_shape)

(None, 4)


In [24]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 1, 24)             72        
                                                                 
 flatten_1 (Flatten)         (None, 24)                0         
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 4)                 100       
                                                                 
Total params: 772
Trainable params: 772
Non-trainable params: 0
_________________________________________________________________


In [25]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [26]:
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=500, visualize=False, verbose=1)

Training for 500 steps ...
Interval 1 (0 steps performed)


ValueError: Error when checking input: expected dense_3_input to have 3 dimensions, but got array with shape (1, 1, 2, 2)