In [151]:
import sys
sys.path.append('../')
import capture
import game
import layout as lay
import textDisplay
from gymnasium import spaces
from torchvision import transforms as T

# Size of the discrete action space exposed by the PacmanEnv defined later in this notebook.
NUM_ACTIONS = 4

# Contest rules object; it is used below to create new games and to process every new game state.
rules = capture.CaptureRules()

In [152]:
# Relative path of the layout file every game in this notebook is played on.
# TODO: run the algorithm on a random layout each time. Also, look into the random_layout generation function.
LAYOUT_NAME = "../layouts/defaultCapture.lay"

In [153]:
# Parse the layout file selected by LAYOUT_NAME.
layout = lay.get_layout(LAYOUT_NAME)

# Both sides are built from the same baseline team module.
# NOTE(review): the first argument presumably selects the red side (True) or the
# blue side (False), as the variable names suggest - confirm against capture.load_agents.
red_agents = capture.load_agents(True, '../baselineTeam', [])
blue_agents = capture.load_agents(False, '../baselineTeam', [])

# Interleave the two teams into one flat list: red[0], blue[0], red[1], blue[1], ...
agents = [member for pairing in zip(red_agents, blue_agents) for member in pairing]

Loading agent team: /home/adrian/facultate/emai/AS/pacman-agent/pacman-contest/src/contest/baselineTeam.py
	Arguments: {}
Loading agent team: /home/adrian/facultate/emai/AS/pacman-agent/pacman-contest/src/contest/baselineTeam.py
	Arguments: {}


In [154]:
# Runs an entire game, you can use this for reference/understanding/debugging purposes.
# Other than that, it has no use.
def run_game():
    """Play one complete game between the notebook's agents, without graphics.

    Intended for reference, understanding and debugging only. Agents move in
    turn, in index order, until the game reports that it is over. The function
    reads the notebook-level ``rules``, ``layout`` and ``agents`` globals.

    Returns:
        int: the number of moves that were played.
    """
    # Named `current_game` so the imported `game` module is not shadowed here.
    current_game = rules.new_game(
        layout=layout,
        agents=agents,
        display=textDisplay.NullGraphics(),
        length=1200,
        mute_agents=False,
        catch_exceptions=False,
    )

    # Initialise every agent against the starting state, exactly as
    # PacmanEnv.reset does, so this function does not rely on agents that
    # happen to have been initialised by an earlier cell.
    # NOTE(review): presumably the agents require this before get_action can
    # be called on a fresh kernel - confirm against the agent implementation.
    for agent in agents:
        agent.register_initial_state(current_game.state.deep_copy())

    # Derive the turn cycle from the actual roster instead of hard-coding 4.
    num_agents = len(agents)
    agent_index = 0
    states_explored = 0

    while not current_game.game_over:
        agent = agents[agent_index]

        # The agent decides from its own observation of a copy of the state,
        # so it cannot mutate the real game state.
        observation = agent.observation_function(current_game.state.deep_copy())
        action = agent.get_action(observation)
        states_explored += 1

        current_game.state = current_game.state.generate_successor(agent_index, action)
        current_game.move_history.append((agent_index, action))

        # Let the rules inspect the new state (this is what can end the game).
        rules.process(current_game.state, current_game)
        agent_index = (agent_index + 1) % num_agents

    return states_explored

In [155]:
import gymnasium as gym
import numpy as np
import stable_baselines3 as sb3

# Side lengths of the observation image: the board rendering is resized to HEIGHT x WIDTH.
HEIGHT = 256
WIDTH = 256
# Number of agents that take turns; the environment's turn counter wraps around at this value.
NUM_AGENTS = 4

class PacmanEnv(gym.Env):
    """Gymnasium environment wrapping a Pacman capture game.

    Every call to ``step`` advances the game by one move of the agent whose
    turn it is; turns cycle through ``NUM_AGENTS`` agents in index order.
    Observations are the textual board rendering converted to a
    ``(HEIGHT, WIDTH)`` ``uint8`` array, matching ``observation_space``.

    The environment reads the notebook-level ``layout`` and ``agents``
    globals; only the rules object and the episode length are injected.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, rules, max_episode_length):
        """Create the environment and start a first game.

        Args:
            rules: rules object used to create games (``new_game``) and to
                process each new state (``process``).
            max_episode_length: passed as ``length`` when a game is created.
        """
        super().__init__()
        # Action and observation spaces must be gym.spaces objects.
        self.action_space = spaces.Discrete(NUM_ACTIONS)
        # Single-channel image with one byte per pixel.
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(HEIGHT, WIDTH), dtype=np.uint8)

        self.rules = rules
        self.max_episode_length = max_episode_length
        # TODO: do we really need to call this in the constructor? It is kept
        # so the environment is usable right after construction.
        self.reset()

    def get_state_image(self, state):
        """Render ``state`` as a ``(HEIGHT, WIDTH)`` ``uint8`` array.

        The board text is read as one byte per cell, reshaped to the layout
        size, and bilinearly resized to the observation size.

        Args:
            state: game state whose ``str()`` is the textual board.

        Returns:
            numpy.ndarray: array of shape ``(HEIGHT, WIDTH)`` and dtype
            ``uint8``, i.e. a member of ``observation_space``.
        """
        # The last two pieces of the split text are not board rows.
        # NOTE(review): presumably the score line and a trailing empty string
        # - confirm against the state's __str__.
        board_rows = str(state).split('\n')[:-2]
        cells = np.frombuffer(''.join(board_rows).encode(), dtype=np.uint8)
        # frombuffer returns a read-only view; copy so the tensor conversion
        # below works on writable memory.
        cells = np.reshape(cells, (self.height, self.width)).copy()

        # ToTensor adds a leading channel axis and rescales uint8 to [0, 1].
        resized = T.Resize((HEIGHT, WIDTH), T.InterpolationMode.BILINEAR)(T.ToTensor()(cells))

        # Undo both so the result really matches observation_space: drop the
        # channel axis and map back to bytes in [0, 255]. Without this the
        # float values in [0, 1] would truncate to 0 when stored as uint8.
        return resized.squeeze(0).mul(255).round().clamp(0, 255).byte().numpy()

    def step(self, action):
        """Advance the game by one move of the agent whose turn it is.

        Args:
            action: currently IGNORED. The move is chosen by the baseline
                agent whose turn it is (see the TODO below).

        Returns:
            tuple: ``(observation, reward, terminated, truncated, info)``
            where ``reward`` is always 0, ``truncated`` is always False and
            ``info['legal_actions']`` holds the legal actions of the agent
            that moves next.
        """
        # TODO: need translation of action from "spaces.Discrete" to our action space "(NORTH, SOUTH, EAST, WEST)"
        # also look into using the legal actions from the info dict
        agent = agents[self.agent_index]

        obs = agent.observation_function(self.game.state.deep_copy())
        # FOR NOW USE BASELINE AGENT ACTION
        action = agent.get_action(obs)

        self.game.state = self.game.state.generate_successor(self.agent_index, action)
        self.game.move_history.append((self.agent_index, action))

        # Use the injected rules object rather than the notebook global.
        self.rules.process(self.game.state, self.game)
        self.agent_index = (self.agent_index + 1) % NUM_AGENTS

        terminated = self.game.game_over

        game_obs = self.get_state_image(self.game.state)
        # TODO: ideally, we need to do multi-observation (by additionally using
        #  a feature vector containing for example the noisy distances to the enemies). More information on how this is done here:
        # https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/envs/multi_input_envs.html#SimpleMultiObsEnv
        # TODO: create a reward function and return reward.
        reward = 0
        # TODO: find out what truncated and info are and if we need them
        truncated = False
        info = {'legal_actions': self.game.state.get_legal_actions(self.agent_index)}

        return game_obs, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        """Start a new game and return its first observation.

        Args:
            seed: forwarded to ``gym.Env.reset`` so the environment's random
                generator is seeded as Gymnasium expects.
            options: accepted for API compatibility; not used.

        Returns:
            tuple: ``(observation, info)`` with the same ``info`` layout as
            ``step``.
        """
        super().reset(seed=seed)

        self.game = self.rules.new_game(layout=layout, agents=agents,
                                        display=textDisplay.NullGraphics(),
                                        length=self.max_episode_length,
                                        mute_agents=False, catch_exceptions=False)
        self.agent_index = 0
        # Each agent gets its own copy of the starting state to initialise from.
        for agent in agents:
            agent.register_initial_state(self.game.state.deep_copy())

        # Board size in cells; needed to reshape the textual rendering.
        self.height = layout.height
        self.width = layout.width

        game_obs = self.get_state_image(self.game.state)

        info = {'legal_actions': self.game.state.get_legal_actions(self.agent_index)}
        return game_obs, info

    # Probably not needed since we can display the game using the replay method.
    def render(self):
        """No-op: rendering is not implemented."""
        pass

    # TODO: find out what this is for
    def close(self):
        """No-op: nothing is released here."""
        pass

# Build a DQN learner with an MLP policy on a fresh Pacman environment
# (episode length 1200) and a 10,000-transition replay buffer.
rl = sb3.DQN(
    'MlpPolicy',
    PacmanEnv(rules, 1200),
    buffer_size=10_000,
    verbose=1,
)

# Train for 10,000 environment steps.
rl.learn(total_timesteps=10_000)

Red team starts
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Red team starts
3
3
3
2
3
3
2
3
3
0
3
2
2
2
3
2
3
2
0
0
2
3
2
3
0
2
0
3
3
2
0
0
1
1
2
3
3
1
0
2
1
3
2
3
3
1
3
1
0
0
1
2
0
1
1
0
2
0
3
1
2
0
0
1
0
1
1
2
1
3
1
0
0
0
2
3
1
0
0
1
2
3
3
3
1
1
3
0
3
1
1
2
1
3
1
3
0
1
1
0
0
1
0
3
0
2
0
3
3
3
0
1
1
3
3
3
0
0
1
3
0
2
3
0
1
3
3
3
1
0
0
3
1
0
2
0
2
2
2
1
0
0
3
1
3
1
0
0
2
1
3
0
2
2
2
0
2
3
3
2
1
2
0
0
3
3
1
2
0
0
1
2
1
2
3
2
3
3
1
0
1
0
0
0
0
1
3
2
2
3
1
2
1
3
0
0
0
0
3
1
1
0
1
3
0
0
2
0
2
3
2
2
2
2
2
2
2
0
1
1
0
2
1
2
1
0
0
0
1
3
1
2
0
3
0
0
3
2
1
3
0
3
3
0
1
0
0
0
1
0
0
1
2
3
0
2
3
0
2
3
3
1
2
3
1
1
2
2
3
2
3
0
3
3
0
3
0
1
3
1
2
1
3
2
3
0
2
1
1
3
2
2
3
3
0
0
0
1
1
0
0
2
3
3
0
0
1
2
2
3
2
2
0
0
1
2
1
2
2
2
2
0
3
3
0
1
0
3
1
1
2
0
0
1
3
1
2
3
2
1
2
3
2
2
1
3
2
0
2
1
1
1
3
2
3
3
3
0
2
1
2
3
3
2
0
3
3
0
1
2
1
3
1
2
1
2
1
3
0
2
0
0
0
3
2
1
2
0
2
2
1
1
0
0
3
0
2
0
2
3
0
0
1
3
1
1
2
0
2
0
0
1
1
2
3
1
0
3
3
1
1
3
2
0
3
3
2
0
1
0
3
3
0
2
0
1
1

KeyboardInterrupt: 