In [2]:
import gym 
from gym import spaces
import numpy as np
import pygame
from pygame.locals import *
from run import GameController
from constants import *
from pacman import Pacman
from ghost import Ghosts
from nodes import NodeGroup
from pellets import PelletGroup
from fruits import Fruits
from stable_baselines3 import DQN
from stable_baselines3 import PPO
import os

class PacmanEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

class PacmanEnv(gym.Env):
    def __init__(self, render_mode=False):
        super(PacmanEnv, self).__init__()
        self.render_mode = render_mode

        # Ensure correct SDL driver
        if not render_mode:
            os.environ["SDL_VIDEODRIVER"] = "dummy"
        else:
            os.environ.pop("SDL_VIDEODRIVER", None)

        pygame.quit()  # Clean up previous states
        pygame.init()

        # Pass render_mode to GameController
        self.game = GameController(render_mode=render_mode)

        # Initialize the observation space
        self.action_space = spaces.Discrete(5, start=-2)
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(SCREENHEIGHT, SCREENWIDTH, 3), dtype=np.uint8
        )

    def reset(self):
        self.game.startGame(3)  # Rozpoczęcie gry
        state = self.get_observation()  # Pobranie stanu początkowego
        return state
    
    def step(self, action):
        action = int(action)  # Konwersja akcji na int
        action = np.clip(action, 0, 4)  # Upewnij się, że akcja mieści się w zakresie [0, 4]
    
        # Mappowanie akcji z {0, 1, 2, 3, 4} na {-2, -1, 0, 1, 2}
        action = action - 2
        if self.game.pacman.validDirection(action):
            self.game.pacman.direction = action 

        pelletBefore = self.game.pellets.numEaten 
        lifesBefore = self.game.pacman.life_amount   
        # Aktualizacja gry
        self.game.update()

        # Wymuszenie poprawności pozycji Pacmana
        if self.game.pacman.target is not None and self.game.pacman.overshotTarget():
            self.game.pacman.node = self.game.pacman.target
            self.game.pacman.setPosition()

        self.game.update()  # Ponowna aktualizacja stanu gry

        state = self.get_observation()  # Pobranie nowego stanu po akcji

        reward = 0

        #pellet = self.game.pacman.eatPellets(self.game.pellets.pelletList)
        pellet = self.game.pellets.numEaten - pelletBefore
        if pellet == 1:
            reward += 20  # Nagroda za zjedzenie pelletu

        #liczenie za owocki nie działa drodzy panstwo
        fruit = None
        if self.game.fruits is not None:  # Sprawdzenie, czy jest owoc
            fruit = self.game.pacman.eatFruits(self.game.fruits)
            if fruit:
                reward += 20  # Nagroda za zjedzenie owocu

        # Kara za bycie zjedzonym przez ducha
        lifes = self.game.pacman.life_amount - lifesBefore
        if lifes == -1:
            reward -= 50

        # Kara za brak punktów
        if pellet == 0 and fruit is None:
            reward -= 2

        done = self.check_game_over()  # Sprawdzenie, czy gra się skończyła

        info = {}

        return state, reward, done, info
    
    def render(self, mode="human"):
        if self.render_mode and mode == "human":
            self.game.render()  # Only render if enabled

    def get_observation(self):
        # Grab the screen as an observation
        return pygame.surfarray.array3d(self.game.screen)

    def _init_pygame(self):
        if not pygame.get_init():
            pygame.init()

    def close(self):
        pygame.quit()  # Zamknięcie pygame

    def check_game_over(self):
        return self.game.pacman.life_amount == 0  # Gra kończy się, gdy Pacman nie ma żyć
    
    def change_resolution(self, width, height):
        global SCREENWIDTH, SCREENHEIGHT

        # Nadpisanie wartości w pliku constants.py
        constants_path = os.path.join(os.path.dirname(__file__), "constants.py")
        with open(constants_path, "r") as file:
            lines = file.readlines()

        with open(constants_path, "w") as file:
            for line in lines:
                if line.startswith("SCREENWIDTH"):
                    file.write(f"SCREENWIDTH = {width}\n")
                elif line.startswith("SCREENHEIGHT"):
                    file.write(f"SCREENHEIGHT = {height}\n")
                else:
                    file.write(line)

        # Zaktualizowanie zmiennych globalnych w Pythonie
        SCREENWIDTH, SCREENHEIGHT = width, height
        
        # Ponowne załadowanie ustawień (zainicjalizowanie nowego okna gry)
        self.game.screen = pygame.display.set_mode((SCREENWIDTH, SCREENHEIGHT))  # Ustawienie nowego rozmiaru okna
        self.game.width, self.game.height = SCREENWIDTH, SCREENHEIGHT  # Przekazanie nowych 

    def get_observation(self):
        observation = pygame.surfarray.array3d(self.game.screen)  # Pobranie obrazu z gry
        # Zamień wymiary z (width, height, channels) na (height, width, channels)
        return np.transpose(observation, (1, 0, 2))  # Zamienia wymiary: (800, 600, 3) na (600, 800, 3)








In [None]:
#Main section to train and test the model
env = PacmanEnv(render_mode=False)  # Initialize in training mode
model = DQN("CnnPolicy", env, verbose=1, buffer_size=1000)

print("Training the model...")
model.learn(total_timesteps=2000)
model.save("pacman_dqn_model")
env.close()  # Properly close the training environment

    # TESTING MODE (Rendering enabled)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Training the model...
Switching to testing mode...
Game Over
-190


In [6]:
print("Switching to testing mode...")
env = PacmanEnv(render_mode=True)  # New environment with rendering enabled
state = env.reset()
rewardMain = 0
model = DQN("CnnPolicy", env, verbose=1, buffer_size=1000)

model.load("pacman_dqn_model.zip")

state = env.reset()
rewardMain = 0
for _ in range(1000):
    # Wybór akcji przez model
    action, _states = model.predict(state)
        
    # Wykonanie akcji w środowisku
    state, reward, done, info = env.step(action)
        
        # Renderowanie aktualnego stanu gry
    env.render()
    rewardMain += reward
#         print(rewardMain)

    # Jeśli gra się skończyła, wypisanie "Game Over" i zakończenie
    if done:
        print("Game Over")
        break
    
print(rewardMain)
    
env.close()  # Zamknięcie środowiska



Switching to testing mode...
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
-1978


In [1]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Initialize the Pacman environment in training mode
env = PacmanEnv(render_mode=False)  # Use training mode (no rendering)
    
# Initialize PPO model with CnnPolicy
model = PPO("CnnPolicy", env, verbose=1, n_steps=256, batch_size=64, ent_coef=0.01)

print("Training the PPO model...")
model.learn(total_timesteps=100)
model.save("pacman_ppo_model")  # Save the trained PPO model
env.close()  # Properly close the training environment

    # TESTING MODE (Rendering enabled)
print("Switching to testing mode...")
env = PacmanEnv(render_mode=True)  # Create a new environment with rendering enabled
    
    # Load the trained PPO model
model = PPO.load("pacman_ppo_model", env=env)

state = env.reset()
rewardMain = 0

NameError: name 'PacmanEnv' is not defined

In [None]:
print("Trained PPO model:")
state = env.reset()
rewardMain = 0
for _ in range(1000):
    # Predict the next action using the trained PPO model
    action, _states = model.predict(state)

        # Execute the action in the environment
    state, reward, done, info = env.step(action)

        # Render the current game state
    env.render()
    rewardMain += reward

        # If the game ends, display "Game Over" and exit the loop
    if done:
    print("Game Over")
    break

print(f"Total reward during testing: {rewardMain}")
env.close()  # Close the testing environment


# Main section to train and test the model
from stable_baselines3 import A2C  # Import A2C zamiast PPO
from stable_baselines3.common.env_util import make_vec_env

# Initialize the Pacman environment in training mode
env = PacmanEnv(render_mode=False)  # Use training mode (no rendering)
    
    # Initialize A2C model with CnnPolicy
model = A2C("CnnPolicy", env, verbose=1, n_steps=5, ent_coef=0.01, learning_rate=0.0007, gamma=0.99)

print("Training the A2C model...")
model.learn(total_timesteps=100)  # Train the model
model.save("pacman_a2c_model")  # Save the trained A2C model
env.close()  # Properly close the training environment

    # TESTING MODE (Rendering enabled)
print("Switching to testing mode...")
env = PacmanEnv(render_mode=True)  # Create a new environment with rendering enabled
    
    # Load the trained A2C model
model = A2C.load("pacman_a2c_model", env=env)

state = env.reset()


In [None]:
print("Trained A2C model:")
state = env.reset()
rewardMain = 0
for _ in range(1000):
        # Predict the next action using the trained A2C model
    action, _states = model.predict(state)

        # Execute the action in the environment
    state, reward, done, info = env.step(action)

        # Render the current game state
    env.render()
    rewardMain += reward

        # If the game ends, display "Game Over" and exit the loop
    if done:
        print("Game Over")
        break

print(f"Total reward during testing: {rewardMain}")

env.close()  # Close the testing environment
