In [1]:
import gym 
from gym import spaces
import numpy as np
import pygame
from pygame.locals import *
from run import GameController
from constants import *
from pacman import Pacman
from ghost import Ghosts
from nodes import NodeGroup
from pellets import PelletGroup
from fruits import Fruits
from stable_baselines3 import DQN
from stable_baselines3 import PPO
import os
import time
class PacmanEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

class PacmanEnv(gym.Env):
    def __init__(self, render_mode=False):
        super(PacmanEnv, self).__init__()
        self.render_mode = render_mode

        if not render_mode:
            os.environ["SDL_VIDEODRIVER"] = "dummy"
        else:
            os.environ.pop("SDL_VIDEODRIVER", None)

        pygame.quit() 
        pygame.init()

        self.game = GameController(render_mode=render_mode)

        self.action_space = spaces.Discrete(5, start=-2)
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(SCREENHEIGHT, SCREENWIDTH, 3), dtype=np.uint8
        )

    def reset(self):
        self.game.startGame(3)
        state = self.get_observation()
        return state
    
    def step(self, action):
        action = int(action)
        action = np.clip(action, 0, 4)
    
        action = action - 2
        if self.game.pacman.validDirection(action):
            self.game.pacman.direction = action 

        pelletBefore = self.game.pellets.numEaten 
        lifesBefore = self.game.pacman.life_amount   
        self.game.update()

        if self.game.pacman.target is not None and self.game.pacman.overshotTarget():
            self.game.pacman.node = self.game.pacman.target
            self.game.pacman.setPosition()

        self.game.update() 

        state = self.get_observation() 

        reward = 0

        pellet = self.game.pellets.numEaten - pelletBefore
        if pellet == 1:
            reward += 20

        fruit = None
        if self.game.fruits is not None:
            fruit = self.game.pacman.eatFruits(self.game.fruits)
            if fruit:
                reward += 20

        lifes = self.game.pacman.life_amount - lifesBefore
        if lifes == -1:
            reward -= 50


        if pellet == 0 and fruit is None:
            reward -= 2

        done = self.check_game_over()

        info = {}

        return state, reward, done, info
    
    def render(self, mode="human"):
        if self.render_mode and mode == "human":
            self.game.render()

    def get_observation(self):
        return pygame.surfarray.array3d(self.game.screen)

    def _init_pygame(self):
        if not pygame.get_init():
            pygame.init()

    def close(self):
        pygame.quit()

    def check_game_over(self):
        return self.game.pacman.life_amount == 0
    
    def change_resolution(self, width, height):
        global SCREENWIDTH, SCREENHEIGHT

        constants_path = os.path.join(os.path.dirname(__file__), "constants.py")
        with open(constants_path, "r") as file:
            lines = file.readlines()

        with open(constants_path, "w") as file:
            for line in lines:
                if line.startswith("SCREENWIDTH"):
                    file.write(f"SCREENWIDTH = {width}\n")
                elif line.startswith("SCREENHEIGHT"):
                    file.write(f"SCREENHEIGHT = {height}\n")
                else:
                    file.write(line)

        SCREENWIDTH, SCREENHEIGHT = width, height
        
        self.game.screen = pygame.display.set_mode((SCREENWIDTH, SCREENHEIGHT))
        self.game.width, self.game.height = SCREENWIDTH, SCREENHEIGHT

    def get_observation(self):
        observation = pygame.surfarray.array3d(self.game.screen)
        return np.transpose(observation, (1, 0, 2))








In [3]:
env = PacmanEnv(render_mode=False)
model = DQN("MlpPolicy", env, verbose=1, buffer_size=1000)

print("Training the model...")
for i in range(10):
    model.learn(total_timesteps=5000)
    env.reset
    print(i)

model.save("pacman_10x5000dqn_model")
env.close()


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




Training the model...


TypeError: Ghosts.update_ghosts() missing 1 required positional argument: 'pacman'

In [2]:
print("Switching to testing mode...")
env = PacmanEnv(render_mode=True)
state = env.reset()
rewardMain = 0
model = DQN("MlpPolicy", env, verbose=1, buffer_size=1000)

model.load("pacman_5x10000_dqn_model.zip")

state = env.reset()

rewards = []
durations = []
times=[]
episodes = 20
max_episode_time = 5 * 60
scores=[]
for episode in range(episodes):
    state = env.reset()
    episode_reward = 0
    steps = 0
    steps_time = 0
    done = False
    episode_start_time = time.time()
    score=0
    print("starting episode" + str(episode))
    while not done:
        if time.time() - episode_start_time >= max_episode_time:
            print(f"Episode {episode} exceeded 10 minutes, stopping early.")
            break
        steptime = time.time()
        action, _ = model.predict(state)
        print(action)
        state, reward, done, _ = env.step(action)
        episode_reward += reward
        steps_time += time.time() - steptime
        steps += 1
        env.render()
    scores.append(env.game.score)
    times.append(steps_time/steps)
    rewards.append(episode_reward)
    durations.append(steps)
env.close()
mean_score = 0
for score in scores:
    mean_score+=score
mean_score = mean_score/episodes
file = open("tests.txt", "a")
file.write("DQN model for " + str(episodes) + " episodes\n")    
file.write("score: " + str(mean_score))    

for i in range(len(rewards)):
    file.write("episode" + str(i)+ " :" + "reward " + str(rewards[i]) + " duration: " + str(durations[i]) + " mean step time: " + str(times[i])+"\n")     
file.close()



Switching to testing mode...
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


Exception: code() argument 13 must be str, not int
Exception: code() argument 13 must be str, not int


starting episode0
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
starting episode1
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3


KeyboardInterrupt: 

In [1]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

env = PacmanEnv(render_mode=False)
    
model = PPO("CnnPolicy", env, verbose=1, n_steps=256, batch_size=64, ent_coef=0.01)

print("Training the PPO model...")
for i in range(10):
    model.learn(total_timesteps=5000)
    env.reset
    print(i)

model.save("pacman_10x5000ppo_model")
env.close() 

NameError: name 'PacmanEnv' is not defined

In [None]:
print("Switching to testing mode...")
env = PacmanEnv(render_mode=True)
state = env.reset()
rewardMain = 0
model = DQN("MlpPolicy", env, verbose=1, buffer_size=1000)

model.load("pacman_50000_dqn_model.zip")

state = env.reset()

rewards = []
durations = []
times=[]
episodes = 20
max_episode_time = 5 * 60
scores=[]
for episode in range(episodes):
    state = env.reset()
    episode_reward = 0
    steps = 0
    steps_time = 0
    done = False
    episode_start_time = time.time()
    score=0
    print("starting episode" + str(episode))
    while not done:
        if time.time() - episode_start_time >= max_episode_time:
            print(f"Episode {episode} exceeded 10 minutes, stopping early.")
            break
        steptime = time.time()
        action, _ = model.predict(state)
        print(action)
        state, reward, done, _ = env.step(action)
        episode_reward += reward
        steps_time += time.time() - steptime
        steps += 1
        env.render()
    scores.append(env.game.score)
    times.append(steps_time/steps)
    rewards.append(episode_reward)
    durations.append(steps)
env.close()
mean_score = 0
for score in scores:
    mean_score+=score
mean_score = mean_score/episodes
file = open("tests.txt", "a")
file.write("DQN model for " + str(episodes) + " episodes\n")    
file.write("score: " + str(mean_score))    

for i in range(len(rewards)):
    file.write("episode" + str(i)+ " :" + "reward " + str(rewards[i]) + " duration: " + str(durations[i]) + " mean step time: " + str(times[i])+"\n")     
file.close()



In [None]:
print("Trained PPO model:")
state = env.reset()
rewardMain = 0
for _ in range(1000):
    action, _states = model.predict(state)

    state, reward, done, info = env.step(action)

    env.render()
    rewardMain += reward

    if done:
        print("Game Over")
        break

print(f"Total reward during testing: {rewardMain}")
env.close()

In [4]:
from stable_baselines3.common.save_util import load_from_zip_file

# Wczytanie danych bezpośrednio z pliku ZIP
data, params, _ = load_from_zip_file("pacman_50000_dqn_model.zip")

policy_params = params['policy']

# Przeglądanie wszystkich wag
for param, value in policy_params.items():
    print(f"{param}: wartości =\n{value.detach().cpu().numpy()}")


q_net.q_net.0.weight: wartości =
[[ 2.3925076e-04 -6.5672200e-04  5.3224515e-04 ...  5.3460681e-04
   3.4163723e-04 -7.2052394e-04]
 [ 5.9212238e-04 -4.4889134e-04  7.8002218e-04 ... -3.8859915e-04
  -6.8005506e-04 -1.2634526e-04]
 [ 3.0158254e-04 -7.8318124e-05  4.6163899e-05 ... -5.7490659e-04
  -6.3181494e-04 -8.2540227e-04]
 ...
 [-7.7338779e-04  3.3001532e-04 -5.0240004e-04 ...  8.0900890e-04
   6.5231504e-04  7.0917537e-04]
 [ 6.9504359e-04 -5.4917444e-04  5.4113590e-04 ...  1.4747500e-04
   1.0414750e-04 -6.4103416e-04]
 [-3.1208736e-04 -3.3204953e-04 -9.6093914e-05 ...  3.7679175e-04
   1.5308202e-04  4.0787220e-04]]
q_net.q_net.0.bias: wartości =
[-1.1789298e-03 -6.2681836e-05 -4.2216206e-04 -7.8597368e-04
  4.5443727e-03 -7.6337933e-04  5.7958992e-04 -2.5106332e-04
  6.7077945e-03 -8.1022724e-04  2.7512228e-03  7.0024695e-04
 -1.2908038e-03 -4.5241287e-04 -1.5699057e-03 -1.9009312e-04
 -5.6728095e-05  1.2117593e-04 -3.0146159e-05  4.4372808e-03
 -8.0119423e-04 -4.0292513e-04 