In [6]:
import import_ipynb
import random, datetime
from pathlib import Path

import gymnasium
from gymnasium.wrappers import FrameStack, GrayScaleObservation, TransformObservation
import gymnasium as gym

In [7]:
class SkipFrame(gym.Wrapper):
    """Environment wrapper that repeats each action for several frames.

    A single call to `step` forwards the same action to the wrapped
    environment up to `skip` times, sums the rewards, and returns the
    observation/flags/info produced by the last inner step.
    """

    def __init__(self, env, skip):
        """Return only every `skip`-th frame.

        Args:
            env: the environment to wrap.
            skip: how many times each action is repeated; must be >= 1.

        Raises:
            ValueError: if `skip` is smaller than 1 (the repeat loop would
                never run and `step` would have nothing to return).
        """
        if skip < 1:
            raise ValueError(f"skip must be at least 1, got {skip}")
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        """Repeat action, and sum reward.

        Args:
            action: the action forwarded unchanged to every inner step.

        Returns:
            The 5-tuple `(obs, total_reward, done, truncated, info)` where
            `total_reward` is the sum over the inner steps that were executed
            and the remaining items come from the last inner step.
        """
        total_reward = 0.0
        for _ in range(self._skip):
            # Accumulate reward and repeat the same action
            obs, reward, done, truncated, info = self.env.step(action)
            total_reward += reward
            # Stop on either end-of-episode signal: stepping an environment
            # after termination *or* truncation is invalid until it is reset.
            if done or truncated:
                break
        return obs, total_reward, done, truncated, info

In [8]:
import random, datetime
from pathlib import Path

import gymnasium
from gymnasium.wrappers import FrameStack, GrayScaleObservation, TransformObservation


from metrics import MetricLogger
from agent import SpaceInvader
from wrappers import ResizeObservation

# Replay a previously trained agent for a fixed number of episodes.

# Fail fast: verify the checkpoint before opening the render window or
# creating a new (otherwise empty) timestamped save directory.
checkpoint = Path('checkpoints/2023-05-11T19-11-46/SpaceInvader_net_13.chkpt')
if not checkpoint.exists():
    raise ValueError(f"{checkpoint} does not exist")

env = gymnasium.make("ALE/SpaceInvaders-v5", mode=2, render_mode="human")

try:
    # Observation pipeline: frame skipping -> grayscale -> resize ->
    # scale pixel values by 1/255 -> stack the last 4 frames.
    env = SkipFrame(env, skip=4)
    env = GrayScaleObservation(env, keep_dim=False)
    env = ResizeObservation(env, shape=84)
    env = TransformObservation(env, f=lambda x: x / 255.)
    env = FrameStack(env, num_stack=4)

    # No reset here: every episode below starts with its own env.reset().

    save_dir = Path('checkpoints') / datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')
    save_dir.mkdir(parents=True)

    # state_dim mirrors num_stack=4 and shape=84 used in the pipeline above.
    spaceInvader = SpaceInvader(state_dim=(4, 84, 84), action_dim=env.action_space.n, save_dir=save_dir, checkpoint=checkpoint)
    # Pin exploration to its configured minimum for the replay.
    spaceInvader.exploration_rate = spaceInvader.exploration_rate_min

    logger = MetricLogger(save_dir)
    episodes = 100
    for e in range(episodes):

        # reset() returns (observation, info); only the observation is needed.
        state, _ = env.reset()

        # Play the game!
        while True:

            # Run agent on the state
            action = spaceInvader.act(state)

            # Agent performs action. step() returns a 5-tuple: index 3 is the
            # `truncated` flag and the info dict is index 4.
            next_state, reward, terminated, truncated, info = env.step(action)

            # Remember
            spaceInvader.cache(state, next_state, action, reward, terminated)

            # Learn
            q, loss = spaceInvader.learn()

            # Logging
            logger.log_step(reward, loss, q)

            # Update state
            state = next_state

            # Check if end of game (either natural end or truncation)
            if terminated or truncated:
                break

        logger.log_episode()

        if e % 20 == 0:
            logger.record(
                episode=e,
                epsilon=spaceInvader.exploration_rate,
                step=spaceInvader.curr_step
            )
finally:
    # Always release the emulator and its render window, even on error
    # or when the cell is interrupted.
    env.close()

ValueError: checkpoints/2023-05-11T19-11-46/SpaceInvader_net_13.chkpt does not exist