**1. Import Dependencies**

In [10]:
# Import Gym Stuff
import gym
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

# Import helpers
import numpy as np
import random
import os

# Import stable baselines stuff
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [11]:
#Import Game Stuff
from snakeGame import SnakeGame

**2. Build the Environment**

In [12]:
class SnakeENV(Env):
    """Gym environment wrapping a single ``SnakeGame`` instance.

    Actions are ``Discrete(4)``: 0=UP, 1=DOWN, 2=LEFT, 3=RIGHT.

    Observations are a dict with the keys ``head_pos``, ``fruit_pos``,
    ``length`` and ``direction``. The three array entries are float32 so
    that they match the dtype of their ``Box`` spaces.
    """

    # Must be a class attribute for gym to see it (it was a dead local before).
    metadata = {'render.modes': ['human',]}

    # Step cap used by isEpisodeLenght().
    MAX_EPISODE_STEPS = 6600

    # The index of a name in this tuple is its discrete action id.
    _DIRECTIONS = ('UP', 'DOWN', 'LEFT', 'RIGHT')

    def __init__(self, num_envs=1) -> None:
        """Create the game and declare the action/observation spaces.

        Args:
            num_envs: Stored on the instance for backward compatibility only.
                One environment always drives exactly one game; use a
                vectorised wrapper to run several copies in parallel.
        """
        super(SnakeENV, self).__init__()

        self.num_envs = num_envs
        # A single game, not a list: every other method (and the space
        # definitions below) accesses attributes of one game directly.
        self.game = SnakeGame()

        board_w = self.game.window.width
        board_h = self.game.window.height
        start_len = len(self.game.snake.body)

        def bounded(low, high):
            # Explicit float32 bounds keep gym from silently down-casting.
            return Box(low=np.array(low, dtype=np.float32),
                       high=np.array(high, dtype=np.float32),
                       dtype=np.float32)

        self.action_space = Discrete(4)
        self.observation_space = Dict([
            ('head_pos', bounded([0, 0], [board_w, board_h])),
            ('fruit_pos', bounded([0, 0], [board_w, board_h])),
            ('length', bounded([start_len], [board_w * board_h])),
            ('direction', Discrete(4)),
        ])

        self.time_run = 0
        # Populate the state so it exists even before the first reset().
        self.state = self._get_obs()

    @staticmethod
    def _as_box_value(values):
        """Truncate to whole numbers (as before), then cast to the Box dtype."""
        return np.array(values).astype(int).astype(np.float32)

    def _get_obs(self):
        """Build the observation dict from the current game state."""
        return {
            'head_pos': self._as_box_value(self.game.snake.head_pos),
            'fruit_pos': self._as_box_value(self.game.fruit.pos),
            # Wrapped in a list so the shape is (1,), matching the Box.
            'length': self._as_box_value([len(self.game.snake.body)]),
            'direction': self.direction_to_action(self.game.snake.direction),
        }

    def isEpisodeLenght(self):
        """Count one step and report whether the step cap has been exceeded.

        Not called by ``step``; available for callers that want a time limit.
        """
        self.time_run += 1
        return self.time_run > self.MAX_EPISODE_STEPS

    def step(self, action, render=True):
        """Advance the game by one action.

        Args:
            action: Discrete action id (0-3).
            render: Forwarded to ``SnakeGame.main``.

        Returns:
            ``(observation, reward, done, info)``; ``info`` is always empty.
        """
        direction = self.action_to_direction(action)
        reward, done = self.game.main(direction, render=render, isAI=True)

        # Refresh the observation; previously the state captured at reset()
        # was returned unchanged on every step.
        self.state = self._get_obs()

        info = {}

        return self.state, reward, done, info

    def render(self, mode='human'):
        """Delegate rendering to the underlying game."""
        self.game.render(mode=mode)

    def reset(self):
        """Start a fresh game and return its initial observation."""
        self.game = SnakeGame()
        self.time_run = 0
        self.state = self._get_obs()
        return self.state

    def action_to_direction(self, action):
        """Map an action id to its direction name; ``None`` if unknown."""
        # Compared with == (not a dict lookup) so numpy integer scalars and
        # other int-like actions behave exactly like plain ints.
        for action_id, name in enumerate(self._DIRECTIONS):
            if action == action_id:
                return name
        return None

    def direction_to_action(self, direction):
        """Map a direction name to its action id; ``None`` if unknown."""
        for action_id, name in enumerate(self._DIRECTIONS):
            if direction == name:
                return action_id
        return None


In [13]:
# Smoke-test the environment with a random policy before any training.
env = SnakeENV()
episodes = 5

for episode in range(1, episodes + 1):
    state = env.reset()
    done, score = False, 0

    # Keep sampling random actions until the game reports the episode is over.
    while not done:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward

    print(f'Episode: {episode} Score: {score}')

env.close()

Episode: 1 Score: -499
Episode: 2 Score: -492
Episode: 3 Score: -495
Episode: 4 Score: -500
Episode: 5 Score: -497


**3. Train a PPO model**

In [14]:
# Drop the single environment used for the random rollout; training uses a
# vectorised environment instead.
del env

# Number of environment copies that collect experience in parallel.
cpu_cores = 4

# make_vec_env instantiates SnakeENV `cpu_cores` times and wraps the copies in
# a DummyVecEnv by default (pass vec_env_cls=SubprocVecEnv to use one process
# per copy).
from stable_baselines3.common.env_util import make_vec_env
env = make_vec_env(SnakeENV, n_envs=cpu_cores)

In [15]:
# Directory handed to PPO for its TensorBoard logs.
log_path = "./Training/Logs/"

# The observation space is a Dict, hence the multi-input policy.
model = PPO(
    policy="MultiInputPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device


In [16]:
# Train for 100k environment steps (summed over all parallel environments).
model.learn(total_timesteps=100_000)

Logging to ./Training/Logs/PPO_15
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3.84     |
|    ep_rew_mean     | -497     |
| time/              |          |
|    fps             | 55       |
|    iterations      | 1        |
|    time_elapsed    | 147      |
|    total_timesteps | 8192     |
---------------------------------


Exception in thread Thread-5:
Traceback (most recent call last):
  File "/Users/alex/miniforge3/envs/gameAI/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/Users/alex/miniforge3/envs/gameAI/lib/python3.8/site-packages/tensorboard/summary/writer/event_file_writer.py", line 233, in run
    self._record_writer.write(data)
  File "/Users/alex/miniforge3/envs/gameAI/lib/python3.8/site-packages/tensorboard/summary/writer/record_writer.py", line 40, in write
    self._writer.write(header + header_crc + data + footer_crc)
  File "/Users/alex/miniforge3/envs/gameAI/lib/python3.8/site-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 766, in write
    self.fs.append(self.filename, file_content, self.binary_mode)
  File "/Users/alex/miniforge3/envs/gameAI/lib/python3.8/site-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 160, in append
    self._write(filename, file_content, "ab" if binary_mode else "a")
  File "/Users/alex/miniforge3

KeyboardInterrupt: 

In [None]:


#for i in range(100000):
#    actions, _states = model.predict(model.get_env().reset())
#    for j in range(env.num_envs):
#        model.learn(actions[j])
#        env.render()

In [None]:
# Persist the trained policy so it can be reloaded without retraining.
ppo_path = "./Training/Models/PPO_Snake_Model_MoreData"
model.save(path=ppo_path)

In [None]:
# Remove the in-memory model; the evaluation section reloads it from disk.
del model

**4. Evaluate and Test**

In [None]:
# Reload the saved policy and attach it to a fresh, single environment.
ppo_path = "./Training/Models/PPO_Snake_Model_MoreData"

env = SnakeENV()
model = PPO.load(ppo_path, env=env)

In [None]:
# Play one rendered episode; the call returns the mean and std of the reward.
evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=1,
    render=True,
)