In [10]:
import gym
from gym import spaces
import gym_ple
import numpy as np

### Flappy Bird Sem Visão

Definição do Ambiente

In [11]:
class FlappyBirdEnv(gym.Env):

  def __init__(self):
    self.env = gym.make("FlappyBird-v0")
    # Define action and observation space
    # They must be gym.spaces objects
    # Example when using discrete actions:
    self.action_space = self.env.action_space
    # Example for using image as input:
    self.observation_space = spaces.Box(low=np.array([0, -10.0, 0, 0, 0, 0, 0, 0]),
                                        high=np.array([512, 10.0, 588.0, 512, 512, 588.0, 512, 512]),
                                        dtype=np.float32)

  def step(self, action):
    observation, reward, done, info = self.env.step(action)
    observation = np.array(list(self.env.game_state.getGameState().values()))
    if done:
        reward = -1
    reward += 0.1
#     reward += (75 - abs(observation[0] - (observation[3] + observation[4])/2))*max((300 - observation[2])/300, 0)/750
    return observation, reward, done, info

  def reset(self):
    self.env.reset()
    observation = np.array(list(self.env.game_state.getGameState().values()))
    return observation  # reward, done, info can't be included

  def render(self):
    self.env.render()
    
  def close (self):
    self.env.close()

In [12]:
from stable_baselines.common.env_checker import check_env

env = FlappyBirdEnv()
check_env(env)

In [13]:
import gym

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common import set_global_seeds, make_vec_env
# from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

env = make_vec_env(FlappyBirdEnv, n_envs = 16)
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
# env = DummyVecEnv([lambda: env])

In [14]:
model = PPO2(MlpPolicy, env, n_steps = 512, nminibatches = 64, lam = 0.98, gamma = 0.99, noptepochs= 10, ent_coef= 0.001, verbose=1)



In [67]:
model = PPO2.load("trained_models/PPO2_1280")
model.set_env(env)

Loading a model without an environment, this model cannot be trained until it has a valid environment.


In [71]:
# model.learn(total_timesteps=100000)
model.learn(total_timesteps=8192)

-------------------------------------
| approxkl           | 0.011902812  |
| clipfrac           | 0.078186035  |
| ep_len_mean        | 311          |
| ep_reward_mean     | 37.2         |
| explained_variance | 0.01         |
| fps                | 324          |
| n_updates          | 1            |
| policy_entropy     | 0.29404902   |
| policy_loss        | 0.0006345886 |
| serial_timesteps   | 512          |
| time_elapsed       | 0            |
| total_timesteps    | 8192         |
| value_loss         | 1.127012     |
-------------------------------------


<stable_baselines.ppo2.ppo2.PPO2 at 0x1f972e3a2b0>

In [69]:
flappyTest = FlappyBirdEnv()

In [70]:
obs = flappyTest.reset()
dones = 0
while not dones:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = flappyTest.step(action)
    flappyTest.render()

flappyTest.close()

In [60]:
model.save("trained_models/PPO2_1280")

### Flappy Bird CNN LSTM

In [11]:
ENV_NAME = "FlappyBird-v0"

import gym

from stable_baselines.common.policies import CnnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

env_vision = gym.make(ENV_NAME)
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
env_vision = DummyVecEnv([lambda: env_vision])

In [None]:
modelVision = PPO2(CnnLstmPolicy, env_vision, n_steps = 512, nminibatches = 1, lam = 0.98, gamma = 0.999, noptepochs= 15, ent_coef= 0.01, verbose=1)

In [None]:
modelVision.learn(total_timesteps=10000)

In [None]:
obs = env_vision.reset()
dones = 0
while not dones:
    action, _states = modelVision.predict(obs)
    obs, rewards, dones, info = env_vision.step(action)
    env.render()

env_vision.close()