In [2]:
# FlappyBird-DDQN Experiment
# 2020/08/11 SYC 

# import sys
import models.ddqn as DDQN
import models.expStrategy.epsilonGreedy as EPSG
import envs.flappyBird as Game
import models.util as Util
import os
import logging
from tqdm import tqdm

# print(sys.path)
# Select SDL's 'dummy' video driver before the environment is created
# (presumably so pygame can run headless, without a display -- see pygame docs).
os.environ['SDL_VIDEODRIVER'] = 'dummy'
Util.test_gpu()

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

# --- Environment -----------------------------------------------------------
game = Game.FlappyBirdEnv()
game.reset()
NUM_STATE_FEATURES = game.get_num_state_features()
NUM_ACTIONS = game.get_num_actions()

# --- Experiment configuration ----------------------------------------------
BATCH_SIZE = 32            # passed to agent.update() on every environment step
EPISODE_NUM = 20000        # total number of training episodes
PRINT_EVERY_EPISODE = 20   # size of one reporting window (one progress bar)

# --- Agent -----------------------------------------------------------------
exp_stg = EPSG.EpsilonGreedy(0.1, NUM_ACTIONS)
# NOTE(review): the positional arguments 1000, 0.9 and 1e-5 are not named here;
# presumably buffer size / reward discount / learning rate -- confirm against
# models.ddqn.Agent before tuning.
agent = DDQN.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, 1000, 0.9, 1e-5, exp_stg)

# Initial (preprocessed) state of the first episode.
env_state = game.get_state()
state = agent.preprocess_state(env_state)

accum_reward = 0   # reward summed over the current reporting window
bar = None         # progress bar of the current reporting window
logging.info("Episode 1")
for episode in range(1, EPISODE_NUM + 1):

    # A new reporting window starts at episodes 1, 1 + PRINT_EVERY_EPISODE, ...
    if episode % PRINT_EVERY_EPISODE == 1:
        if episode > 1:
            # Report the window that just finished, then reset its statistics.
            bar.close()
            logging.info("Accumulated Reward: {} | Loss: {}".format(round(accum_reward / PRINT_EVERY_EPISODE), agent.get_metrics_loss()))
            logging.info("Episode {}".format(episode))
            agent.reset_metrics_loss()
            accum_reward = 0
        bar = tqdm(total = PRINT_EVERY_EPISODE)

    # Play one episode, learning after every step.
    while not game.is_over():
        action = agent.select_action(state)
        reward = game.act(action)
        env_state_prime = game.get_state()
        state_prime = agent.preprocess_state(env_state_prime)

        agent.add_buffer(state, action, reward, state_prime)
        # Return value is kept for inspection only; this loop does not use it.
        is_update_target = agent.update(BATCH_SIZE)

        state = state_prime
        accum_reward += reward

    bar.update(1)
    game.reset()
    # Re-read the state after the reset. Without this, the next episode's first
    # action and first stored transition would start from the previous
    # episode's terminal state instead of the fresh initial state.
    env_state = game.get_state()
    state = agent.preprocess_state(env_state)

# Close the last progress bar and report the final window, which the loop
# itself never reports (it only reports a window when the next one begins).
if bar is not None:
    bar.close()
    episodes_in_window = (EPISODE_NUM - 1) % PRINT_EVERY_EPISODE + 1
    logging.info("Accumulated Reward: {} | Loss: {}".format(round(accum_reward / episodes_in_window), agent.get_metrics_loss()))


pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html
couldn't import doomish
Couldn't import doom
1 Physical GPUs, 1 Logical GPUs
2020-08-11 17:54:18,174 - Episode 1
100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
2020-08-11 17:54:41,402 - Accumulated Reward: -5 | Loss: 40.433990478515625
2020-08-11 17:54:41,403 - Episode 21
 30%|███       | 6/20 [00:06<00:15,  1.08s/it]

KeyboardInterrupt: 

In [None]:
# CartPole-REINFORCE Experiment
# 2020/08/11 SYC 

import models.REINFORCE as REINFORCE
import models.expStrategy.epsilonGreedy as EPSG
import envs.cartPole as cartPole
import models.util as Util
import logging
import matplotlib as plt
from tqdm import tqdm

# env = cartPole.CartPoleEnv()
# print(env.get_num_actions())
# print(env.env.action_space.sample())
Util.test_gpu()

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

# --- Environment -----------------------------------------------------------
env = cartPole.CartPoleEnv()
env.reset()
NUM_STATE_FEATURES = env.get_num_state_features()
NUM_ACTIONS = env.get_num_actions()

# --- Experiment configuration ----------------------------------------------
BATCH_SIZE = 32            # not used by the code in this cell
EPISODE_NUM = 2000         # total number of training episodes
PRINT_EVERY_EPISODE = 20   # size of one reporting window (one progress bar)
LEARNING_RATE = 1e-4
REWARD_DISCOUNT = 0.9

# --- Agent -----------------------------------------------------------------
exp_stg = EPSG.EpsilonGreedy(0.1, NUM_ACTIONS)
agent = REINFORCE.Agent((NUM_STATE_FEATURES, ), NUM_ACTIONS, REWARD_DISCOUNT, LEARNING_RATE, exp_stg)

# Initial state of the first episode.
state = env.get_state()

accum_reward = 0   # reward summed over the current reporting window
bar = None         # progress bar of the current reporting window
logging.info("Episode 1")
for episode in range(1, EPISODE_NUM + 1):

    # A new reporting window starts at episodes 1, 1 + PRINT_EVERY_EPISODE, ...
    if episode % PRINT_EVERY_EPISODE == 1:
        if episode > 1:
            # Report the window that just finished, then reset its statistics.
            bar.close()
            logging.info("Accumulated Reward: {} | Loss: {}".format(round(accum_reward / PRINT_EVERY_EPISODE), agent.get_metrics_loss()))
            logging.info("Episode {}".format(episode))
            agent.reset_metrics_loss()
            accum_reward = 0
        bar = tqdm(total = PRINT_EVERY_EPISODE)

    # Collect one full episode into the agent's buffer.
    while not env.is_over():
        # env.render()
        action = agent.select_action(state)
        state_prime, reward, is_done, info = env.act(action)

        agent.add_buffer(state, action, reward, state_prime)
        # print(f'State: {state}, Action: {action}, Reward: {reward}, State_Prime: {state_prime}')

        state = state_prime
        accum_reward += reward

    # Update once per finished episode, then discard that episode's buffer.
    agent.update()
    agent.reset_buffer()

    bar.update(1)
    env.reset()
    # Re-read the state after the reset. Without this, the next episode (and
    # the evaluation run below) would start from the previous episode's
    # terminal state instead of the fresh initial state.
    state = env.get_state()

# Close the last progress bar and report the final training window, which the
# loop itself never reports (it only reports a window when the next one begins).
if bar is not None:
    bar.close()
    episodes_in_window = (EPISODE_NUM - 1) % PRINT_EVERY_EPISODE + 1
    logging.info("Accumulated Reward: {} | Loss: {}".format(round(accum_reward / episodes_in_window), agent.get_metrics_loss()))

# Evaluate the model on a single rendered episode with exploration turned off.
agent.shutdown_explore()
agent.reset_metrics_loss()
# Use a dedicated accumulator: reusing accum_reward would mix in the rewards of
# the last training window, and dividing by PRINT_EVERY_EPISODE would be wrong
# for a single episode.
eval_reward = 0
while not env.is_over():
    env.render()
    action = agent.select_action(state)
    state_prime, reward, is_done, info = env.act(action)

    state = state_prime
    eval_reward += reward

logging.info("Accumulated Reward: {}".format(round(eval_reward)))