## Exploration of Pygame Learning Environments

Objectives:
* Learn action space
* See visualization
* Provide testbed for trying different arguments and game-specific hyperparameters

Games Tested:
* Catcher [1.0]
* PixelCopter [1.1]
* Pong [1.2]
* PuckWorld [1.3]
* RaycastMaze [1.4]
* Snake [1.5]
* Waterworld [1.6]
* MonsterKong [2.0]
* FlappyBird [2.1]

### Catcher [1.0]

* Catch falling blocks from the ceiling by touching with paddle
* at env start/reset, need to take an action/advance frames or screen will be blank
* three actions: left, right, None
* seems to be momentum from prior actions

In [26]:
import numpy as np
from ple import PLE
from ple.games.catcher import Catcher

# Catcher: drive the game with uniformly random actions.
game = Catcher()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)

for i in range(100):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    ple_env.act(action)
    if ple_env.game_over():
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))


[100, 97, None]


In [None]:
#useful utilities
#ple_env.act() #like gym.step() except only returns reward 
#ple_env.getScreenGrayscale() #get grayscale image
#ple_env.getScreenRGB() #get RGB image
#ple_env.lives() #check for remaining lives; can be used to check whether a reset of the env is needed
#ple_env.reset_game() #resets the environment
#ple_env.NOOP #no action, advances the frames

### [1.1] Pixelcopter

* Flappy bird with pixels
* Flap or don't

In [25]:
import numpy as np
from ple import PLE
from ple.games.pixelcopter import Pixelcopter

# Pixelcopter: drive the game with uniformly random actions.
game = Pixelcopter()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)

for i in range(100):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    ple_env.act(action)
    if ple_env.game_over():
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

[119, None]


### [1.2] Pong

In [1]:
import numpy as np
from ple import PLE
from ple.games.pong import Pong

# Pong: drive the game with uniformly random actions and report whenever
# the value returned by ple_env.lives() changes.
game = Pong() # create our game

fps = 30  # fps we want to run at
frame_skip = 2
num_steps = 2
force_fps = False # False for slower speed
display_screen = True

# make a PLE instance.
ple_env = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
        force_fps=force_fps, display_screen=display_screen)

action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# init agent and game.
ple_env.init()
#advance game one frame, starts out at a black screen
ple_env.act(ple_env.NOOP)

# Baseline used to detect a change in the lives counter.
lives_check = ple_env.lives()

for i in range(100):
    action = action_list[np.random.randint(0, action_length)]
    ple_env.act(action)
    if lives_check != ple_env.lives():
        # Refresh the baseline so each change is reported once instead of
        # on every subsequent step.
        lives_check = ple_env.lives()
        print('lives are different {}'.format(lives_check))
    if ple_env.game_over():
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
        # A reset starts a new episode; re-read the baseline so the reset
        # itself is not reported as a change.
        lives_check = ple_env.lives()
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

couldn't import doomish
Couldn't import doom
[115, 119, None]


### [1.3] PuckWorld
* grab green pucks, avoid red one

In [20]:
import numpy as np
from ple import PLE
from ple.games.puckworld import PuckWorld

# PuckWorld: drive the game with uniformly random actions and print the
# reward returned by every step.
game = PuckWorld()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)
print(ple_env.lives())

for i in range(1000):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    reward = ple_env.act(action)
    print(reward)
    if ple_env.game_over():
        print('game_over')
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

[115, 100, 119, 97, None]
-1
-217.97264343208525
-220.0602168950873
-218.15165301165268
-213.36342334083918
-209.31980018582453
-208.0605593907046
-214.3474880332517
-228.43290350606503
-232.5967320946138
-231.46370647515005
-219.6697275516367
-203.65271571529638
-195.28279019216643
-198.26504472941042
-207.34650512284452
-211.0524507763712
-207.58031342291952
-204.50276834203714
-201.77303846894392
-202.80539692156253
-208.552734678241
-214.18577445935625
-215.70027239217052
-209.28248360425005
-205.1296680331261
-215.48985083616753
-232.30200438627526
-231.31092545303545
-229.25745061658472
-229.8109915793475
-231.93645533269046
-229.1695544933964
-225.3163804327105
-226.88681585564518
-224.86797016244418
-222.82328490646705
-227.70791324040238
-229.52636915952462
-226.97006044567934
-220.784870777361
-215.965096024147
-219.39960413905646
-227.27823325294048
-234.9307372972471
-233.7139463062049
-220.72259536729507
-207.81695428722378
-193.33941617640266
-173.8586303952161
-158.71452

KeyboardInterrupt: 

### [1.4] Raycast
* Navigate a maze from first person view
* Search for an exit

In [23]:
import numpy as np
from ple import PLE
from ple.games.raycastmaze import RaycastMaze

# RaycastMaze: drive the game with uniformly random actions and print
# only the non-zero rewards.
game = RaycastMaze()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)
print(ple_env.lives())

for i in range(10000):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    reward = ple_env.act(action)
    if reward != 0:
        print(reward)
    if ple_env.game_over():
        print('game over')
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

[100, 119, 97, 115, None]
0
5.0
game over
5.0
game over


KeyboardInterrupt: 

### [1.5] Snake

In [17]:
import numpy as np
from ple import PLE
from ple.games.snake import Snake

# Snake: inspect the action set, sort it with None last, then take a
# single random step.
game = Snake()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Init agent and game.
ple_env.init()

# Action set in the order the environment reports it.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Sorting a list that contains None: the key (x is None, x) compares on the
# boolean first, so the single None sorts last and is never compared
# against an int.
# >>> l = [1, 3, 2, 5, 4, None, 7]
# >>> sorted(l, key=lambda x: (x is None, x))
# [1, 2, 3, 4, 5, 7, None]

print(sorted(action_list, key=lambda x: (x is None, x)))
action_list = sorted(action_list, key=lambda x: (x is None, x))
print(action_list)

# Advance one frame with NOOP because the screen starts out black.
ple_env.act(ple_env.NOOP)

for i in range(1):
    # Pick a random index into the (now sorted) action list.
    action = action_list[np.random.randint(action_length)]
    ple_env.act(action)
    if ple_env.game_over():
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

[115, 100, 119, 97, None]
[97, 100, 115, 119, None]
[97, 100, 115, 119, None]


### [1.6] Waterworld
* Collect green circles, avoid red ones

In [35]:
import numpy as np
from ple import PLE
from ple.games.waterworld import WaterWorld

# WaterWorld: drive the game with uniformly random actions.
game = WaterWorld()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)

for i in range(100):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    ple_env.act(action)
    if ple_env.game_over():
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

[119, 100, 115, 97, None]


### [2.0] MonsterKong

In [1]:
import numpy as np
from ple import PLE
from ple.games.monsterkong import MonsterKong

# MonsterKong: drive the game with uniformly random actions, tracking the
# running reward and resetting on game over or a change in lives.
game = MonsterKong()

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)

# Lives at episode start; any deviation is treated like a game over below.
lives_check = ple_env.lives()
print(lives_check)

# Running reward for the current episode and the last value that was printed.
old_reward = 0
current_reward = 0

for i in range(1000):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    current_reward = current_reward + ple_env.act(action)
    if old_reward != current_reward:
        print("reward change {}".format(current_reward))
        old_reward = current_reward
    if ple_env.game_over() or ple_env.lives() != lives_check:
        print("game over/reset")
        ple_env.reset_game()
        print(ple_env.lives())
        ple_env.act(ple_env.NOOP)
        # New episode: restart the running reward.
        old_reward = 0
        current_reward = 0
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

couldn't import doomish
Couldn't import doom
[100, 97, 119, 115, 32, None]
0
reward change 0.1
reward change 0.20000000000000004
reward change 0.30000000000000004
reward change -0.7
game over/reset
0
reward change 0.1
reward change -0.9
game over/reset
0
reward change -1.0
game over/reset
0
reward change 0.1
reward change 0.2


KeyboardInterrupt: 

### [2.1] Flappybird

In [37]:
import numpy as np
from ple import PLE
from ple.games.flappybird import FlappyBird

# FlappyBird: drive a reduced-size game with uniformly random actions.
game = FlappyBird(width=144, height=256)
#game = FlappyBird(width=120,height=144) # create our game 
#game = FlappyBird() # create our game 

# Environment settings.
fps, frame_skip, num_steps = 30, 2, 2   # fps is the rate we want to run at
force_fps, display_screen = False, True  # force_fps=False -> slower speed

# Wrap the game in a PLE instance.
ple_env = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

# Inspect the action space and the first screen dimension.
action_list = ple_env.getActionSet()
action_length = len(action_list)
print(action_list)
print(ple_env.getScreenDims()[0]) #default is 288 by 512, 9x16 ratio

# Init agent and game, then advance one frame with NOOP because the
# screen starts out black.
ple_env.init()
ple_env.act(ple_env.NOOP)

for i in range(100):
    # Pick a random index into the action list (upper bound is exclusive).
    action = action_list[np.random.randint(action_length)]
    ple_env.act(action)
    if ple_env.game_over():
        # Start a new episode and advance one frame again.
        ple_env.reset_game()
        ple_env.act(ple_env.NOOP)
    #ple_env.saveScreen("test_screen_capture_{}.png".format(i))

[119, None]
144


In [4]:
# Check whether the game wrapped by the current ple_env is over.
ple_env.game_over()

False

In [3]:
# Run the enjoy_ple_dqn experiment module for the 'catcher' env through the
# IPython shell escape.
# NOTE(review): the saved output for this cell names
# 'baselines.baselines.deepq...', which does not match this command --
# presumably stale output from an earlier edit; re-run to confirm.
!python -m baselines.deepq.experiments.enjoy_ple_dqn  --env=catcher


/home/jim/anaconda3/envs/gym35/bin/python: Error while finding module specification for 'baselines.baselines.deepq.experiments.enjoy_ple_dqn' (ImportError: No module named 'baselines.baselines')


In [1]:
# Scratch check: show how the float literal 1e-4 is displayed.
1e-4

0.0001

In [2]:
# Scratch check: float(1e-4) is the expression used for the --learning_rate
# default in the argparse notes kept inside random_search_model.
float(1e-4)

0.0001

In [13]:
import numpy as np

In [50]:
import numpy as np

def random_search_model():
#     parser.add_argument('--env', help='environment name', default='catcher')
#     parser.add_argument('--seed', help='RNG seed', type=int, default=0)
#     parser.add_argument('--prioritized', type=int, default=1)
#     parser.add_argument('--dueling', type=int, default=1)
#     parser.add_argument('--double_q', type=int, default=1)
#     parser.add_argument('--num-timesteps', type=int, default=int(4e6))
#     parser.add_argument('--exp_fraction',default=0.1)
#     parser.add_argument('--frame_skip', type=int, default=int(2))
#     parser.add_argument('--hold_action', type=int, default=int(2))
#     parser.add_argument('--learning_rate', type=float, default=float(1e-4))
#     parser.add_argument('--buffer_size', type=int, default=int(10000))
#     parser.add_argument('--train_freq', type=int, default=int(4))
#     parser.add_argument('--target_update', type=int, default=int(1000))
#     parser.add_argument('--pr_alpha', type=float, default=float(0.6)) #prioritized replay alpha
#     parser.add_argument('--pr_beta', type=float, default=float(0.4))
#     parser.add_argument('--pr_eps', type=float, default=float(1e-6))
#python -m baselines.deepq.experiments.run_ple_dqn  --env=waterworld --num-timesteps=10000 --prioritized=0 --dueling=0 --double_q=0  --exp_fraction=0.8 --frame_skip=3 --hold_action=3 --learning_rate=0.00008 --buffer_size=15000 --train_freq=3 --target_update=2000
    env = 'monsterkong'
    num_timesteps = np.random.randint(1000000,4000001)#1000
    prioritized = np.random.randint(0,5) #turned into a bool, weighing it more heavily to use this
    dueling = np.random.randint(0,5)
    double_q = np.random.randint(0,10)
    exp_fraction = np.round(np.random.uniform(0.6,0.98),2)
    frame_skip = np.random.randint(2,5)
    hold_action = frame_skip
    learning_rate = 10 **  (-1*np.random.randint(3,6) ) * np.random.randint(1,6)
    buffer_size = np.random.randint(10000,50001)
    train_freq =  np.random.randint(1,7)
    target_update = np.random.randint(1000,10001)

    model_string = "python -m baselines.deepq.experiments.run_ple_dqn --env={} --num-timesteps={} \
    --prioritized={} --dueling={} --double_q={}  --exp_fraction={} --frame_skip={} --hold_action={} \
    --learning_rate={} --buffer_size={} --train_freq={} --target_update={}".format(
        env,num_timesteps,
        prioritized,dueling,double_q,exp_fraction,frame_skip,
        hold_action,learning_rate,buffer_size,train_freq,target_update)
    print(model_string)
    ! $model_string

In [52]:
# Launch two random-search runs; each call to random_search_model() draws
# its own hyper-parameters and runs the training command.
for i in range(2):
    random_search_model()

python -m baselines.deepq.experiments.run_ple_dqn --env=catcher --num-timesteps=100     --prioritized=1 --dueling=3 --double_q=2  --exp_fraction=0.88 --frame_skip=3 --hold_action=3     --learning_rate=0.003 --buffer_size=42717 --train_freq=2 --target_update=4854
couldn't import doomish
Couldn't import doom
Logging to /tmp/openai-2017-12-22-14-01-43-673276
2017-12-22 14:01:43.778704: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-12-22 14:01:43.778733: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-12-22 14:01:43.778741: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could 

In [18]:
# Scratch check: ten draws with the bounds used for frame_skip in
# random_search_model (the upper bound 5 is exclusive).
np.random.randint(2,5,10)

array([3, 4, 2, 3, 3, 2, 3, 3, 2, 3])

In [15]:
# Scratch check: one draw with the bounds used for exp_fraction in
# random_search_model (before rounding).
np.random.uniform(0.6,0.98)

0.6567447703938406

In [38]:
# Scratch check: the learning-rate expression from random_search_model --
# a power of ten between 10**-3 and 10**-5 multiplied by an integer in 1..5.
10 **  (-1*np.random.randint(3,6) ) * np.random.randint(1,6)

0.00030000000000000003

In [21]:
# Scratch check: an int raised to a negative int power evaluates to a float.
10 ** -1

0.1

In [51]:
# Scratch check: range(2) gives two iterations, i = 0 and i = 1.
for i in range(2):
    print(i)

0
1
