In [1]:
import gym

In [2]:

import atari_py
print(atari_py.list_games())

['adventure', 'air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis', 'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival', 'centipede', 'chopper_command', 'crazy_climber', 'defender', 'demon_attack', 'double_dunk', 'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar', 'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kaboom', 'kangaroo', 'krull', 'kung_fu_master', 'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan', 'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing', 'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down', 'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']


In [3]:
env = gym.make('SpaceInvaders-v0')

In [4]:
env.action_space
#輸出是6 代表有6種action

Discrete(6)

In [5]:
episodes=10
for episode in range(1,episodes):
    #每次都重設環境
    state= env.reset()
    #done 如果agent完成任務就結束
    done = False
    score =0
    
    while not done:
        env.render()
        #.sample代表作random
        state,reward,done,info=env.step(env.action_space.sample())
        
        score +=reward
    print('Episode: {}\nScore :{}'.format(episode,score))
env.close()

Episode: 1
Score :60.0
Episode: 2
Score :165.0
Episode: 3
Score :300.0
Episode: 4
Score :120.0
Episode: 5
Score :105.0
Episode: 6
Score :210.0
Episode: 7
Score :180.0
Episode: 8
Score :65.0
Episode: 9
Score :125.0


In [27]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

In [28]:
height,width,channels=env.observation_space.shape
actions = env.action_space.n

In [29]:
def build_model(height,width,channels, actions):
    model = Sequential()
    model.add(Conv2D(32,(8,8),strides=(4,4),activation='relu',input_shape=(3,height,width,channels)))
    model.add(Conv2D(64,(4,4),strides=(2,2),activation='relu'))
    model.add(Flatten())
    model.add(Dense(512,activation='relu'))
    model.add(Dense(256,activation='relu'))
    model.add(Dense(actions,activation='relu'))
    return model

In [30]:
#del model

In [31]:
model=build_model(height,width,channels,actions)

In [32]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy


In [37]:
def build_agent(model,actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),attr='eps',value_max=1.,value_min=.1,value_test=.2,nb_steps=10000)
    memory=SequentialMemory(limit=2000,window_length=3)
    dqn=DQNAgent(model=model,memory=memory,policy=policy,
                 enable_dueling_network=True,dueling_type='avg',
                nb_steps_warmup=1000,nb_actions=actions)
    return dqn

In [38]:
dqn=build_agent(model,actions)

In [39]:
dqn.compile(Adam(lr=0.001))

In [40]:
dqn.fit(env,nb_steps=40000,visualize=True,verbose=1)

Training for 40000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
15 episodes - episode_reward: 146.000 [30.000, 470.000] - loss: 121.993 - mean_q: 8.658 - mean_eps: 0.505 - ale.lives: 1.996

Interval 2 (10000 steps performed)
16 episodes - episode_reward: 100.625 [20.000, 300.000] - loss: 0.348 - mean_q: 8.209 - mean_eps: 0.100 - ale.lives: 2.023

Interval 3 (20000 steps performed)
13 episodes - episode_reward: 206.538 [30.000, 490.000] - loss: 1.339 - mean_q: 8.482 - mean_eps: 0.100 - ale.lives: 2.009

Interval 4 (30000 steps performed)
done, took 28993.411 seconds


<tensorflow.python.keras.callbacks.History at 0x1cd6dec1408>

In [42]:
scores=dqn.test(env,nb_episodes=10,visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: 75.000, steps: 647
Episode 2: reward: 105.000, steps: 643
Episode 3: reward: 105.000, steps: 715
Episode 4: reward: 105.000, steps: 649
Episode 5: reward: 75.000, steps: 639
Episode 6: reward: 105.000, steps: 700
Episode 7: reward: 105.000, steps: 643
Episode 8: reward: 50.000, steps: 638
Episode 9: reward: 105.000, steps: 658
Episode 10: reward: 105.000, steps: 638
93.5


In [None]:
dqn.save_weights('models/dqn.h5f')

In [None]:
dqn.load_weights('models/dqn.h5f')