# 1. Zależności

In [1]:
!pip install tensorflow==2.3.1 gym keras-rl2 gym[atari]

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


# 2. Losowy agent w środowisku OpenAI Gym

In [1]:
import gym 
import random

In [2]:
env = gym.make('SpaceInvaders-v0')
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [3]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [26]:
episodes = 100
total_score = 0
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        env.render()
        action = random.choice([0,1,2,3,4,5])
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
    total_score += score
print('Avg score: {}'.format(str(total_score/episodes)))
env.close()

Episode:1 Score:80.0
Episode:2 Score:75.0
Episode:3 Score:110.0
Episode:4 Score:30.0
Episode:5 Score:120.0
Episode:6 Score:240.0
Episode:7 Score:155.0
Episode:8 Score:135.0
Episode:9 Score:430.0
Episode:10 Score:45.0
Episode:11 Score:50.0
Episode:12 Score:155.0
Episode:13 Score:155.0
Episode:14 Score:50.0
Episode:15 Score:50.0
Episode:16 Score:135.0
Episode:17 Score:75.0
Episode:18 Score:80.0
Episode:19 Score:120.0
Episode:20 Score:90.0
Episode:21 Score:160.0
Episode:22 Score:20.0
Episode:23 Score:155.0
Episode:24 Score:30.0
Episode:25 Score:80.0
Episode:26 Score:320.0
Episode:27 Score:290.0
Episode:28 Score:135.0
Episode:29 Score:20.0
Episode:30 Score:270.0
Episode:31 Score:300.0
Episode:32 Score:240.0
Episode:33 Score:300.0
Episode:34 Score:120.0
Episode:35 Score:135.0
Episode:36 Score:105.0
Episode:37 Score:105.0
Episode:38 Score:210.0
Episode:39 Score:120.0
Episode:40 Score:260.0
Episode:41 Score:80.0
Episode:42 Score:75.0
Episode:43 Score:120.0
Episode:44 Score:95.0
Episode:45 Sco

# 3. Sieć neuronowa w Kerasie

In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

In [6]:
def build_model(height, width, channels, actions):
    input_shape = (3, height, width, channels)
    
    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=input_shape))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [19]:
tf.Graph().as_default()
model = build_model(height, width, channels, actions)

In [20]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 67584)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               34603520  
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_8 (Dense)              (None, 6)                

# 3. Agent w Keras-RL

In [9]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [14]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg', 
                   nb_actions=actions, nb_steps_warmup=1000)
    return dqn

In [21]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-4))

In [27]:
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
  708/10000: episode: 1, duration: 16.068s, episode steps: 708, steps per second:  44, episode reward: 110.000, mean reward:  0.155 [ 0.000, 30.000], mean action: 2.376 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
 1379/10000: episode: 2, duration: 470.332s, episode steps: 671, steps per second:   1, episode reward: 105.000, mean reward:  0.156 [ 0.000, 30.000], mean action: 2.516 [0.000, 5.000],  loss: 16.554533, mean_q: 8.911127, mean_eps: 0.892945
 2295/10000: episode: 3, duration: 1107.539s, episode steps: 916, steps per second:   1, episode reward: 120.000, mean reward:  0.131 [ 0.000, 30.000], mean action: 2.563 [0.000, 5.000],  loss: 1.398586, mean_q: 8.688404, mean_eps: 0.834715
 2866/10000: episode: 4, duration: 669.088s, episode steps: 571, steps per second:   1, episode reward:  5.000, mean reward:  0.009 [ 0.000,  5.000], m

<tensorflow.python.keras.callbacks.History at 0x7f94502cee80>

In [24]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 105.000, steps: 674
Episode 2: reward: 140.000, steps: 926
Episode 3: reward: 70.000, steps: 613
Episode 4: reward: 75.000, steps: 564
Episode 5: reward: 395.000, steps: 901
Episode 6: reward: 20.000, steps: 661
Episode 7: reward: 410.000, steps: 1131
Episode 8: reward: 105.000, steps: 755
Episode 9: reward: 170.000, steps: 846
Episode 10: reward: 245.000, steps: 704
Episode 11: reward: 270.000, steps: 894
Episode 12: reward: 100.000, steps: 665
Episode 13: reward: 130.000, steps: 894
Episode 14: reward: 55.000, steps: 395
Episode 15: reward: 165.000, steps: 698
Episode 16: reward: 90.000, steps: 642
Episode 17: reward: 100.000, steps: 655
Episode 18: reward: 420.000, steps: 1344
Episode 19: reward: 560.000, steps: 984
Episode 20: reward: 80.000, steps: 511
Episode 21: reward: 135.000, steps: 656
Episode 22: reward: 240.000, steps: 916
Episode 23: reward: 170.000, steps: 658
Episode 24: reward: 345.000, steps: 1191
Episode 25: reward: 60.

# 4. Zapis i odczyt agenta z pamięci

In [22]:
dqn.save_weights('SavedWeights/SpaceInvaders/10k-dqn-weights.h5f')

In [22]:
dqn.load_weights('SavedWeights/SpaceInvaders/10k-dqn-weights.h5f')

In [17]:
del model

In [18]:
del dqn

NameError: name 'dqn' is not defined