In [1]:
import gym
gym.__version__

'0.18.0'

In [2]:
env = gym.make('SpaceInvaders-v0')

In [3]:
env.action_space #6 possible actions to take in this environment.

Discrete(6)

In [4]:
episodes = 10

for episode in range(1, episodes):
    state = env.reset() #Reset environment to initial state
    done = False 
    score = 0
    
    while not done:
        env.render()
        state, reward, done, info = env.step(env.action_space.sample())
        
        #Add reward to score.
        score += reward
        
    print(f'Episode: {episode}, reward: {score}')
    
env.close()

Episode: 1, reward: 155.0
Episode: 2, reward: 50.0
Episode: 3, reward: 110.0
Episode: 4, reward: 45.0
Episode: 5, reward: 65.0
Episode: 6, reward: 210.0
Episode: 7, reward: 125.0
Episode: 8, reward: 150.0
Episode: 9, reward: 155.0


### _The agent is taking random actions. Implement a neural network where the agent learns intelligent behaviours based on the rewards that we give it, following reinforcement learning principles that humans follow._

In [5]:
env.observation_space

Box(0, 255, (210, 160, 3), uint8)

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

2022-11-14 14:55:42.341053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-14 14:55:43.232299: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/debonair/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-14 14:55:43.232360: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-14 14:55:43.327627: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alre

In [7]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Conv2D(filters = 32, kernel_size = (8, 8), strides = (4, 4), activation = 'relu', input_shape = (3, height, width, channels)))
    model.add(Conv2D(filters = 64, kernel_size = (4, 4), strides = (2, 2), activation = 'relu'))
    model.add(Flatten())
    model.add(Dense(128, activation = 'relu'))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(actions, activation = 'linear'))

    return model

In [8]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [9]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr = 'eps', value_max = 1., value_min = .1, 
                                  value_test = .2, nb_steps=1000)
    
    memory = SequentialMemory(limit = 2000, window_length = 3)
    
    dqn = DQNAgent(model = model, memory = memory, policy = policy, 
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions = actions, nb_steps_warmup = 1000)
    
    return dqn

In [10]:
height, width, channels = env.observation_space.shape
actions = env.action_space.n

model = build_model(height, width, channels, actions)

In [11]:
dqn = build_agent(model, actions)

In [12]:
dqn.compile(Adam(learning_rate = 0.0001))

2022-11-14 14:57:16.471347: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/debonair/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-14 14:57:16.471482: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-11-14 14:57:16.471570: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (debonair): /proc/driver/nvidia/version does not exist
2022-11-14 14:57:16.472537: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-14 14:57:16.579301

In [14]:
dqn.fit(env, 
        nb_steps = 40_000, #Same as epochs
        visualize = False, #Visualize agent in environment when training.
        verbose = 1)

Training for 40000 steps ...
Interval 1 (0 steps performed)
 1114/10000 [==>...........................] - ETA: 16:48 - reward: 0.2783done, took 127.409 seconds


<keras.callbacks.History at 0x7fb29e475580>

### Test the model.

In [15]:
scores = dqn.test(env, nb_episodes = 5, visualize = True)
print(np.mean(scores.history['episode_reward']))

Testing for 5 episodes ...
Episode 1: reward: 380.000, steps: 853
Episode 2: reward: 380.000, steps: 860
Episode 3: reward: 355.000, steps: 861
Episode 4: reward: 380.000, steps: 838
Episode 5: reward: 380.000, steps: 853
375.0


### Save and load model.

In [18]:
#Save weights.
dqn.save_weights('models/dqn.h5f')

In [19]:
#Load model weights.
dqn.load_weights('models/dqn.h5f')