In [2]:
import import_ipynb
from DQN.SimpleDQNtest import Agent
import gym
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras import losses
import cv2
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import logging
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
#os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
# Ask TensorFlow to allocate GPU memory on demand instead of all at once.
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # Memory growth currently has to be configured identically on every GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as err:
        # Memory growth can only be set before the GPUs have been initialized.
        print(err)

importing Jupyter notebook from /home/deonix/maturarbeit-code/DQN/SimpleDQNtest.ipynb
1 Physical GPUs, 1 Logical GPUs


2022-06-12 15:52:33.552509: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-12 15:52:33.558201: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-12 15:52:33.558434: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-12 15:52:33.559362: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [None]:
#agent and environment optimized for Atari games
#https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf

class AtariGame:
    def __init__(self, env_name, render_mode=None, name=None):
        self.name = name
        self.sample_batch_size = 32
        self.episodes          = 5e6
        self.env_name          = env_name
        self.env               = wrap_deepmind(env = make_atari(self.env_name), frame_stack=True, scale=True)
        self.frame_skip        = 4 #skip every 4th frame
        #84x84 greyscale
        self.reduzed_size      = (84, 84, self.frame_skip) #, greyscale(1), 84x84, 4 Pictures, 
        self.state_size        = self.reduzed_size
        self.action_size       = self.env.action_space.n
        self.termination_index = 10000
        self.history           = []
        self.save_freq         = 10000
        self.agent             = Agent(self.state_size, self.action_size, 
                                       #Parameters taken from Deepmind Breakout AI
                                       #input model layers as keras.layers objects
                                       anatomy=[layers.Conv2D(16,8,strides=4,activation=activations.relu),
                                                layers.Conv2D(32,4,strides=2,activation=activations.relu),
                                                layers.Flatten(),
                                                layers.Dense(256, activation=activations.relu)],
                                       name=f"{self.env_name}-DQN",
                                       linear_decrease=True,
                                       epsilon=1,
                                       epsilon_decay=0.9/31000,
                                       epsilon_min=0.1,
                                       model_verbose=0,
                                       lr = 0.0025,
                                       gamma= 0.99,
                                       loss = losses.Huber(),
                                       max_memory_size=0.7e5
                                       )
        #print(self.state_size, self.action_size)
    
    def run(self, load_model = False, skip_training=False, overwrite_epsilon=-1, save=True, logs=False, log_freq=1):
        #LOGS ARE ALWAYS ENABLED
        logging.basicConfig(filename=f"models/{self.env_name}-DQN.log",
                level=logging.INFO,
                format='%(levelname)s: %(asctime)s %(message)s',
                datefmt='%d/%m/%Y %I:%M:%S')
        
        #if true, try to load existing model
        if load_model:
            self.agent.load_model(overwrite_epsilon=overwrite_epsilon)
        try:
            training_batches = 0
            for index_episode in range(int(self.episodes)):
                state = tf.expand_dims(np.array(self.env.reset()), 0)
                done = False
                score = 0 
                q = 0
                q_n = 0
                
                for index in range(0, self.termination_index):
                    if index > self.frame_skip:
                        action = self.agent.pick_action(state) 
                    else:
                        action = np.random.choice(self.agent.action_size)
                    
                    
                    next_state, reward, done, _ = self.env.step(action)
                    score += reward
                    next_state = tf.expand_dims(np.array(next_state), 0)
                    #print(next_state.numpy().shape)
                    
                    self.agent.update_memory((state.numpy(), action, reward, 
                                                      next_state.numpy(), done))
                    
                    if (index % self.frame_skip == 0) and not skip_training:
                        if index > self.frame_skip:
                            q += np.amax(self.agent.predict(state))
                            q_n += 1

                            training_batches += 1
                            self.agent.replay()                     
                    state = next_state

                    if done:
                        break
                self.history.append(score)
                if len(self.history) > 100:
                    del self.history[:1]
                
                print(f"Episode: {index_episode:-10}")
                print(f"Score: {score:-12}")
                print(f"Epsilon: {self.agent.exploration_rate}")
                print("".join(["_" for i in range(10)]))
                if logs and index_episode % log_freq == 0:
                    running_reward = sum(self.history) / len(self.history)
                    logging.info(f"EPISODE: {index_episode}")
                    logging.info(f"AVG LAST {len(self.history)} REWARDS: {running_reward:0.2f}")
                    logging.info(f"BATCHES TRAINED: {training_batches}")
                    logging.info(f"AVG Q VALUE: {(q / q_n):.5f}")
                    logging.info(f"SCORE: {score}")
                    logging.info(f"DURATION (STEPS): {index}")
                    logging.info(f"EPSILON: {self.agent.exploration_rate:.5f}")
                    logging.info(f"MEMORY SIZE: {len(self.agent.memory)}")
                    logging.info("".join(["-" for i in range(12)]))
                
                if index_episode % self.save_freq == 0 and save and index_episode != 0:
                    self.agent.save_model(name="EP"+str(index_episode/1000)+"k", save_memory=False)
                
        except:
            if save:
                self.agent.save_model(name="ERROR", save_memory=True)
            logging.info("".join(["+" for i in range(14)]))
            logging.exception("An error has occured")
            logging.info("".join(["+" for i in range(14)]))
            
        finally:
            #save model upon interrupting
            if logs or save:
                logging.info(f"TRAINING FINISHED AFTER {training_batches} BATCHES")
                logging.info("".join(["+" for i in range(14)]))
                if save:
                    model_name = self.agent.build_name(name="FINISHED")
                    logging.info(f"MODEL NAME: {model_name}")
                logging.info("".join(["+" for i in range(14)]))
            #print(training_batches)
            if save:
                self.agent.save_model(name="FINISHED", save_memory=True)

if __name__ == "__main__":
    # render_mode may be "human" or None
    atari = AtariGame(env_name="BreakoutNoFrameskip-v4", render_mode=None)
    # Resume from the saved model and keep training, logging every 25th episode.
    atari.run(
        load_model=True,
        skip_training=False,
        overwrite_epsilon=-1,
        save=True,
        logs=True,
        log_freq=25,
    )


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 84, 84, 4)]       0         
                                                                 
 conv2d_2 (Conv2D)           (None, 20, 20, 16)        4112      
                                                                 
 conv2d_3 (Conv2D)           (None, 9, 9, 32)          8224      
                                                                 
 flatten_1 (Flatten)         (None, 2592)              0         
                                                                 
 dense_4 (Dense)             (None, 256)               663808    
                                                                 
 dense_5 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 677,172
Trainable params: 677,172
Non-trainab

2022-06-12 15:53:40.121262: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-06-12 15:53:40.399967: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-12 15:53:40.400521: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-12 15:53:40.400539: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-06-12 15:53:40.400870: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-12 15:53:40.400959: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


Episode:          0
Score:          0.0
Epsilon: 0.1
__________
Episode:          1
Score:          0.0
Epsilon: 0.1
__________
Episode:          2
Score:          0.0
Epsilon: 0.1
__________
Episode:          3
Score:          0.0
Epsilon: 0.1
__________
Episode:          4
Score:          0.0
Epsilon: 0.1
__________
Episode:          5
Score:          0.0
Epsilon: 0.1
__________
Episode:          6
Score:          2.0
Epsilon: 0.1
__________
Episode:          7
Score:          0.0
Epsilon: 0.1
__________
Episode:          8
Score:          0.0
Epsilon: 0.1
__________
Episode:          9
Score:          0.0
Epsilon: 0.1
__________
Episode:         10
Score:          0.0
Epsilon: 0.1
__________
Episode:         11
Score:          0.0
Epsilon: 0.1
__________
Episode:         12
Score:          1.0
Epsilon: 0.1
__________
Episode:         13
Score:          1.0
Epsilon: 0.1
__________
Episode:         14
Score:          1.0
Epsilon: 0.1
__________
Episode:         15
Score:          0.0
