In [1]:
import import_ipynb
from DQN.DoubleDQN import Agent
import gym
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras import losses
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import logging
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from tensorflow.keras import optimizers
import lz4
#os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Enable on-demand memory growth on each visible GPU; the setting
        # has to be identical across all of them.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is configured after the GPUs were
        # already initialized; report it and carry on.
        print(err)

importing Jupyter notebook from /home/deonix/maturarbeit-code/DQN/DoubleDQN.ipynb
1 Physical GPUs, 1 Logical GPUs


2022-06-27 16:46:55.243722: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-27 16:46:55.249128: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-27 16:46:55.249345: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-27 16:46:55.250235: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [5]:
#agent and environment optimized for Atari games
#https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf

class AtariGame:
    """Training harness pairing a DeepMind-wrapped Atari environment with a DQN ``Agent``.

    The convolutional network layout and most hyper-parameters follow the DQN
    paper (https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf).
    """

    def __init__(self, env_name, render_mode=None, name=None):
        """Create the environment and the agent.

        Args:
            env_name: gym environment id, forwarded to ``make_atari``.
            render_mode: forwarded to ``make_atari`` ("human" or None).
            name: stored on the instance; not used elsewhere in this class.
        """
        self.name = name
        self.episodes          = 5e9     # episode budget; cast to int in run()
        self.env_name          = env_name
        self.render_mode       = render_mode
        self.env               = wrap_deepmind(env = make_atari(self.env_name, render_mode=render_mode),
                                               frame_stack=True, scale=True)
        self.frame_skip        = 4       # not read anywhere in this class
        self.replay_rate       = 4       # call agent.replay() every N frames
        # Observations are expected to be 84x84 greyscale with 4 stacked
        # frames -- TODO confirm against the wrapper configuration.
        self.state_size        = self.env.observation_space.shape
        self.action_size       = self.env.action_space.n
        self.termination_index = 1000    # maximum number of steps per episode
        self.env._max_episode_steps = self.termination_index
        self.history           = []      # scores of the most recent (<= 100) episodes
        self.save_freq         = 10000   # checkpoint every N episodes
        self.update_freq       = 10000   # call agent.update_target() every N frames
        self.target_score      = 500
        # Must be a plain bool: a trailing comma here would create the truthy
        # tuple ``(False,)`` and silently enable the target-score early stop.
        self.enable_target_score = False
        self.random_frames     = 50000   # frames of uniformly random actions before the agent decides
        # NOTE(review): the run name says "DoubleDQN" while disable_double=True
        # is passed -- confirm this combination is intended.
        self.agent             = Agent(self.state_size, self.action_size,
                                     name=f"{self.env_name}-DoubleDQN-run2",
                                     anatomy=[layers.Conv2D(32, 8, strides=4, activation="relu"),
                                              layers.Conv2D(64, 4, strides=2, activation="relu"),
                                              layers.Conv2D(64, 3, strides=1, activation="relu"),
                                              layers.Flatten(),
                                              layers.Dense(512, activation="relu")],
                                     lr=0.00025,
                                     epsilon_min=0.1,
                                     linear_decrease=True,
                                     epsilon_decay=0.9/32000,
                                     disable_double=True,
                                     max_memory_size=1e5,
                                     gamma=0.99,
                                     optimizer=optimizers.Adam(learning_rate=0.00025, clipnorm=1.0),
                                     loss_function=losses.Huber())

    def run(self, load_model = False, skip_training=False, overwrite_epsilon=-1, save=True, logs=False, log_freq=1):
        """Run the training loop.

        The loop ends when the episode budget is exhausted, when the target
        score is exceeded (only if ``enable_target_score`` is true), or when
        an exception / keyboard interrupt occurs. Exceptions are logged and
        swallowed so that the final save in ``finally`` can still happen.

        Args:
            load_model: if true, ask the agent to load an existing model first.
            skip_training: currently unused; kept for interface compatibility.
            overwrite_epsilon: forwarded to ``agent.load_model`` when
                ``load_model`` is true.
            save: save checkpoints every ``save_freq`` episodes, on error and
                at the end of the run.
            logs: write per-episode statistics to the log file.
            log_freq: log every ``log_freq``-th episode when ``logs`` is true.
        """
        # The log file is configured unconditionally; ``logs`` only controls
        # the per-episode statistics.
        logging.basicConfig(filename=f"models/{self.agent.default_name}.log",
                level=logging.INFO,
                format='%(levelname)s: %(asctime)s %(message)s',
                datefmt='%d/%m/%Y %I:%M:%S')

        # If requested, try to load an existing model.
        if load_model:
            self.agent.load_model(overwrite_epsilon=overwrite_epsilon)

        # Initialised outside ``try`` so the ``finally`` block can always read them.
        training_batches = 0
        frames = 0

        try:
            for index_episode in range(int(self.episodes)):
                state = np.array(self.env.reset())
                state_tensor = tf.expand_dims(state, 0)

                score = 0
                q = 0      # sum of max predicted Q-values over the episode
                q_n = 0    # number of Q-value samples in ``q``
                index = 0  # keeps the duration log valid if the step loop is empty

                for index in range(0, self.termination_index):

                    # Act uniformly at random for the first ``random_frames`` frames.
                    if frames >= self.random_frames:
                        action = self.agent.pick_action(state_tensor)
                    else:
                        action = np.random.choice(self.action_size)

                    q += tf.reduce_max(self.agent.predict(state_tensor))
                    q_n += 1

                    next_state, reward, done, _ = self.env.step(action)
                    next_state = np.array(next_state)

                    score += reward

                    self.agent.update_memory(state=state,
                                            reward=reward,
                                            action=action,
                                            state_next=next_state,
                                            done=done)

                    if (frames % self.replay_rate) == 0:
                        training_batches += 1
                        self.agent.replay(debug=False)

                    if (frames % self.update_freq) == 0:
                        self.agent.update_target()
                        logging.info("MODEL UPDATE")

                    # Advance to the new observation and rebuild the batched
                    # tensor from it, so the next action is chosen from the
                    # current state rather than the previous one.
                    state = next_state
                    state_tensor = tf.expand_dims(state, 0)
                    frames += 1

                    if done:
                        break

                # Keep a sliding window of the last 100 episode scores.
                self.history.append(score)
                if len(self.history) > 100:
                    del self.history[:1]

                print(f"Episode: {index_episode:-10}")
                print(f"Score: {score:-12}")
                print(f"Epsilon: {self.agent.exploration_rate}")
                print("_" * 10)
                if logs and index_episode % log_freq == 0:
                    running_reward = sum(self.history) / len(self.history)
                    logging.info(f"EPISODE: {index_episode}")
                    logging.info(f"AVG LAST {len(self.history)} REWARDS: {running_reward:0.2f}")
                    logging.info(f"BATCHES TRAINED: {training_batches}")
                    if q_n != 0:
                        logging.info(f"AVG Q VALUE: {(q / q_n):.5f}")
                    logging.info(f"SCORE: {score}")
                    logging.info(f"DURATION (STEPS): {index}")
                    logging.info(f"EPSILON: {self.agent.exploration_rate:.5f}")
                    logging.info(f"MEMORY SIZE: {len(self.agent.state_hist)}")
                    logging.info("-" * 12)

                if index_episode % self.save_freq == 0 and save and index_episode != 0:
                    self.agent.save_model(name="EP"+str(index_episode/1000)+"k", save_memory=False)

                if score > self.target_score and self.enable_target_score:
                    self.agent.save_model(f"TARGET-{self.target_score}")
                    break

        except (Exception, KeyboardInterrupt):
            # Deliberately best-effort: record the failure (or manual
            # interrupt) and fall through to the final save below.
            if save:
                self.agent.save_model(name="ERROR", save_memory=False)
            logging.info("+" * 14)
            logging.exception("An error has occured")
            logging.info("+" * 14)

        finally:
            # Save the model when training ends or is interrupted.
            if logs or save:
                logging.info(f"TRAINING FINISHED AFTER {training_batches} BATCHES")
                logging.info("+" * 14)
                if save:
                    model_name = self.agent.build_name(name="FINISHED")
                    logging.info(f"MODEL NAME: {model_name}")
                logging.info("+" * 14)
            if save:
                self.agent.save_model(name="FINISHED", save_memory=True)

if __name__ == "__main__":
    # Supported render modes: "human" or None.
    atari = AtariGame("BreakoutNoFrameskip-v4", render_mode=None)
    atari.run(
        load_model=False,
        skip_training=False,
        overwrite_epsilon=1,
        save=False,
        logs=True,
        log_freq=1,
    )


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 84, 84, 4)]       0         
                                                                 
 conv2d_9 (Conv2D)           (None, 20, 20, 32)        8224      
                                                                 
 conv2d_10 (Conv2D)          (None, 9, 9, 64)          32832     
                                                                 
 conv2d_11 (Conv2D)          (None, 7, 7, 64)          36928     
                                                                 
 flatten_3 (Flatten)         (None, 3136)              0         
                                                                 
 dense_7 (Dense)             (None, 512)               1606144   
                                                                 
 dense_8 (Dense)             (None, 4)                 2052

In [None]:
# env = wrap_deepmind(env = make_atari("BreakoutNoFrameskip-v4", render_mode=None),
#                                                frame_stack=True, scale=True)

In [None]:
# env.reset()