In [None]:
import import_ipynb
from DQN.SimpleDQN import Agent
import gym
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras import losses
import cv2
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import logging
#os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

In [None]:
#agent and environment optimized for Atari games
#https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf

class AtariGame:
    """Training driver that runs a DQN ``Agent`` on a grayscale Atari env.

    Each observation is shrunk to 84x84 by :meth:`reduce`, and
    ``frame_skip`` consecutive frames are stacked along the channel axis
    to form one network input of shape ``(84, 84, frame_skip)``.
    """

    def __init__(self, env_name, render_mode=None, name=None):
        """Create the environment and the agent.

        Args:
            env_name: Gym environment id passed to ``gym.make``.
            render_mode: Forwarded to ``gym.make`` ("human" or None).
            name: Optional label stored on the instance.
        """
        self.name = name
        self.sample_batch_size = 32
        self.episodes          = 10000
        self.env_name          = env_name
        self.env               = gym.make(env_name, obs_type="grayscale", render_mode=render_mode)
        # Number of consecutive frames stacked into one network input; a
        # transition is stored / a replay step runs once per this many steps.
        self.frame_skip        = 4
        # 84x84 grayscale frames, frame_skip of them on the channel axis.
        self.reduzed_size      = (84, 84, self.frame_skip)
        self.state_size        = self.reduzed_size
        self.action_size       = self.env.action_space.n
        # Hard cap on the per-episode step counter (see run()).
        self.termination_index = 10000
        self.agent             = Agent(self.state_size, self.action_size,
                                       #Parameters taken from Deepmind Breakout AI
                                       #input model layers as keras.layers objects
                                       anatomy=[layers.Conv2D(16,8,strides=4,activation=activations.relu),
                                                layers.Conv2D(32,4,strides=2,activation=activations.relu),
                                                layers.Conv2D(64,3,strides=1,activation=activations.relu),
                                                layers.Flatten(),
                                                layers.Dense(512, activation=activations.relu)],
                                       name=f"{self.env_name}-DQN",
                                       linear_decrease=True,
                                       epsilon_decay=0.9/31000,
                                       epsilon_min=0.1,
                                       model_verbose=0,
                                       lr = 0.00025,
                                       gamma= 0.99,
                                       loss = losses.Huber(),
                                       max_memory_size=1.9e5
                                       )
        print(self.state_size, self.action_size)
        # Final step counter of every finished episode.
        self.history           = []
        # Return value is unused; kept so construction behaves as before.
        self.env.unwrapped.get_action_meanings()

    @staticmethod
    def _stack_frames(frames):
        """Return ``frames`` as one batch of shape ``(1, H, W, n_frames)``.

        ``frames`` is a sequence (or array) of ``n_frames`` images of shape
        ``(H, W)``, oldest first. The frame axis is *moved* to the channel
        position; a plain ``np.reshape`` would only reinterpret the memory
        and interleave pixels of different rows into the channels.
        """
        channels_last = np.moveaxis(np.asarray(frames), 0, -1)
        # Copy into a contiguous array so the result does not alias the input.
        return np.ascontiguousarray(channels_last)[np.newaxis, ...]

    @staticmethod
    def _push_frame(window, frame):
        """Return a copy of ``window`` with ``frame`` appended as newest.

        ``window`` has shape ``(n_frames, H, W)`` ordered oldest first. The
        oldest frame is dropped. Rolling along ``axis=0`` shifts whole
        frames; without an axis ``np.roll`` shifts the flattened array by a
        single pixel.
        """
        window = np.roll(window, shift=-1, axis=0)
        window[-1] = frame
        return window

    def reduce(self, state):
        """Downscale a grayscale frame to 110x84 and crop it to 84x84.

        ``cv2.resize`` takes ``dsize`` as ``(width, height)``, so the
        resized image has 110 rows and 84 columns; 13 rows are then cut
        from the top and from the bottom.
        """
        res = cv2.resize(state, dsize=(84,110))
        margin = (110 - 84) // 2
        reduced = res[margin:110 - margin]
        return np.reshape(reduced, (84, 84))

    def run(self, load_model = False, skip_training=False, overwrite_epsilon=-1, save=True, logs=False):
        """Play ``self.episodes`` episodes, training the agent along the way.

        Args:
            load_model: If true, call ``agent.load_model`` before starting.
            skip_training: If true, never store transitions or replay.
            overwrite_epsilon: Forwarded to ``agent.load_model``.
            save: If true, save the model every 500 episodes and on exit.
            logs: If true, write per-episode statistics and a final summary
                to the log file. The log file itself is always configured
                and exceptions are always logged.

        Exceptions (including ``KeyboardInterrupt``) raised inside the
        training loop are logged and end the run; the model is still saved
        afterwards when ``save`` is true.
        """
        #LOGS ARE ALWAYS ENABLED
        log_path = f"models/{self.env_name}-DQN.log"
        # The env name may contain "/" (e.g. "ALE/..."), so make sure every
        # parent directory exists before the file handler opens the file.
        log_dir = os.path.dirname(log_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        logging.basicConfig(filename=log_path,
                level=logging.INFO,
                format='%(levelname)s: %(asctime)s %(message)s',
                datefmt='%d/%m/%Y %I:%M:%S')

        self.env._max_episode_steps = 20000

        #if true, try to load existing model
        if load_model:
            self.agent.load_model(overwrite_epsilon=overwrite_epsilon)

        training_batches = 0
        try:
            for index_episode in range(self.episodes):
                state = self.reduce(self.env.reset())
                done = False
                index = 1
                state_arr = []
                next_state_arr = []
                score = 0

                #logs performance
                q = 0
                q_n = 0
                #Real time window with the last frames (oldest first), used to
                #generate an action; same ordering as the training stacks.
                state_action_arr = np.zeros((self.frame_skip, 84, 84))

                while not done:
                    state_action_arr = self._push_frame(state_action_arr, state)

                    #first frames actions are random
                    if index > self.frame_skip:
                        action = self.agent.pick_action(self._stack_frames(state_action_arr))
                    else:
                        action = np.random.choice(self.agent.action_size)

                    next_state, reward, done, _ = self.env.step(action)
                    score += reward
                    next_state = self.reduce(next_state)
                    next_state_arr.append(next_state)

                    if index % self.frame_skip == 0:
                        if not skip_training:
                            current_stack = self._stack_frames(next_state_arr)
                            #skip training on the first window: there is no
                            #previous stack to pair it with yet
                            if index > self.frame_skip:
                                q += np.amax(self.agent.predict(state_arr))
                                q_n += 1
                                # NOTE(review): only the reward and done flag
                                # of the window's last step are stored; rewards
                                # of the other steps in the window and a
                                # terminal step off the window boundary are
                                # not part of any transition.
                                self.agent.update_memory((state_arr, action, reward,
                                                          current_stack, done))
                                #replay "training"
                                training_batches += 1
                                self.agent.replay()
                            state_arr = current_stack
                        # Always start a fresh window so the frame list does
                        # not grow without bound when training is skipped.
                        next_state_arr = []

                    state = next_state

                    if index > self.termination_index:
                        done = True
                    index += 1

                print(f"Episode: {index_episode:-10}")
                print(f"Score: {score:-12}")
                print(f"Epsilon: {self.agent.exploration_rate}")
                print("_" * 10)

                if logs:
                    # No training step may have run this episode (very short
                    # episode or skip_training); avoid dividing by zero.
                    avg_q = q / q_n if q_n else float("nan")
                    logging.info(f"EPISODE: {index_episode}")
                    # assumes agent.replay() samples sample_batch_size
                    # transitions per call -- TODO confirm against Agent
                    logging.info(f"FRAMES TRAINED: {training_batches * self.sample_batch_size}")
                    logging.info(f"AVG Q VALUE: {avg_q:.5f}")
                    logging.info(f"SCORE: {score}")
                    logging.info(f"DURATION (STEPS): {index}")
                    logging.info(f"EPSILON: {self.agent.exploration_rate:.5f}")
                    logging.info(f"MEMORY SIZE: {len(self.agent.memory)}")
                    logging.info("-" * 12)

                self.history.append(index)

                #save every 0.5k episodes
                if index_episode % 500 == 0 and save and index_episode != 0:
                    self.agent.save_model(name="EP"+str(index_episode))
        except (Exception, KeyboardInterrupt):
            # Deliberately broad: an interrupt or any failure should end the
            # run cleanly so the model can still be saved below.
            logging.info("+" * 14)
            logging.exception("An error has occured")
            logging.info("+" * 14)
        finally:
            #save model upon interrupting
            if logs:
                logging.info(f"TRAINING FINISHED AFTER {training_batches} BATCHES")
                logging.info("+" * 14)
                if save:
                    model_name = self.agent.build_name(name="FINISHED")
                    logging.info(f"MODEL NAME: {model_name}")
                logging.info("+" * 14)
            if save:
                self.agent.save_model(name="FINISHED")

if __name__ == "__main__":
    # render_mode accepts "human" or None.
    atari = AtariGame("ALE/Breakout-v5", render_mode=None)
    # Train from scratch with logging on; the model is saved by run().
    atari.run(
        load_model=False,
        skip_training=False,
        overwrite_epsilon=-1,
        save=True,
        logs=True,
    )
