# Script d'entrainement de l'algorithme DQN sur le jeu catcher
Source: https://medium.com/@www.seymour/training-an-ai-to-play-a-game-using-deep-reinforcement-learning-b63534cfdecd

### Imports et rechargement des librairies

In [6]:
import numpy as np
from collections import deque
import pandas as pd
import os
import sys
import time
# Put the parent directory on the import path so the `src` package imported
# below can be resolved; the membership test keeps re-runs of this cell from
# adding the same entry twice.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)
from src import dqn, game
from datetime import datetime

# Reload the project modules so that edits made to src/game.py and src/dqn.py
# are picked up when this cell is re-run, without restarting the kernel.
import importlib
importlib.reload(game)
importlib.reload(dqn)

<module 'src.dqn' from 'c:\\Users\\basil\\Documents\\Work\\UQAC\\S2\\Projet\\IV - Projet\\Article 1 - DQN\\src\\dqn.py'>

### Paramètres du programme

In [7]:
# --- Run length ---
EPISODES = 300

# --- Scoring ---
base_score = 5

# --- Bellman equation ---
GAMMA = 0.95

# --- Exploration schedule ---
EXPLORATION_DECAY = 0.95
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.001

# --- DQN optimisation ---
BATCH_SIZE = 32
UPDATE_TARGET_EVERY = 4
LEARNING_RATE = 1e-4
LEARNING_RATE_DECAY = 0.99

# --- Metadata written alongside the results ---
game_name = "Catcher"

# Human-readable summary of the run settings: one entry per line, each line
# terminated by a newline.
content = "".join(
    f"{entry}\n"
    for entry in (
        "Simulation info : ",
        f"Nb eps: {EPISODES}",
        f"Exploration decay: {EXPLORATION_DECAY}",
        f"Gamma: {GAMMA}",
        f"Base score: {base_score}",
        f"Game: {game_name}",
        f"Target updated every : {UPDATE_TARGET_EVERY}",
    )
)


### Préparation d'un dossier pour enregistrer les données

In [8]:
# Create a fresh, timestamped output directory for this run:
#   ../simulations/<YYYY-MM-DD HHMM>/          run summary (info.txt)
#   ../simulations/<YYYY-MM-DD HHMM>/models/   model checkpoints
current_datetime = datetime.now()
formatted_datetime = current_datetime.strftime("%Y-%m-%d %H%M")
dest_dir = os.path.join("..", "simulations", formatted_datetime)
models_dir = os.path.join(dest_dir, "models")
info_file_path = os.path.join(dest_dir, "info.txt")
# makedirs also creates a missing "../simulations" parent, which a plain
# os.mkdir rejects with FileNotFoundError on a fresh checkout. It still raises
# FileExistsError when this run's directory already exists (two runs started
# within the same minute), so earlier results are never silently overwritten.
os.makedirs(dest_dir)
os.mkdir(models_dir)
# Explicit encoding so the file does not depend on the platform default.
with open(info_file_path, 'w', encoding="utf-8") as file:
    file.write(content)

### Création et initialisation de l'environnement de jeu et de l'agent

In [9]:
# Build the game environment and a DQN agent sized from the environment's
# state shape and action count, using the hyper-parameters defined above.
env = game.Environment()
agent = dqn.DQN(
    state_shape=env.ENVIRONMENT_SHAPE,
    action_size=env.ACTION_SPACE_SIZE,
    batch_size=BATCH_SIZE,
    learning_rate_max=LEARNING_RATE,
    learning_rate_decay=LEARNING_RATE_DECAY,
    exploration_decay=EXPLORATION_DECAY,
    gamma=GAMMA,
    exploration_min=EXPLORATION_MIN,
    exploration_max=EXPLORATION_MAX,
)

# Append the network architecture to the run's info file.
# The encoding is explicit because the summary text may contain non-ASCII
# table-drawing characters (depends on the Keras version -- TODO confirm),
# which the platform default encoding cannot always represent.
with open(info_file_path, 'a', encoding="utf-8") as file:
    agent.model.summary(print_fn=lambda x: file.write(x + '\n'))

### Entrainement de l'agent

In [10]:
# Start from a fresh episode. expand_dims adds a leading axis of size 1
# (presumably the batch dimension the agent's network expects).
state = env.reset()
state = np.expand_dims(state, axis=0)
# Sliding window over the latest losses, used for the moving-average loss.
most_recent_losses = deque(maxlen=BATCH_SIZE)

# Per-episode statistics; one row is appended at the end of every episode.
df = pd.DataFrame(columns=["epoch", "step_num", "score", "agent_lr",
                           "agent_er", "ma_loss", "time"])

# Warm-up: collect transitions until the replay memory holds at least one
# full batch.
while agent.memory.length() < BATCH_SIZE:
    action = agent.act(state)
    next_state, reward, done, score = env.step(action)
    next_state = np.expand_dims(next_state, axis=0)
    agent.remember(state, action, reward, next_state, done)
    if done:
        # The episode is over: start a new one instead of continuing to step
        # an environment that has already terminated.
        state = np.expand_dims(env.reset(), axis=0)
    else:
        state = next_state

# Main training loop: one iteration per episode. Each episode plays the game
# until `done`, training the agent after every step, then logs statistics to
# data.csv and saves a model checkpoint.
csv_path = os.path.join(dest_dir, "data.csv")

for e in range(EPISODES):
    time_start = time.time()
    state = env.reset()
    state = np.expand_dims(state, axis=0)
    done = False
    step = 0
    ma_loss = None
    while not done:
        action = agent.act(state)
        next_state, reward, done, score = env.step(action)
        next_state = np.expand_dims(next_state, axis=0)
        agent.remember(state, action, reward, next_state, done)

        state = next_state
        step += 1

        loss = agent.replay(episode=e)
        if loss is not None:
            # Only real losses enter the window: a None entry would make the
            # mean below raise.
            most_recent_losses.append(loss)
            print(f"Step: {step}. Score: {score}. -- Loss: {loss}", end="          \r")

        if most_recent_losses:
            ma_loss = np.array(most_recent_losses).mean()

    # The while loop only exits once the episode is done.
    # ma_loss stays None if no loss has been produced yet.
    ma_loss_text = "n/a" if ma_loss is None else f"{ma_loss:.6f}"
    print(f"Episode {e}/{EPISODES-1} completed with {step} steps. Score: {score:.0f}. LR: {agent.learning_rate:.6f}. EP: {agent.exploration_rate:.2f}. MA loss: {ma_loss_text}")

    # Synchronise the target network every UPDATE_TARGET_EVERY episodes.
    if e % UPDATE_TARGET_EVERY == 0:
        agent.update_target_model()
    time_ttl = time.time() - time_start
    df.loc[len(df.index)] = [e, step, score, agent.learning_rate,
                             agent.exploration_rate, ma_loss, time_ttl]
    # Rewrite the CSV after every episode so partial results survive an
    # interrupted run.
    df.to_csv(csv_path, index=False)

    # One checkpoint per episode, named after the episode index.
    agent.save(os.path.join(models_dir, f"{e}.keras"))

Episode 0/299 completed with 198 steps. Score: -5. LR: 0.000100. EP: 1.00. MA loss: 0.004249
Episode 1/299 completed with 99 steps. Score: -5. LR: 0.000099. EP: 0.95. MA loss: 0.004696
Episode 2/299 completed with 35 steps. Score: -5. LR: 0.000098. EP: 0.90. MA loss: 0.004787
Episode 3/299 completed with 127 steps. Score: -5. LR: 0.000097. EP: 0.86. MA loss: 0.004186
Episode 4/299 completed with 63 steps. Score: -5. LR: 0.000096. EP: 0.81. MA loss: 0.003433
Episode 5/299 completed with 43 steps. Score: -5. LR: 0.000095. EP: 0.77. MA loss: 0.008488
Episode 6/299 completed with 50 steps. Score: -5. LR: 0.000094. EP: 0.74. MA loss: 0.005908
Episode 7/299 completed with 35 steps. Score: -5. LR: 0.000093. EP: 0.70. MA loss: 0.005043
Episode 8/299 completed with 67 steps. Score: -5. LR: 0.000092. EP: 0.66. MA loss: 0.004923
Episode 9/299 completed with 51 steps. Score: -5. LR: 0.000091. EP: 0.63. MA loss: 0.005650
Episode 10/299 completed with 40 steps. Score: -5. LR: 0.000090. EP: 0.60. MA 