diff --git a/Chapter 13/DQN.py b/Chapter 13/DQN.py
index 09e1190..def9b29 100644
--- a/Chapter 13/DQN.py
+++ b/Chapter 13/DQN.py
@@ -3,6 +3,8 @@
 
 # Importing the libraries
 import numpy as np
+import gc
+import keras
 
 # IMPLEMENTING DEEP Q-LEARNING WITH EXPERIENCE REPLAY
 
@@ -35,10 +37,13 @@ class Dqn(object):
             inputs[i] = current_state
             targets[i] = model.predict(current_state)[0]
             Q_sa = np.max(model.predict(next_state)[0])
+            gc.collect()
+            keras.backend.clear_session()
             if game_over:
                 targets[i, action] = reward
             else:
                 targets[i, action] = reward + self.discount * Q_sa
+
         return inputs, targets
 
 
diff --git a/Chapter 13/train.py b/Chapter 13/train.py
old mode 100644
new mode 100755
index 92856e8..3cc5c40
--- a/Chapter 13/train.py
+++ b/Chapter 13/train.py
@@ -2,12 +2,16 @@
 # Training the AI
 
 # Importing the libraries
+import tensorflow as tf
 from environment import Environment
 from brain import Brain
 from DQN import Dqn
 import numpy as np
 import matplotlib.pyplot as plt
-
+import gc
+import os
+import keras
+
 # Defining the parameters
 memSize = 60000
 batchSize = 32
@@ -56,7 +60,9 @@ while True:
     if np.random.rand() < epsilon:
         action = np.random.randint(0, 4)
     else:
-        qvalues = model.predict(currentState)[0]
+        qvalues = model.predict(tf.convert_to_tensor(currentState))[0]
+        gc.collect()
+        keras.backend.clear_session()
         action = np.argmax(qvalues)
 
     # Updating the environment
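
Both hunks apply the same workaround: after each model.predict() call inside a loop, force Python garbage collection and clear the Keras backend session to limit the memory growth that repeated predictions can cause. Below is a minimal, self-contained sketch of that pattern, assuming TensorFlow 2.x; the toy network, the input shape, and the loop are illustrative stand-ins, not the project's Brain or Environment code.

# Minimal sketch of the mitigation used in the diff above: force garbage
# collection and clear the Keras session after repeated predict() calls.
# Assumes TensorFlow 2.x; the toy model and shapes are illustrative only.
import gc
import numpy as np
import tensorflow as tf
import keras

# Hypothetical stand-in network (the book builds its real model in brain.py).
model = keras.Sequential([
    keras.Input(shape=(3,)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(4)
])

for step in range(100):
    current_state = np.random.rand(1, 3)
    # Passing a tensor mirrors the tf.convert_to_tensor() call in train.py.
    q_values = model.predict(tf.convert_to_tensor(current_state), verbose=0)[0]
    action = int(np.argmax(q_values))
    # Release Python-side garbage and reset Keras' global state between
    # predictions to limit the memory growth seen in long training loops.
    gc.collect()
    keras.backend.clear_session()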