In [20]:
import tensorflow as tf
import numpy as np
import random

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from collections import deque

In [21]:
class DQN:
  """Holds two identically-built Keras networks: a main model and a target model.

  Both models come from `create_model`; the target model starts out with a copy
  of the main model's weights.

  Args:
    state_dim: number of input features of the first Dense layer.
    action_dim: number of units in the final (linear) Dense layer.
  """

  def __init__(self, state_dim, action_dim):
    self.state_dim = state_dim
    self.action_dim = action_dim

    self.main_model = self.create_model()
    self.target_model = self.create_model()
    # Start the target network from exactly the same weights as the main one.
    self.target_model.set_weights(self.main_model.get_weights())

    # Counter initialised to zero; nothing in this class updates it.
    # NOTE(review): presumably counts steps since the last target sync - confirm.
    self.target_update_delta = 0

  def create_model(self):
    """Build and compile a 16-16-`action_dim` fully connected network.

    Returns:
      A compiled `Sequential` model using the Adam optimizer and mean squared
      error loss.
    """
    model = Sequential()

    model.add(Dense(16, input_dim=self.state_dim, activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(Dense(self.action_dim, activation="linear"))

    # "mean_squared_loss" is not a registered Keras loss name; the valid
    # identifier is "mean_squared_error" (alias "mse").
    model.compile(optimizer="adam", loss="mean_squared_error")

    return model

  def query(self, state):
    """Run the main model on one state or a batch of states.

    Args:
      state: array-like of shape `(state_dim,)` for a single state, or
        `(n, state_dim)` for a batch.

    Returns:
      Whatever `main_model.predict` returns for the batched input (for the
      model built by `create_model`, one row of `action_dim` values per state).
    """
    # predict() expects a leading batch axis; reshaping to (-1, state_dim)
    # adds it for a single state and leaves an (n, state_dim) batch untouched.
    batch = np.asarray(state).reshape(-1, self.state_dim)
    # verbose=0 keeps predict() from printing a progress bar on every call.
    return self.main_model.predict(batch, verbose=0)

In [22]:
class Memory:
  """Fixed-capacity replay buffer backed by a `deque`.

  Once `size` entries are stored, each new entry pushes out the oldest one
  (the behaviour of `deque(maxlen=...)`).
  """

  def __init__(self, size):
    self.size = size
    self.replay_buffer = deque(maxlen=size)

  def add_memory(self, state, action, reward, next_state, done):
    """Store one `(state, action, reward, next_state, done)` tuple."""
    transition = (state, action, reward, next_state, done)
    self.replay_buffer.append(transition)

  def sample_memories(self, batch_size):
    """Return `batch_size` stored tuples drawn without replacement.

    Raises:
      ValueError: propagated from `random.sample` when `batch_size` exceeds
        the number of stored entries.
    """
    stored = self.replay_buffer
    return random.sample(stored, batch_size)

In [23]:
class Agent:
  """Epsilon-greedy agent that owns a `DQN` and a replay `Memory`.

  Args:
    state_dim: forwarded to `DQN` as its state dimension.
    action_dim: forwarded to `DQN`; also the exclusive upper bound for
      randomly drawn actions.
    memory_size: capacity handed to `Memory`.
    epsilon: threshold compared against a uniform draw in `choose_action`.
  """

  def __init__(self, state_dim, action_dim, memory_size, epsilon):
    self.state_dim, self.action_dim = state_dim, action_dim
    self.memory_size = memory_size
    self.epsilon = epsilon

    self.network = DQN(state_dim, action_dim)
    self.memory = Memory(memory_size)

  def choose_action(self, state):
    """Pick an action for `state`.

    Draws one uniform number; if it is greater than `epsilon` the action is
    `np.argmax` of `self.network.query(state)`, otherwise it is a uniformly
    random integer in `[0, action_dim)`.
    """
    exploit = np.random.random() > self.epsilon
    if not exploit:
      # Exploration branch: uniformly random action index.
      return np.random.randint(0, self.action_dim)

    network_output = self.network.query(state)
    return np.argmax(network_output)