In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gym
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras import layers

In [166]:
# Define the grid world environment
class GridWorld(gym.Env):
    def __init__(self):
        super(GridWorld, self).__init__()
        self.grid_size = 5
        self.goal_position = np.array([self.grid_size - 1, self.grid_size - 1])
        self.agent_position = np.array([0, 0])

        self.action_space = gym.spaces.Discrete(4)  # Up, Down, Left, Right
        self.observation_space = gym.spaces.Discrete(self.grid_size * self.grid_size)

    def reset(self):
        self.agent_position = np.array([0, 0])
        return self._get_observation()

    def step(self, action):
        if action == 0:  # Up
            self.agent_position[0] = max(0, self.agent_position[0] - 1)
        elif action == 1:  # Down
            self.agent_position[0] = min(self.grid_size - 1, self.agent_position[0] + 1)
        elif action == 2:  # Left
            self.agent_position[1] = max(0, self.agent_position[1] - 1)
        elif action == 3:  # Right
            self.agent_position[1] = min(self.grid_size - 1, self.agent_position[1] + 1)

        done = np.array_equal(self.agent_position, self.goal_position)
        reward = 1.0 if done else 0.0

        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        return self.agent_position[0] * self.grid_size + self.agent_position[1]

In [167]:
# Define the Deep Q-Network
class QNetwork(Sequential):
    def __init__(self, state_size, action_size):
        super(QNetwork, self).__init__()
        self.add(Dense(24, input_dim=state_size, activation='relu'))
        self.add(Dense(24, activation='relu'))
        self.add(Dense(action_size, activation='linear'))
        self.compile(optimizer='adam', loss='mse')

In [168]:
# Deep Q-Learning agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.model = QNetwork(state_size, action_size)
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

    def select_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.action_size)
        
        # Modify input shape for prediction
        q_values = self.model.predict(np.array([state]))[0]
        return np.argmax(q_values)

    def train(self, state, action, reward, next_state, done):
        target = reward
        if not done:
            target = (reward + 0.95 * np.amax(self.model.predict(np.array([next_state]))))
        target_f = self.model.predict(np.array([state]))
        target_f[0][action] = target
        self.model.fit(np.array([state]), target_f, epochs=1, verbose=0)

        # Decay exploration rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [169]:
# Function to visualize the trained agent
def visualize_agent(agent, env):
    state = env.reset()
    done = False
    steps = 0

    while not done:
        action = agent.select_action(state)
        next_state, _, done, _ = env.step(action)
        state = next_state

        plt.clf()
        plt.imshow(np.zeros((env.grid_size, env.grid_size)), cmap='gray', vmin=0, vmax=1)
        plt.text(env.goal_position[1], env.goal_position[0], 'G', fontsize=12, ha='center', va='center', color='green')
        plt.text(env.agent_position[1], env.agent_position[0], 'A', fontsize=12, ha='center', va='center', color='red')
        plt.title(f"Step: {steps}")
        plt.pause(0.2)
        steps += 1

    plt.show()

In [170]:
# Main training loop
env = GridWorld()
state_size = env.observation_space.n
action_size = env.action_space.n

agent = DQNAgent(state_size, action_size)

episodes = 300
max_steps = 100

for episode in range(episodes):
    state = env.reset()
    for step in range(max_steps):
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.train(state, action, reward, next_state, done)
        state = next_state
        if done:
            break



ValueError: in user code:

    File "C:\Users\zaida\anaconda3\lib\site-packages\keras\engine\training.py", line 2137, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\zaida\anaconda3\lib\site-packages\keras\engine\training.py", line 2123, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\zaida\anaconda3\lib\site-packages\keras\engine\training.py", line 2111, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\zaida\anaconda3\lib\site-packages\keras\engine\training.py", line 2079, in predict_step
        return self(x, training=False)
    File "C:\Users\zaida\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\zaida\anaconda3\lib\site-packages\keras\engine\input_spec.py", line 250, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'q_network_3' (type QNetwork).
    
    Input 0 of layer "dense_69" is incompatible with the layer: expected min_ndim=2, found ndim=1. Full shape received: (None,)
    
    Call arguments received by layer 'q_network_3' (type QNetwork):
      • inputs=tf.Tensor(shape=(None,), dtype=int32)
      • training=False
      • mask=None


In [None]:
# Visualize the trained agent
visualize_agent(agent, env)