In [7]:
# Import necessary libraries
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Input
from keras.optimizers import Adam

import zipfile
import os
import sys

# Ensure the extracted folder is added to the system path
sys.path.append('/content/TreasureHuntGame')  # Update this path if necessary

# Import game environment classes
from TreasureMaze import TreasureMaze
from GameExperience import GameExperience

# Define the 8x8 maze
maze = np.array([
    [1., 0., 1., 1., 1., 1., 1., 1.],
    [1., 0., 1., 1., 1., 0., 1., 1.],
    [1., 1., 1., 1., 0., 1., 0., 1.],
    [1., 1., 1., 0., 1., 1., 1., 1.],
    [1., 1., 0., 1., 1., 1., 1., 1.],
    [1., 1., 1., 0., 1., 0., 0., 0.],
    [1., 1., 1., 0., 1., 1., 1., 1.],
    [1., 1., 1., 1., 0., 1., 1., 1.]
])

# Initialize environment with the maze
env = TreasureMaze(maze)

# Print available methods to find the correct movement function
print(dir(env))  # Run this once to find the correct method

# Define state and action sizes if not provided by the environment
try:
    state_size = env.state_size
    action_size = env.action_size
except AttributeError:
    state_size = 64  # Assuming an 8x8 flattened grid
    action_size = 4  # Up, Down, Left, Right

# Build the deep Q-learning model
def build_model():
    model = Sequential([
        Input(shape=(state_size,)),  # ✅ Fix: Use Input() instead of input_dim
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(action_size, activation='linear')
    ])
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
    return model

model = build_model()

# ✅ Fix: Pass the model when creating GameExperience
memory = GameExperience(model)

# Epsilon-greedy action selection
def choose_action(state, epsilon):
    if np.random.rand() < epsilon:
        return random.choice(env.valid_actions())
    state_input = np.array(state).reshape(1, -1)
    q_values = model.predict(state_input, verbose=0)
    return np.argmax(q_values[0])

# Training loop
for epoch in range(epochs):
    state = env.reset(pirate=(0, 0))  # ✅ Fix: Pass required pirate argument
    done = False
    total_reward = 0

    while not done:
        action = choose_action(state, epsilon)

        # ✅ Fix: Use the correct method to move the pirate
        try:
            next_state, reward, done = env.take_action(action)  # ✅ Replace with correct function
        except AttributeError:
            env.take_action(action)  # Move the pirate
            next_state = env.get_state()  # Get the new state
            reward = env.get_reward()  # Get the reward
            done = env.is_done()  # Check if the game is over

        total_reward += reward

        # Ensure memory is added in correct format
        memory.add((state, action, reward, next_state, done))
        state = next_state

        if len(memory) >= batch_size:
            batch = memory.sample(batch_size)
            states, actions, rewards, next_states, dones = zip(*batch)

            # Convert lists to NumPy arrays
            states = np.array(states)
            next_states = np.array(next_states)
            rewards = np.array(rewards)
            dones = np.array(dones)
            actions = np.array(actions)

            # Predict Q-values
            q_next = model.predict(next_states, verbose=0)
            targets = model.predict(states, verbose=0)

            for i in range(batch_size):
                targets[i][actions[i]] = rewards[i] if dones[i] else rewards[i] + gamma * np.max(q_next[i])

            model.train_on_batch(states, targets)

    # Update epsilon
    epsilon = max(epsilon * epsilon_decay, epsilon_min)
    print(f"Epoch: {epoch}  Total Reward: {total_reward}  Epsilon: {epsilon}")

# Test the trained agent
def play_game():
    state = env.reset(pirate=(0, 0))  # ✅ Fix: Pass required pirate argument
    done = False
    while not done:
        action = choose_action(state, epsilon_min)  # Use lowest epsilon for best learned behavior

        # ✅ Fix: Use the correct method to move the pirate
        try:
            state, _, done = env.take_action(action)
        except AttributeError:
            env.take_action(action)
            state = env.get_state()
            done = env.is_done()

        env.render()
    print("Game Over")

play_game()


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_maze', 'act', 'draw_env', 'free_cells', 'game_status', 'get_reward', 'maze', 'min_reward', 'observe', 'pirate', 'reset', 'state', 'target', 'total_reward', 'update_state', 'valid_actions', 'visited']


AttributeError: 'TreasureMaze' object has no attribute 'take_action'