In [1]:
# TreasureHuntGame.ipynb Implementation
# Required Imports
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from GameExperience import GameExperience
from TreasureMaze import TreasureMaze

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Hyperparameters
# -- Q-learning --
gamma = 0.95           # Discount factor handed to GameExperience
epsilon = 0.1          # Exploration factor: chance of taking a random action each step

# -- Optimizer --
learning_rate = 0.001  # Step size given to Adam in build_model

# -- Replay memory and training schedule --
epochs = 1000          # Number of episodes the training loop plays
max_memory = 500       # max_memory argument for GameExperience
data_size = 50         # Sample size requested from experience.get_data per step
batch_size = 10        # NOTE(review): not referenced by the cells visible here

In [None]:
# Initialize Maze
# The pirate starts in the top-left corner of the grid.
pirate_initial_position = (0, 0)

# 4x4 grid layout, one inner list per row.
# Presumably 1 marks an open cell and 0 a blocked one -- confirm against TreasureMaze.
# NOTE(review): the literals are integers, so the array has an integer dtype; if
# TreasureMaze writes fractional markers into the grid they would be truncated -- confirm.
maze = np.array(
    [
        [1, 1, 1, 0],
        [0, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 1],
    ]
)

maze_env = TreasureMaze(maze, pirate_initial_position)

In [None]:
# Define Model
def build_model(input_shape, output_shape, hidden_units=24, lr=None):
    """Build and compile the fully connected Q-network.

    Args:
        input_shape: Shape of a single observation, without the batch dimension.
        output_shape: Number of output units (one Q-value per action).
        hidden_units: Width of each of the two hidden ReLU layers.
        lr: Learning rate for Adam. When None, the notebook-level
            ``learning_rate`` is looked up at call time, which matches the
            behaviour of calling ``build_model(input_shape, output_shape)``.

    Returns:
        A compiled ``Sequential`` model trained with mean-squared-error loss.
    """
    if lr is None:
        lr = learning_rate

    model = Sequential([
        # Declare the input with an explicit Input object rather than passing
        # input_shape to the first layer, as recommended for Sequential models.
        tf.keras.Input(shape=input_shape),
        Flatten(),
        Dense(hidden_units, activation='relu'),
        Dense(hidden_units, activation='relu'),
        # Linear head: Q-values are unbounded regression targets.
        Dense(output_shape, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
    return model

In [None]:
# Initialize Model
output_shape = 4  # Number of possible actions

# The network input shape is taken from the environment's current observation.
# NOTE(review): if observe() returns a batch-shaped array (e.g. (1, n)), this shape
# includes the batch axis, which Keras input shapes normally exclude -- confirm.
input_shape = maze_env.observe().shape

model = build_model(input_shape, output_shape)

# Replay memory wrapping the model; gamma is used as the discount.
experience = GameExperience(model, discount=gamma, max_memory=max_memory)

In [None]:
# Training Loop
# Each epoch plays one episode from the initial pirate position. Every step
# stores a transition in the replay memory and fits the model on a sample of it.
for epoch in range(epochs):
    maze_env.reset(pirate_initial_position)
    game_status = 'not_over'
    total_loss = 0
    while game_status == 'not_over':
        # Snapshot the observation BEFORE acting. Previously both stored states
        # were read after the action, with no step in between, so the recorded
        # transition had no pre-action state. The copy guards against observe()
        # handing back a buffer that act() later mutates in place.
        prev_envstate = np.copy(maze_env.observe())

        # Exploration vs Exploitation
        if np.random.rand() < epsilon:
            action = np.random.choice(maze_env.valid_actions())
        else:
            q_values = experience.predict(prev_envstate)
            action = np.argmax(q_values)

        # Execute action and observe the resulting state. The state returned by
        # act() is ignored; observe() is read explicitly so the stored next state
        # is the post-action observation whatever act() chooses to return.
        _unused_state, reward, game_status = maze_env.act(action)
        envstate_next = maze_env.observe()

        # Any status other than 'not_over' ends the episode (it is what stops this
        # loop), so flag every terminal outcome, not only 'lose'.
        # NOTE(review): assumes the fifth element means "episode over" -- confirm
        # against GameExperience.
        game_over = game_status != 'not_over'

        # Store experience
        experience.remember([prev_envstate, action, reward, envstate_next, game_over])

        # Training: fit on a sample drawn from the replay memory.
        inputs, targets = experience.get_data(data_size)
        loss = model.train_on_batch(inputs, targets)
        total_loss += loss

    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f}")

print("Training complete.")