In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
from deap import base, creator, tools, algorithms

# --- Grid world -----------------------------------------------------------
GRID_SIZE = 10      # the world is a GRID_SIZE x GRID_SIZE board
GOAL_POS = [9, 9]   # [x, y] cell the agent has to reach
OBSTACLES = [       # [x, y] cells the agent is not allowed to step onto
    [3, 3], [3, 4], [4, 3], [4, 4],
    [7, 6], [6, 7],
]
MAX_STEPS = 50      # step budget for one game round

# --- Network shape --------------------------------------------------------
INPUT_SIZE = 6      # sensor readings fed to the network
HIDDEN_SIZE = 10    # hidden-layer neurons
OUTPUT_SIZE = 4     # one output per move (up, down, left, right)
# Every hidden and output neuron owns one weight per incoming connection
# plus one bias: (6 + 1) * 10 + (10 + 1) * 4 = 114 genes in total.
GENOME_LENGTH = (INPUT_SIZE + 1) * HIDDEN_SIZE + (HIDDEN_SIZE + 1) * OUTPUT_SIZE

# --- Genetic algorithm ----------------------------------------------------
POPULATION_SIZE = 500   # individuals per generation
MUTATION_RATE = 0.2     # chance that an offspring is mutated at all
MUTATION_INDPB = 0.1    # per-gene mutation chance inside a mutated offspring
CROSSOVER_RATE = 0.7    # chance that a pair of offspring is crossed over
TOURNAMENT_SIZE = 5     # contestants per selection tournament
ELITISM_SIZE = 20       # best individuals carried over unchanged
MAX_GENERATIONS = 200   # generations to evolve
N_GAME_ROUNDS = 3       # game rounds averaged into one fitness value

# Re-running this cell would otherwise re-create classes that already exist
# in the DEAP creator module, so drop any left-over definitions first.
vars(creator).pop("FitnessMax", None)
vars(creator).pop("Individual", None)

# Single-objective fitness that is maximised (weight +1.0), and a list-based
# individual type that carries such a fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Enhanced sensory functions
def sense_environment(pos, goal, obstacles=None):
    """
    Build the six sensor readings the network receives for one position.

    Args:
        pos: Current position [x, y]
        goal: Goal position [x, y]
        obstacles: List of obstacle positions [x, y] (optional)

    Returns:
        List of six values, in this order:
        - 0: 1 - max(0, gy - y) / GRID_SIZE  (goal has a larger y; "up")
        - 1: 1 - max(0, y - gy) / GRID_SIZE  (goal has a smaller y; "down")
        - 2: 1 - max(0, gx - x) / GRID_SIZE  (goal has a larger x)
        - 3: 1 - max(0, x - gx) / GRID_SIZE  (goal has a smaller x)
        - 4: Manhattan proximity, 1 - manhattan / (2 * GRID_SIZE)
        - 5: Euclidean distance to the nearest obstacle divided by
             GRID_SIZE, or 1.0 when there are no obstacles or the nearest
             one is at least GRID_SIZE away
        For readings 0-4 a higher value means the goal is closer along
        that measure; for reading 5 a lower value means an obstacle is
        closer.
    """
    x, y = pos
    gx, gy = goal

    # Readings 0-3: one-sided axis offsets to the goal, clipped at zero,
    # scaled by the grid size and inverted.
    goal_readings = [
        1 - max(0, offset) / GRID_SIZE
        for offset in (gy - y, y - gy, gx - x, x - gx)
    ]

    # Reading 4: inverted, normalised Manhattan distance.
    manhattan_proximity = 1 - (abs(x - gx) + abs(y - gy)) / (2 * GRID_SIZE)

    # Reading 5: stays at 1.0 unless some obstacle lies within GRID_SIZE.
    # The distance is measured in every direction, not along a heading.
    obstacle_sensor = 1.0
    if obstacles:
        nearest = min(
            ((x - ox) ** 2 + (y - oy) ** 2) ** 0.5 for ox, oy in obstacles
        )
        if nearest < GRID_SIZE:
            obstacle_sensor = min(1.0, nearest / GRID_SIZE)

    return goal_readings + [manhattan_proximity, obstacle_sensor]

# Neural network function
def nn_function(inp, wei):
    """
    Run the one-hidden-layer network encoded by a flat genome.

    The genome is read in this order: input->hidden weights
    (INPUT_SIZE * HIDDEN_SIZE values), hidden biases (HIDDEN_SIZE),
    hidden->output weights (HIDDEN_SIZE * OUTPUT_SIZE), and output biases
    (the last OUTPUT_SIZE values).

    Args:
        inp: List of sensory inputs
        wei: Genome (weights) of neural network

    Returns:
        NumPy array with one softmax probability per action
    """
    # Offsets of the genome segments.
    hidden_w_end = INPUT_SIZE * HIDDEN_SIZE
    hidden_b_end = hidden_w_end + HIDDEN_SIZE

    w_ih = np.array(wei[:hidden_w_end]).reshape(INPUT_SIZE, HIDDEN_SIZE)
    b_h = np.array(wei[hidden_w_end:hidden_b_end])
    w_ho = np.array(wei[hidden_b_end:-OUTPUT_SIZE]).reshape(HIDDEN_SIZE, OUTPUT_SIZE)
    b_o = np.array(wei[-OUTPUT_SIZE:])

    # Hidden layer with ReLU activation.
    hidden = np.maximum(0, np.dot(inp, w_ih) + b_h)

    # Output layer followed by softmax; the maximum is subtracted before
    # exponentiating for numerical stability.
    logits = np.dot(hidden, w_ho) + b_o
    unnormalised = np.exp(logits - np.max(logits))
    return unnormalised / unnormalised.sum()

# Navigation function
def nn_navigate_me(me, inp):
    """
    Pick the agent's next move from its network outputs.

    Args:
        me: Agent genome (neural network weights)
        inp: Sensory inputs

    Returns:
        Index of the largest network output: 0=up, 1=down, 2=left, 3=right
    """
    action_scores = nn_function(inp, me)

    # Greedy choice: the action with the highest score wins.
    best_action = np.argmax(action_scores)
    return best_action

# Helper function to simulate a single game round
def simulate_game_round(me, start_pos=None):
    """
    Simulate a single game round for an agent.

    The agent moves one cell per step until it reaches the goal, uses up
    MAX_STEPS, or is about to act from the same cell for a third time
    (treated as looping). Moves that would leave the grid or enter an
    obstacle cell leave the agent where it is but still cost a step.

    Args:
        me: Agent genome
        start_pos: Starting position as an [x, y] sequence (random if None).
            A start that coincides with the goal or with an obstacle cell
            is not usable and is replaced by a random valid cell.

    Returns:
        tuple: (score, steps_taken, reached_goal)
        - On success the score is 2 * MAX_STEPS plus the unused steps.
        - Otherwise it is (2 * GRID_SIZE - Manhattan distance to goal)
          minus one point per repeated visit, never below 1.
    """
    goal = tuple(GOAL_POS)
    # Tuples in a set give O(1) lookups and work whether the obstacle cells
    # are written as lists or as tuples.
    blocked = {tuple(cell) for cell in (OBSTACLES or ())}

    # list() accepts any [x, y] sequence and never aliases the caller's object.
    pos = None if start_pos is None else list(start_pos)

    # Re-sample iteratively (no recursion) until the start is a free,
    # non-goal cell.
    while pos is None or tuple(pos) == goal or tuple(pos) in blocked:
        pos = [random.randint(0, GRID_SIZE-1), random.randint(0, GRID_SIZE-1)]

    steps = 0
    reached_goal = False
    visit_counts = {}  # cell -> number of steps the agent has acted from it

    # Simulate game round
    while steps < MAX_STEPS and not reached_goal:
        # Sense environment and decide move
        inp = sense_environment(pos, GOAL_POS, OBSTACLES)
        move = nn_navigate_me(me, inp)

        # Count the current cell before moving
        here = tuple(pos)
        visit_counts[here] = visit_counts.get(here, 0) + 1

        # Update position based on selected move; moves off the grid are ignored
        new_pos = pos.copy()
        if move == 0 and pos[1] < GRID_SIZE-1:  # up
            new_pos[1] += 1
        elif move == 1 and pos[1] > 0:  # down
            new_pos[1] -= 1
        elif move == 2 and pos[0] > 0:  # left
            new_pos[0] -= 1
        elif move == 3 and pos[0] < GRID_SIZE-1:  # right
            new_pos[0] += 1

        # Stay in the current cell if the move would hit an obstacle
        if tuple(new_pos) not in blocked:
            pos = new_pos

        reached_goal = tuple(pos) == goal
        steps += 1

        # Detect loops: the agent already acted from this cell more than twice
        if visit_counts.get(tuple(pos), 0) > 2:
            break

    # Calculate score based on performance
    if reached_goal:
        # Fixed reward for reaching the goal plus a bonus for unused steps
        score = MAX_STEPS * 2 + (MAX_STEPS - steps)
    else:
        # Reward proximity to the goal (Manhattan distance)
        dist = abs(pos[0] - GOAL_POS[0]) + abs(pos[1] - GOAL_POS[1])
        score = max(0, GRID_SIZE * 2 - dist)

        # One penalty point for every visit beyond the first to any cell
        loop_penalty = sum(count - 1 for count in visit_counts.values())
        score = max(1, score - loop_penalty)  # Ensure minimum score of 1

    return score, steps, reached_goal

# Fitness function
def handle_mes_fitnesses(mes):
    """
    Compute fitness for a list of agents based on multiple game rounds.

    Each agent plays N_GAME_ROUNDS rounds from random start cells. Its
    fitness is the mean round score plus 20 points for every round in
    which the goal was reached. Start cells are drawn at random and
    rejected while they coincide with the goal or with an obstacle cell,
    so an agent never begins a round inside a wall.

    Args:
        mes: List of agent genomes

    Returns:
        List of fitness values as one-element tuples, in the order of `mes`
    """
    # Cells that may not be used as a start: every obstacle plus the goal.
    forbidden_starts = {tuple(cell) for cell in (OBSTACLES or ())}
    forbidden_starts.add(tuple(GOAL_POS))

    fitnesses = []

    for me in mes:
        total_score = 0
        goals_reached = 0

        # Run multiple game rounds from different starting positions
        for _ in range(N_GAME_ROUNDS):
            # Rejection-sample a free start cell
            start_pos = [random.randint(0, GRID_SIZE-1), random.randint(0, GRID_SIZE-1)]
            while tuple(start_pos) in forbidden_starts:
                start_pos = [random.randint(0, GRID_SIZE-1), random.randint(0, GRID_SIZE-1)]

            # Simulate game round (the step count is not used for fitness)
            score, _steps, reached_goal = simulate_game_round(me, start_pos)

            total_score += score
            if reached_goal:
                goals_reached += 1

        # Average score across game rounds plus the goal-reaching bonus
        avg_score = total_score / N_GAME_ROUNDS
        final_fitness = avg_score + (goals_reached * 20)

        # DEAP expects fitness values as tuples
        fitnesses.append((final_fitness,))

    return fitnesses

# DEAP toolbox: the registry of factories and operators used by the GA
toolbox = base.Toolbox()

# Factories: one gene -> one individual -> a whole population
toolbox.register("gene_maker", random.uniform, -1, 1)  # initial weights are drawn from [-1, 1]
toolbox.register(
    "individual_creator",
    tools.initRepeat,
    creator.Individual,
    toolbox.gene_maker,
    n=GENOME_LENGTH,
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual_creator)

# Variation and selection operators
toolbox.register("select", tools.selTournament, tournsize=TOURNAMENT_SIZE)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.3, indpb=MUTATION_INDPB)
# Single-individual evaluation goes through the batch fitness function
toolbox.register("evaluate", lambda individual: handle_mes_fitnesses([individual])[0])

# Per-generation statistics computed over the individuals' fitness values
stats = tools.Statistics(lambda individual: individual.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)
stats.register("max", np.max)
stats.register("std", np.std)

def visualize_best_agent(genome, n_trials=3):
    """
    Visualize the behavior of the best agent in the grid world.

    For each trial the agent is placed on a random free cell (neither the
    goal nor an obstacle), driven by its network for up to MAX_STEPS
    steps, and its path is drawn in its own subplot together with the
    grid, the obstacles, the start and the goal. The finished figure is
    displayed with plt.show().

    Args:
        genome: Genome (weights) of the best agent
        n_trials: Number of trials to visualize
    """
    goal = tuple(GOAL_POS)
    # Tuples in a set: O(1) lookups, and list- or tuple-style obstacle
    # cells are both recognised.
    blocked = {tuple(cell) for cell in (OBSTACLES or ())}

    plt.figure(figsize=(15, 5*n_trials))

    for trial in range(n_trials):
        # Rejection-sample a start cell that is neither the goal nor an obstacle
        start_pos = [random.randint(0, GRID_SIZE-1), random.randint(0, GRID_SIZE-1)]
        while tuple(start_pos) == goal or tuple(start_pos) in blocked:
            start_pos = [random.randint(0, GRID_SIZE-1), random.randint(0, GRID_SIZE-1)]

        pos = start_pos.copy()
        path = [pos.copy()]
        steps = 0
        reached_goal = False

        # Simulate agent's path
        while steps < MAX_STEPS and not reached_goal:
            inp = sense_environment(pos, GOAL_POS, OBSTACLES)
            move = nn_navigate_me(genome, inp)

            # Update position; moves that would leave the grid are ignored
            new_pos = pos.copy()
            if move == 0 and pos[1] < GRID_SIZE-1:  # up
                new_pos[1] += 1
            elif move == 1 and pos[1] > 0:  # down
                new_pos[1] -= 1
            elif move == 2 and pos[0] > 0:  # left
                new_pos[0] -= 1
            elif move == 3 and pos[0] < GRID_SIZE-1:  # right
                new_pos[0] += 1

            # Stay in the current cell if the move would hit an obstacle
            if tuple(new_pos) not in blocked:
                pos = new_pos

            # Every step is recorded, including steps where the agent stayed put
            path.append(pos.copy())

            reached_goal = tuple(pos) == goal
            steps += 1

        # Plot the grid and path
        plt.subplot(n_trials, 1, trial+1)
        plt.grid(True)
        plt.xlim(-0.5, GRID_SIZE-0.5)
        plt.ylim(-0.5, GRID_SIZE-0.5)

        # Draw the four edges of every cell
        for i in range(GRID_SIZE):
            for j in range(GRID_SIZE):
                plt.plot([i-0.5, i+0.5], [j-0.5, j-0.5], 'k-', alpha=0.2)
                plt.plot([i-0.5, i-0.5], [j-0.5, j+0.5], 'k-', alpha=0.2)
                plt.plot([i+0.5, i+0.5], [j-0.5, j+0.5], 'k-', alpha=0.2)
                plt.plot([i-0.5, i+0.5], [j+0.5, j+0.5], 'k-', alpha=0.2)

        # Draw obstacles (if any) as filled gray squares
        if OBSTACLES:
            for ox, oy in OBSTACLES:
                plt.fill([ox-0.5, ox+0.5, ox+0.5, ox-0.5],
                         [oy-0.5, oy-0.5, oy+0.5, oy+0.5], 'gray', alpha=0.5)

        # Draw start and goal
        plt.plot(start_pos[0], start_pos[1], 'go', markersize=10, label='Start')
        plt.plot(GOAL_POS[0], GOAL_POS[1], 'ro', markersize=10, label='Goal')

        # Draw path as a line with a dot on every visited cell
        path_x = [p[0] for p in path]
        path_y = [p[1] for p in path]
        plt.plot(path_x, path_y, 'b-', linewidth=2, alpha=0.6)
        plt.plot(path_x, path_y, 'b.', markersize=5)

        # Add labels; len(path)-1 is the number of steps taken
        plt.title(f'Trial {trial+1}: {"Goal Reached" if reached_goal else "Failed"} in {len(path)-1} steps')
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.legend()

    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    # Generation zero: build the population and score every member
    population = toolbox.population(n=POPULATION_SIZE)
    fitnesses = handle_mes_fitnesses(population)
    for member, member_fitness in zip(population, fitnesses):
        member.fitness.values = member_fitness

    # Hall of fame keeps the best ELITISM_SIZE individuals seen so far
    hof = tools.HallOfFame(ELITISM_SIZE)
    hof.update(population)

    # Per-generation best fitness, kept for the plot at the end
    gen_history = []
    fitness_history = []

    # Main evolutionary loop
    for gen in range(MAX_GENERATIONS):
        print(f"-- Generation {gen} --")

        # Tournament-select parents for every non-elite slot and work on
        # copies so the current population is left untouched
        n_offspring = len(population) - ELITISM_SIZE
        offspring = [toolbox.clone(parent)
                     for parent in toolbox.select(population, n_offspring)]

        # Crossover: consecutive pairs (0,1), (2,3), ...; an unpaired last
        # individual is skipped without drawing a random number
        for first in range(0, len(offspring) - 1, 2):
            second = first + 1
            if random.random() < CROSSOVER_RATE:
                offspring[first], offspring[second] = toolbox.mate(
                    offspring[first], offspring[second])
                # The children's old fitness no longer describes them
                for child in (offspring[first], offspring[second]):
                    del child.fitness.values

        # Mutation: each offspring is mutated with probability MUTATION_RATE
        for idx in range(len(offspring)):
            if random.random() >= MUTATION_RATE:
                continue
            (offspring[idx],) = toolbox.mutate(offspring[idx])
            del offspring[idx].fitness.values

        # Copies of the best individuals of the current generation
        elites = toolbox.clone(tools.selBest(population, ELITISM_SIZE))

        # Only offspring changed by crossover or mutation need re-scoring
        invalid_ind = [child for child in offspring if not child.fitness.valid]
        fitnesses = handle_mes_fitnesses(invalid_ind)
        for child, child_fitness in zip(invalid_ind, fitnesses):
            child.fitness.values = child_fitness

        # Next generation = offspring followed by the elites
        population[:] = offspring + elites

        hof.update(population)

        # Report statistics for this generation
        record = stats.compile(population)
        print(f"  Min: {record['min']:.2f}, Max: {record['max']:.2f}, Avg: {record['avg']:.2f}, Std: {record['std']:.2f}")

        gen_history.append(gen)
        fitness_history.append(record['max'])

    # Best individual of the final population
    best_individual = tools.selBest(population, 1)[0]

    print("\n=== Best Individual ===")
    print(f"Fitness: {best_individual.fitness.values[0]:.2f}")

    # Plot the best fitness of every generation
    plt.figure(figsize=(10, 6))
    plt.plot(gen_history, fitness_history, linewidth=2)
    plt.xlabel('Generation')
    plt.ylabel('Max Fitness (Score)')
    plt.title('Evolution of Neural Network Fitness')
    plt.grid(True)
    plt.show()

    # Show how the best agent moves through the grid
    print("\nVisualizing best agent's behavior...")
    visualize_best_agent(best_individual)

gen	nevals	avg  	min	max
0  	0     	7.728	1  	50 
1  	392   	9.879	1  	50 


AttributeError: 'list' object has no attribute 'fitness'