In [None]:
# ============================================================================
# CELL 1: State Management & Utilities
# ============================================================================

from pathlib import Path
import json
import numpy as np
import time
from collections import deque

BASE_PATH = Path("C:/Users/HP/Documents/cogai/")
BASE_PATH_MAW = Path("C:/Users/natmaw/Documents/Boston Stuff/CS 5100 Foundations of AI/cogai/")
ACTION_FILE = BASE_PATH / "action.json"
STATE_FILE = BASE_PATH / "game_state.json"

EXPECTED_STATE_DIM = 6
PALETTE_DIM = 768
TILE_DIM = 600

# State vector from Lua: [x, y, map_id, in_battle, menu_flag, direction]
# x, y: 0-255 (raw tile coordinates)
# map_id: 0-255
# in_battle: 0 or 1
# menu_flag: 0 or 1 (from game_state == 1)
# direction: 0-3 (DOWN=0, UP=1, LEFT=2, RIGHT=3) - already normalized by Lua

def normalize_game_state(raw_state):
    """Normalize context state for learning.
    
    Lua sends: [x, y, map_id, in_battle, menu_flag, direction]
    - x, y: 0-255 tile coordinates
    - map_id: 0-255
    - in_battle: 0 or 1
    - menu_flag: 0 or 1
    - direction: 0-3 (already normalized by Lua)
    """
    if len(raw_state) < 6:
        return raw_state
    
    normalized = raw_state.copy()
    
    # Normalize x, y to 0-1 range for learning
    normalized[0] = raw_state[0] / 255.0
    normalized[1] = raw_state[1] / 255.0
    
    # Map ID: keep as-is but clamp
    normalized[2] = np.clip(raw_state[2], 0, 255)
    
    # Battle flag: ensure binary
    normalized[3] = 1.0 if raw_state[3] > 0 else 0.0
    
    # Menu flag: ensure 0-1
    normalized[4] = 1.0 if raw_state[4] > 0 else 0.0
    
    # Direction: already 0-3 from Lua, keep as-is
    normalized[5] = int(raw_state[5]) % 4
    
    return normalized

def compute_derived_features(current, prev):
    """Extract temporal features (8D)"""
    if prev is None:
        return np.zeros(8)
    
    # Velocity uses normalized coordinates
    vel_x = current[0] - prev[0]
    vel_y = current[1] - prev[1]
    map_changed = 1.0 if abs(current[2] - prev[2]) > 0.5 else 0.0
    battle_started = 1.0 if current[3] > prev[3] else 0.0
    battle_ended = 1.0 if current[3] < prev[3] else 0.0
    menu_opened = 1.0 if current[4] > prev[4] else 0.0
    menu_closed = 1.0 if current[4] < prev[4] else 0.0
    direction_changed = 1.0 if current[5] != prev[5] else 0.0
    
    return np.array([vel_x, vel_y, map_changed, battle_started, battle_ended,
                     menu_opened, menu_closed, direction_changed])

def build_learning_state(derived, palette, tiles, in_battle):
    """
    HYBRID PERCEPTION:
    - Overworld: tiles (spatial) + palette (context)
    - Battle: palette only (tiles are just UI)
    """
    if in_battle > 0.5:
        state = np.concatenate([derived, palette])
    else:
        state = np.concatenate([derived, tiles, palette])
    
    noise = np.random.randn(len(state)) * 0.0001
    return state + noise

def read_game_state(max_retries=3):
    """
    Returns:
        context_state: normalized state for learning/error calculation
        palette_state: visual palette data
        tile_state: visual tile data
        dead: death flag
        raw_position: (raw_x, raw_y) for tile tracking - NOT normalized
    """
    if not STATE_FILE.exists():
        return np.zeros(EXPECTED_STATE_DIM), np.zeros(PALETTE_DIM), np.zeros(TILE_DIM), False, (0, 0)

    for attempt in range(max_retries):
        try:
            with open(STATE_FILE, "r") as f:
                data = json.loads(f.read())
            
            raw = data.get("state", [])
            palette_raw = data.get("palette", [])
            tiles_raw = data.get("tiles", [])
            dead = bool(data.get("dead", False))
            
            # Store raw position BEFORE normalization (these are tile coordinates 0-255)
            raw_x = int(raw[0]) if len(raw) > 0 else 0
            raw_y = int(raw[1]) if len(raw) > 1 else 0
            raw_position = (raw_x, raw_y)

            context_state = normalize_game_state(np.array(raw, dtype=float))
            palette_state = np.array(palette_raw, dtype=float) if palette_raw else np.zeros(PALETTE_DIM)
            tile_state = np.array(tiles_raw, dtype=float) if tiles_raw else np.zeros(TILE_DIM)
            
            break

        except (json.JSONDecodeError, ValueError):
            if attempt < max_retries - 1:
                time.sleep(0.001)
                continue
            return np.zeros(EXPECTED_STATE_DIM), np.zeros(PALETTE_DIM), np.zeros(TILE_DIM), False, (0, 0)
        except Exception:
            return np.zeros(EXPECTED_STATE_DIM), np.zeros(PALETTE_DIM), np.zeros(TILE_DIM), False, (0, 0)

    if context_state.shape[0] < EXPECTED_STATE_DIM:
        context_state = np.pad(context_state, (0, EXPECTED_STATE_DIM - context_state.shape[0]))
    elif context_state.shape[0] > EXPECTED_STATE_DIM:
        context_state = context_state[:EXPECTED_STATE_DIM]
    
    if palette_state.shape[0] < PALETTE_DIM:
        palette_state = np.pad(palette_state, (0, PALETTE_DIM - palette_state.shape[0]))
    elif palette_state.shape[0] > PALETTE_DIM:
        palette_state = palette_state[:PALETTE_DIM]
    
    if tile_state.shape[0] < TILE_DIM:
        tile_state = np.pad(tile_state, (0, TILE_DIM - tile_state.shape[0]))
    elif tile_state.shape[0] > TILE_DIM:
        tile_state = tile_state[:TILE_DIM]

    return context_state, palette_state, tile_state, dead, raw_position

def write_action(action_name):
    if action_name:
        action_name = action_name.upper()
    
    try:
        with open(ACTION_FILE, "w") as f:
            json.dump({"action": action_name}, f)
            f.flush()
    except Exception as e:
        print(f"[ERROR] Failed to write action: {e}")

In [17]:
# ============================================================================
# CELL 2: Perceptron Classes
# ============================================================================

class Perceptron:
    def __init__(self, kind, action=None, group=None, entity_type=None):
        self.kind = kind
        self.action = action
        self.group = group
        self.entity_type = entity_type
        
        self.utility = 1.0
        self.weights = None
        
        self.eligibility_fast = 0.0
        self.eligibility_slow = 0.0
        
        self.familiarity = 0.0
        self.activation_history = deque(maxlen=10)
        
        self.learning_rate = 0.01
        self.prediction_errors = deque(maxlen=50)

    def ensure_weights(self, dim):
        if self.weights is None:
            self.weights = np.random.randn(dim) * 0.001

    def predict(self, state):
        self.ensure_weights(len(state))
        raw_activation = np.dot(self.weights, state)
        
        if self.kind == "entity":
            novelty_factor = 1.0 / (1.0 + np.sqrt(self.familiarity * 0.5))
            decayed_activation = raw_activation * novelty_factor
            self.activation_history.append(abs(raw_activation))
            return decayed_activation
        else:
            return raw_activation

    def adapt_learning_rate(self):
        if len(self.prediction_errors) >= 50:
            avg_error = np.mean(self.prediction_errors)
            
            if avg_error < 0.1:
                self.learning_rate = max(0.001, self.learning_rate * 0.99)
            elif avg_error > 0.5:
                self.learning_rate = min(0.05, self.learning_rate * 1.01)

    def update(self, state, error, gamma_fast=0.5, gamma_slow=0.95, stagnation=0.0):
        self.ensure_weights(len(state))
        
        self.eligibility_fast = gamma_fast * self.eligibility_fast + 1.0
        self.eligibility_slow = gamma_slow * self.eligibility_slow + 1.0
        
        self.adapt_learning_rate()
        
        fast_update = 0.7 * self.learning_rate * error * state * self.eligibility_fast
        slow_update = 0.3 * self.learning_rate * error * state * self.eligibility_slow
        self.weights += fast_update + slow_update

        if self.kind == "action":
            if error > 0.01:
                if stagnation > 0.5:
                    self.utility *= 0.97
                elif error > 0.2:
                    self.utility = min(self.utility * 1.02, 2.0)
                else:
                    self.utility *= 0.995
            
            if self.group == "move":
                self.utility = np.clip(self.utility, 0.1, 2.0)
            else:
                self.utility = np.clip(self.utility, 0.01, 2.0)
        
        if self.kind == "entity" and len(self.activation_history) > 0:
            recent_avg = np.mean(self.activation_history)
            if recent_avg > 0.1:
                self.familiarity += 0.03
        
        if self.kind == "entity":
            prediction = self.predict(state)
            self.prediction_errors.append(abs(prediction - error))


class ControlSwapPerceptron(Perceptron):
    def __init__(self):
        super().__init__(kind="control_swap")
        self.swap_history = deque(maxlen=100)
        self.confidence = 0.0
        
    def should_swap(self, state, movement_stagnation):
        if self.weights is None:
            return False, 0.0
        
        self.ensure_weights(len(state))
        swap_score = np.dot(self.weights, state)
        stagnation_factor = np.tanh(movement_stagnation / 5.0)
        combined_score = swap_score * 0.7 + stagnation_factor * 0.3
        
        return combined_score > 0.5, abs(combined_score)
    
    def record_swap_outcome(self, state, swapped, novelty_gained):
        self.swap_history.append((swapped, novelty_gained))
        
        if len(self.swap_history) >= 20:
            recent = list(self.swap_history)[-20:]
            successful = sum(1 for swap, nov in recent if swap and nov > 0.2)
            self.confidence = successful / 20.0

In [18]:
# ============================================================================
# CELL 3: Brain Class - Tile-Based Interaction Probing
# ============================================================================

class Brain:
    def __init__(self):
        self.perceptrons = []
        
        self.prev_learning_states = deque(maxlen=50)
        self.prev_context_states = deque(maxlen=10)
        self.last_positions = deque(maxlen=30)
        self.action_history = deque(maxlen=100)
        
        self.control_mode = "move"
        self.timestep = 0
        self.last_action = None
        self.last_direction = 0
        
        self.MOVE_UTILITY_FLOOR = 0.05
        self.INTERACT_UTILITY_FLOOR = 0.15
        
        # === PERSISTENT EXPLORATION MEMORY ===
        self.EXPLORATION_MEMORY_FILE = BASE_PATH / "exploration_memory.json"
        self.exploration_memory = {}
        self.current_map_id = None
        self.SAVE_INTERVAL = 100
        
        # Direction mapping
        self.DIRECTION_NAMES = {0: "DOWN", 1: "UP", 2: "LEFT", 3: "RIGHT"}
        self.DIRECTION_TO_INT = {"DOWN": 0, "UP": 1, "LEFT": 2, "RIGHT": 3}
        
        self.DIRECTION_DELTAS_INT = {
            0: (0, 1),   # DOWN
            1: (0, -1),  # UP
            2: (-1, 0),  # LEFT
            3: (1, 0)    # RIGHT
        }
        
        self.ACTION_DELTAS = {
            "UP": (0, -1),
            "DOWN": (0, 1),
            "LEFT": (-1, 0),
            "RIGHT": (1, 0)
        }
        
        self.DELTA_TO_DIRECTION = {
            (0, 1): 0,   # DOWN
            (0, -1): 1,  # UP
            (-1, 0): 2,  # LEFT
            (1, 0): 3    # RIGHT
        }
        
        self.load_exploration_memory()
        
        # === ACTION EXECUTION CONFIRMATION ===
        self.pending_action = None
        self.pending_action_frames = 0
        self.ACTION_CONFIRM_FRAMES = 3
        self.last_confirmed_action = None
        
        # === TILE INTERACTION PROBING ===
        self.INTERACTION_VERIFY_FRAMES = 5  # Frames to wait after A-press to verify result
        self.MIN_SUCCESS_RATE_THRESHOLD = 0.1  # Below this, consider direction "failed"
        self.pending_interaction_verify = None  # Track pending interaction verification
        self.interaction_verify_countdown = 0
        
        # === MENU ESCAPE B-BOOST ===
        self.menu_trap_frames = 0
        self.menu_trap_b_boost = 1.0
        self.menu_trap_position = None
        self.B_BOOST_INCREMENT = 0.15
        self.B_BOOST_MAX = 3.0
        self.MENU_TRAP_THRESHOLD = 5
        self.original_b_utility = None
        
        # === ADAPTIVE MODE SWAPPING ===
        self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD = 15
        self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD = 25
        
        self.move_to_interact_threshold = self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD
        self.interact_to_move_threshold = self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD
        
        self.THRESHOLD_INCREMENT = 15
        self.MAX_THRESHOLD = 150
        
        self.frames_in_current_mode = 0
        self.swap_chain_count = 0
        self.position_at_mode_swap = None
        
        self.last_map_id = None
        self.last_battle_state = None
        
        # === UNPRODUCTIVE MODE SWAP TRACKING ===
        self.UNPRODUCTIVE_SWAP_THRESHOLD = 3
        self.unproductive_swap_count = 0
        self.utilities_before_swapping = {}
        self.swap_chain_active = False
        self.last_action_before_productive = None
        
        # === STATE STAGNATION DETECTION ===
        self.STATE_STAGNATION_THRESHOLD = 20
        self.state_stagnation_count = 0
        self.last_context_state_hash = None
        self.stagnation_initiator_action = None
        self.STAGNATION_INITIATOR_PENALTY = 0.7
        
        # === NOVELTY WEIGHTS ===
        self.UNVISITED_TILE_BONUS = 1.5  # Increased from 1.4
        self.OBSTRUCTION_PENALTY = 0.25
        
        # === TRANSITION SYSTEM ===
        self.TRANSITION_ATTRACTION_WEIGHT = 0.6
        
        # Temp debt parameters
        self.TEMP_DEBT_ACCUMULATION = 0.5
        self.TEMP_DEBT_DECAY = 0.02
        self.TEMP_DEBT_MAX = 15.0
        
        # === TRANSITION BAN SYSTEM ===
        self.transition_bans = {}  # map_id -> ban info
        self.BAN_VICINITY_RADIUS = 3
        self.BAN_COVERAGE_LIFT_THRESHOLD = 0.6
        self.BAN_TIMEOUT_STEPS = 300
        
        # Interaction outcome weights
        self.OUTCOME_WEIGHTS = {
            'map_transition': 1.0,
            'battle': 0.8,
            'menu': 0.6,
            'dialogue': 0.3,
            'nothing': 0.0
        }
        
        # Multi-scale memory
        self.visited_maps = {}
        self.map_novelty_debt = {}
        self.location_memory = {}
        self.location_novelty = {}
        
        self.action_execution_count = {}
        
        self.swap_perceptron = ControlSwapPerceptron()
        
        self.error_history = deque(maxlen=1000)
        self.numeric_error_history = deque(maxlen=1000)
        self.visual_error_history = deque(maxlen=1000)
        
        self._entity_norms_cache = {}
        self._cache_valid = False
        
        self.innate_entities_spawned = False
        
        # === REPETITION CORRECTION ===
        self.consecutive_action_count = 0
        self.current_repeated_action = None
        self.LEARNING_SLOWDOWN_START = 3
        self.LEARNING_SLOWDOWN_MAX = 10
        self.PENALTY_THRESHOLD = 12
        self.HARD_RESET_THRESHOLD = 18
        
        # === PATTERN DETECTION ===
        self.PATTERN_CHECK_WINDOW = 50
        self.PATTERN_MIN_REPEATS = 3
        self.PATTERN_MAX_LENGTH = 10
        self.detected_pattern = None
        self.pattern_repeat_count = 0

    # =========================================================================
    # ACTION EXECUTION CONFIRMATION
    # =========================================================================
    
    def set_pending_action(self, action_name):
        self.pending_action = action_name
        self.pending_action_frames = 0
    
    def confirm_action_executed(self, context_state, prev_context_state):
        if self.pending_action is None:
            return True
        
        self.pending_action_frames += 1
        action_executed = False
        
        if prev_context_state is not None:
            if self.pending_action in ["UP", "DOWN", "LEFT", "RIGHT"]:
                pos_changed = (context_state[0] != prev_context_state[0] or 
                              context_state[1] != prev_context_state[1])
                dir_changed = context_state[5] != prev_context_state[5]
                action_executed = pos_changed or dir_changed
            elif self.pending_action in ["A", "B", "Start", "Select"]:
                menu_changed = abs(context_state[4] - prev_context_state[4]) > 0.1
                battle_changed = context_state[3] != prev_context_state[3]
                map_changed = context_state[2] != prev_context_state[2]
                action_executed = menu_changed or battle_changed or map_changed
        
        if action_executed or self.pending_action_frames >= self.ACTION_CONFIRM_FRAMES:
            self.last_confirmed_action = self.pending_action
            self.pending_action = None
            self.pending_action_frames = 0
            return True
        
        return False
    
    def should_send_new_action(self):
        if self.pending_action is None:
            return True
        if self.pending_action_frames >= self.ACTION_CONFIRM_FRAMES:
            return True
        return False

    # =========================================================================
    # EXPLORATION MEMORY PERSISTENCE
    # =========================================================================
    
    def load_exploration_memory(self):
        try:
            if self.EXPLORATION_MEMORY_FILE.exists():
                with open(self.EXPLORATION_MEMORY_FILE, 'r') as f:
                    data = json.load(f)
                    self.exploration_memory = {}
                    for map_key, map_data in data.items():
                        map_id = int(map_key.replace('map_', ''))
                        self.exploration_memory[map_id] = self._deserialize_map_memory(map_data)
                print(f"  Loaded exploration memory: {len(self.exploration_memory)} maps")
            else:
                self.exploration_memory = {}
                print(f"  No exploration memory found, starting fresh")
        except Exception as e:
            print(f"  Error loading exploration memory: {e}")
            self.exploration_memory = {}

    def _deserialize_map_memory(self, map_data):
        memory = {
            'visited_tiles': set(tuple(t) for t in map_data.get('visited_tiles', [])),
            'obstructions': set(tuple(t) for t in map_data.get('obstructions', [])),
            'interactable_objects': map_data.get('interactable_objects', []),
            'last_visited_timestep': map_data.get('last_visited_timestep', 0),
            'transitions': map_data.get('transitions', []),
            'temp_debt': map_data.get('temp_debt', 0.0),
            'tile_interactions': {}
        }
        
        # Deserialize tile_interactions
        raw_tile_interactions = map_data.get('tile_interactions', {})
        for tile_key, tile_data in raw_tile_interactions.items():
            memory['tile_interactions'][tile_key] = {
                'directions_tried': set(tile_data.get('directions_tried', [])),
                'direction_attempts': tile_data.get('direction_attempts', {0: 0, 1: 0, 2: 0, 3: 0}),
                'direction_successes': tile_data.get('direction_successes', {0: 0, 1: 0, 2: 0, 3: 0}),
                'exhausted': tile_data.get('exhausted', False)
            }
            # Convert string keys to int for direction dicts
            memory['tile_interactions'][tile_key]['direction_attempts'] = {
                int(k): v for k, v in memory['tile_interactions'][tile_key]['direction_attempts'].items()
            }
            memory['tile_interactions'][tile_key]['direction_successes'] = {
                int(k): v for k, v in memory['tile_interactions'][tile_key]['direction_successes'].items()
            }
        
        return memory

    def save_exploration_memory(self):
        try:
            data = {}
            for map_id, map_data in self.exploration_memory.items():
                data[f'map_{map_id}'] = self._serialize_map_memory(map_data)
            
            with open(self.EXPLORATION_MEMORY_FILE, 'w') as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            print(f"  Error saving exploration memory: {e}")

    def _serialize_map_memory(self, map_data):
        # Serialize tile_interactions
        serialized_tile_interactions = {}
        for tile_key, tile_data in map_data.get('tile_interactions', {}).items():
            serialized_tile_interactions[tile_key] = {
                'directions_tried': list(tile_data.get('directions_tried', set())),
                'direction_attempts': {str(k): v for k, v in tile_data.get('direction_attempts', {}).items()},
                'direction_successes': {str(k): v for k, v in tile_data.get('direction_successes', {}).items()},
                'exhausted': tile_data.get('exhausted', False)
            }
        
        serialized = {
            'visited_tiles': list(map_data['visited_tiles']),
            'obstructions': list(map_data['obstructions']),
            'interactable_objects': map_data['interactable_objects'],
            'last_visited_timestep': map_data['last_visited_timestep'],
            'transitions': map_data.get('transitions', []),
            'temp_debt': map_data.get('temp_debt', 0.0),
            'tile_interactions': serialized_tile_interactions
        }
        return serialized

    def get_current_map_memory(self, map_id):
        if map_id not in self.exploration_memory:
            self.exploration_memory[map_id] = {
                'visited_tiles': set(),
                'obstructions': set(),
                'interactable_objects': [],
                'last_visited_timestep': self.timestep,
                'transitions': [],
                'temp_debt': 0.0,
                'tile_interactions': {}
            }
        return self.exploration_memory[map_id]

    def record_visited_tile(self, x, y, map_id):
        tile = (int(x), int(y))
        memory = self.get_current_map_memory(map_id)
        memory['visited_tiles'].add(tile)
        memory['last_visited_timestep'] = self.timestep

    def record_obstruction(self, x, y, map_id, direction):
        dx, dy = self.DIRECTION_DELTAS_INT.get(direction, (0, 0))
        obstruction_tile = (int(x + dx), int(y + dy))
        memory = self.get_current_map_memory(map_id)
        memory['obstructions'].add(obstruction_tile)

    # =========================================================================
    # TILE-BASED INTERACTION PROBING
    # =========================================================================
    
    def get_tile_interaction_key(self, x, y):
        return f"{int(x)}_{int(y)}"
    
    def get_tile_interaction_state(self, x, y, map_id):
        """Get or create interaction state for a tile."""
        memory = self.get_current_map_memory(map_id)
        tile_key = self.get_tile_interaction_key(x, y)
        
        if tile_key not in memory['tile_interactions']:
            memory['tile_interactions'][tile_key] = {
                'directions_tried': set(),
                'direction_attempts': {0: 0, 1: 0, 2: 0, 3: 0},
                'direction_successes': {0: 0, 1: 0, 2: 0, 3: 0},
                'exhausted': False
            }
        
        return memory['tile_interactions'][tile_key]
    
    def should_interact_at_tile(self, x, y, map_id):
        """Returns True if this tile still needs interaction probing."""
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        
        # If exhausted (all directions tried, none successful), don't interact
        if tile_state['exhausted']:
            return False
        
        # If any direction hasn't been tried yet, should interact
        if len(tile_state['directions_tried']) < 4:
            return True
        
        # All directions tried - check if any had success
        for direction in range(4):
            attempts = tile_state['direction_attempts'].get(direction, 0)
            successes = tile_state['direction_successes'].get(direction, 0)
            if attempts > 0 and successes > 0:
                # At least one direction has success, might want to interact again
                success_rate = successes / attempts
                if success_rate >= self.MIN_SUCCESS_RATE_THRESHOLD:
                    return True
        
        return False
    
    def get_untried_directions(self, x, y, map_id):
        """Returns list of directions not yet tried at this tile."""
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        tried = tile_state['directions_tried']
        return [d for d in range(4) if d not in tried]
    
    def get_best_interaction_direction(self, x, y, map_id):
        """Returns the best direction to try interacting.
        Priority: untried directions first, then highest success rate."""
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        
        # First, try untried directions
        untried = self.get_untried_directions(x, y, map_id)
        if untried:
            return untried[0]  # Return first untried
        
        # All tried - return direction with highest success rate (if any)
        best_dir = None
        best_rate = 0.0
        
        for direction in range(4):
            attempts = tile_state['direction_attempts'].get(direction, 0)
            successes = tile_state['direction_successes'].get(direction, 0)
            if attempts > 0:
                rate = successes / attempts
                if rate > best_rate:
                    best_rate = rate
                    best_dir = direction
        
        return best_dir
    
    def record_tile_interaction_attempt(self, x, y, map_id, direction, success):
        """Record an interaction attempt and its result."""
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        
        # Mark direction as tried
        tile_state['directions_tried'].add(direction)
        
        # Update attempt count
        tile_state['direction_attempts'][direction] = \
            tile_state['direction_attempts'].get(direction, 0) + 1
        
        # Update success count
        if success:
            tile_state['direction_successes'][direction] = \
                tile_state['direction_successes'].get(direction, 0) + 1
            
            # Record as interactable object
            memory = self.get_current_map_memory(map_id)
            dir_name = self.DIRECTION_NAMES.get(direction, str(direction))
            interactable = [int(x), int(y), dir_name]
            if interactable not in memory['interactable_objects']:
                memory['interactable_objects'].append(interactable)
                print(f"  üéØ INTERACTABLE FOUND: ({x}, {y}) facing {dir_name}")
        
        # Check if tile should be marked exhausted
        self._check_tile_exhaustion(x, y, map_id)
    
    def _check_tile_exhaustion(self, x, y, map_id):
        """Check if all directions tried and none successful - mark exhausted."""
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        
        # Need all 4 directions tried
        if len(tile_state['directions_tried']) < 4:
            return
        
        # Check if any direction has success
        any_success = False
        for direction in range(4):
            successes = tile_state['direction_successes'].get(direction, 0)
            if successes > 0:
                any_success = True
                break
        
        if not any_success:
            tile_state['exhausted'] = True
            print(f"  ‚úì Tile ({x}, {y}) exhausted - no interactions found")
    
    def get_direction_success_rate(self, x, y, map_id, direction):
        """Get success rate for a specific direction at a tile."""
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        attempts = tile_state['direction_attempts'].get(direction, 0)
        successes = tile_state['direction_successes'].get(direction, 0)
        
        if attempts == 0:
            return None  # Never tried
        
        return successes / attempts
    
    def start_interaction_verification(self, x, y, map_id, direction):
        """Start verifying if an interaction was successful."""
        self.pending_interaction_verify = {
            'x': x, 'y': y, 'map_id': map_id, 'direction': direction
        }
        self.interaction_verify_countdown = self.INTERACTION_VERIFY_FRAMES
    
    def check_interaction_verification(self, context_state, prev_context_state):
        """Check if pending interaction was successful."""
        if self.pending_interaction_verify is None:
            return
        
        self.interaction_verify_countdown -= 1
        
        # Detect if something happened
        success = False
        if prev_context_state is not None:
            menu_changed = abs(context_state[4] - prev_context_state[4]) > 0.1
            battle_started = context_state[3] > 0.5 and prev_context_state[3] <= 0.5
            map_changed = int(context_state[2]) != int(prev_context_state[2])
            success = menu_changed or battle_started or map_changed
        
        # If success detected or countdown finished, record result
        if success or self.interaction_verify_countdown <= 0:
            info = self.pending_interaction_verify
            self.record_tile_interaction_attempt(
                info['x'], info['y'], info['map_id'], info['direction'], success
            )
            self.pending_interaction_verify = None
            self.interaction_verify_countdown = 0

    # =========================================================================
    # TRANSITION SYSTEM
    # =========================================================================
    
    def record_transition(self, from_pos, from_map, to_map, direction, action_type):
        memory = self.get_current_map_memory(from_map)
        
        for t in memory['transitions']:
            if t['position'] == from_pos and t['direction'] == direction:
                t['use_count'] += 1
                t['last_used'] = self.timestep
                return
        
        memory['transitions'].append({
            'position': from_pos,
            'direction': direction,
            'action': action_type,
            'destination_map': to_map,
            'use_count': 1,
            'last_used': self.timestep
        })
        
        print(f"  üö™ TRANSITION FOUND: Map {from_map} ({from_pos}) ‚Üí Map {to_map}")

    def get_transition_attraction(self, current_map):
        memory = self.get_current_map_memory(current_map)
        transitions = memory.get('transitions', [])
        
        if not transitions:
            return 0.0, None
        
        current_debt = self.map_novelty_debt.get(current_map, 0.0)
        current_temp_debt = self.get_temp_debt(current_map)
        current_coverage = self.get_exploration_coverage(current_map)
        
        best_attraction = 0.0
        best_transition = None
        
        for t in transitions:
            dest_map = t['destination_map']
            
            # Check if this transition is banned
            if self.is_transition_banned(current_map, t['position'], t['direction']):
                continue
            
            dest_debt = self.map_novelty_debt.get(dest_map, 0.0)
            dest_temp_debt = self.get_temp_debt(dest_map)
            dest_coverage = self.get_exploration_coverage(dest_map)
            
            total_current = current_debt + current_temp_debt * 2.0
            total_dest = dest_debt + dest_temp_debt * 2.0
            
            debt_diff = total_current - total_dest
            coverage_diff = current_coverage - dest_coverage
            
            attraction = debt_diff * 0.5 + coverage_diff * 0.5
            
            if t['use_count'] < 3:
                attraction *= 1.5
            
            if attraction > best_attraction:
                best_attraction = attraction
                best_transition = t
        
        return best_attraction * self.TRANSITION_ATTRACTION_WEIGHT, best_transition

    # =========================================================================
    # TRANSITION BAN SYSTEM
    # =========================================================================
    
    def create_transition_ban(self, map_id, tile_pos, direction_back):
        """Create a ban for the transition that leads back to previous map."""
        self.transition_bans[map_id] = {
            'banned_tile': tile_pos,
            'banned_direction': direction_back,
            'vicinity_radius': self.BAN_VICINITY_RADIUS,
            'vicinity_active': False,
            'created_at': self.timestep
        }
        print(f"  üö´ TRANSITION BAN: Map {map_id} at {tile_pos} facing {self.DIRECTION_NAMES.get(direction_back, '?')}")
    
    def is_transition_banned(self, map_id, position, direction):
        """Check if a transition is currently banned."""
        if map_id not in self.transition_bans:
            return False
        
        ban = self.transition_bans[map_id]
        banned_tile = ban['banned_tile']
        banned_dir = ban['banned_direction']
        
        if isinstance(position, list):
            position = tuple(position)
        if isinstance(banned_tile, list):
            banned_tile = tuple(banned_tile)
        
        # Exact tile + direction match
        if position == banned_tile and direction == banned_dir:
            return True
        
        # Vicinity check (if active)
        if ban['vicinity_active']:
            dist = abs(position[0] - banned_tile[0]) + abs(position[1] - banned_tile[1])
            if dist <= ban['vicinity_radius'] and direction == banned_dir:
                return True
        
        return False
    
    def is_position_banned(self, map_id, x, y, direction):
        """Check if moving in a direction from position would hit a banned transition."""
        return self.is_transition_banned(map_id, (x, y), direction)
    
    def update_transition_ban(self, map_id, current_pos):
        """Update ban state - activate vicinity when agent moves away."""
        if map_id not in self.transition_bans:
            return
        
        ban = self.transition_bans[map_id]
        banned_tile = ban['banned_tile']
        
        if isinstance(banned_tile, list):
            banned_tile = tuple(banned_tile)
        
        # Calculate distance from banned tile
        dist = abs(current_pos[0] - banned_tile[0]) + abs(current_pos[1] - banned_tile[1])
        
        # Activate vicinity when agent moves 3+ tiles away
        if not ban['vicinity_active'] and dist >= 3:
            ban['vicinity_active'] = True
            print(f"  üö´ VICINITY BAN ACTIVE: Map {map_id}")
    
    def check_ban_lift_conditions(self, map_id):
        """Check if ban should be lifted."""
        if map_id not in self.transition_bans:
            return
        
        ban = self.transition_bans[map_id]
        should_lift = False
        reason = ""
        
        # Condition 1: Another transition found on this map
        memory = self.get_current_map_memory(map_id)
        transitions = memory.get('transitions', [])
        non_banned_transitions = [
            t for t in transitions 
            if not self.is_transition_banned(map_id, t['position'], t['direction'])
        ]
        if len(non_banned_transitions) > 0:
            should_lift = True
            reason = "alternative transition found"
        
        # Condition 2: Coverage threshold reached
        coverage = self.get_exploration_coverage(map_id)
        if coverage >= self.BAN_COVERAGE_LIFT_THRESHOLD:
            should_lift = True
            reason = f"coverage {coverage:.0%} reached"
        
        # Condition 3: Timeout
        steps_since_ban = self.timestep - ban['created_at']
        if steps_since_ban >= self.BAN_TIMEOUT_STEPS:
            should_lift = True
            reason = f"timeout ({steps_since_ban} steps)"
        
        if should_lift:
            del self.transition_bans[map_id]
            print(f"  ‚úÖ BAN LIFTED: Map {map_id} - {reason}")

    # =========================================================================
    # DEBT SYSTEMS
    # =========================================================================
    
    def get_temp_debt(self, map_id):
        memory = self.get_current_map_memory(map_id)
        raw_debt = memory.get('temp_debt', 0.0)
        last_visit = memory.get('last_visited_timestep', 0)
        
        if map_id != self.current_map_id:
            steps_away = self.timestep - last_visit
            decayed = max(0.0, raw_debt - steps_away * self.TEMP_DEBT_DECAY)
            return decayed
        
        return raw_debt

    def accumulate_temp_debt(self, map_id):
        memory = self.get_current_map_memory(map_id)
        current_debt = memory.get('temp_debt', 0.0)
        memory['temp_debt'] = min(self.TEMP_DEBT_MAX, current_debt + self.TEMP_DEBT_ACCUMULATION)

    def get_exploration_coverage(self, map_id):
        memory = self.get_current_map_memory(map_id)
        visited = len(memory['visited_tiles'])
        obstructions = len(memory['obstructions'])
        
        if visited == 0:
            return 0.0
        
        estimated_size = visited + obstructions
        if estimated_size < 10:
            return 0.0
        
        return visited / estimated_size

    def detect_obstruction(self, prev_context, context_state, raw_position, prev_raw_position):
        if prev_context is None or prev_raw_position is None:
            return False
        
        if self.last_action not in ['UP', 'DOWN', 'LEFT', 'RIGHT']:
            return False
        
        if raw_position == prev_raw_position:
            direction = int(context_state[5])
            map_id = int(context_state[2])
            self.record_obstruction(raw_position[0], raw_position[1], map_id, direction)
            return True
        
        return False

    # =========================================================================
    # MENU TRAP B-BOOST
    # =========================================================================
    
    def update_menu_trap_tracking(self, context_state, action_taken, raw_position=None):
        if raw_position is not None:
            current_pos = raw_position
        else:
            current_pos = (round(context_state[0] * 255), round(context_state[1] * 255))
        
        if self.menu_trap_position is not None:
            if current_pos != self.menu_trap_position:
                self.reset_menu_trap_boost()
                return
        
        state_hash = self.get_context_state_hash(context_state)
        
        if state_hash == self.last_context_state_hash:
            if action_taken in ["A", "B", "Start", "Select"]:
                self.menu_trap_frames += 1
                self.menu_trap_position = current_pos
                
                if self.menu_trap_frames > self.MENU_TRAP_THRESHOLD:
                    if self.original_b_utility is None:
                        for a in self.actions():
                            if a.action == 'B':
                                self.original_b_utility = a.utility
                                break
                    
                    self.menu_trap_b_boost = min(
                        self.B_BOOST_MAX,
                        self.menu_trap_b_boost + self.B_BOOST_INCREMENT
                    )
        else:
            if current_pos != self.menu_trap_position:
                self.reset_menu_trap_boost()

    def reset_menu_trap_boost(self):
        if self.menu_trap_b_boost > 1.0:
            if self.original_b_utility is not None:
                for a in self.actions():
                    if a.action == 'B':
                        a.utility = self.original_b_utility
                        break
        
        self.menu_trap_frames = 0
        self.menu_trap_b_boost = 1.0
        self.menu_trap_position = None
        self.original_b_utility = None

    # =========================================================================
    # STANDARD METHODS
    # =========================================================================
    
    def add(self, p):
        self.perceptrons.append(p)
        self._cache_valid = False

    def actions(self):
        return [p for p in self.perceptrons if p.kind == "action"]

    def entities(self):
        return [p for p in self.perceptrons if p.kind == "entity"]

    def get_location_key(self, x, y, map_id, bin_size=5):
        x_bin = int(x // bin_size) * bin_size
        y_bin = int(y // bin_size) * bin_size
        return (int(map_id), x_bin, y_bin)

    def is_near_map_edge(self, x, y):
        return x < 10 or x > 245 or y < 10 or y > 245

    def record_action_execution(self, action_name):
        if action_name:
            self.action_execution_count[action_name] = \
                self.action_execution_count.get(action_name, 0) + 1

    def get_position_stagnation(self):
        if len(self.last_positions) < 2:
            return 0
        
        current_pos = self.last_positions[-1]
        stagnant_frames = 0
        
        for pos in reversed(list(self.last_positions)[:-1]):
            if pos == current_pos:
                stagnant_frames += 1
            else:
                break
        
        return stagnant_frames

    def get_group_weight(self, group):
        group_actions = [a for a in self.actions() if a.group == group]
        if not group_actions:
            return 0.0
        return sum(a.utility for a in group_actions)

    # =========================================================================
    # MODE SWAP & STAGNATION
    # =========================================================================
    
    def get_context_state_hash(self, context_state):
        return (
            round(context_state[0], 2),
            round(context_state[1], 2),
            int(context_state[2]),
            int(context_state[3]),
            round(context_state[4], 2),
            int(context_state[5])
        )

    def check_state_stagnation(self, context_state):
        current_hash = self.get_context_state_hash(context_state)
        
        if current_hash == self.last_context_state_hash:
            self.state_stagnation_count += 1
            if self.state_stagnation_count == 1 and self.last_action:
                self.stagnation_initiator_action = self.last_action
        else:
            self.state_stagnation_count = 0
            self.stagnation_initiator_action = None
        
        self.last_context_state_hash = current_hash
        return self.state_stagnation_count >= self.STATE_STAGNATION_THRESHOLD

    def apply_stagnation_initiator_penalty(self):
        if self.stagnation_initiator_action is None:
            return
        
        action_name = self.stagnation_initiator_action
        
        for a in self.actions():
            if a.action == action_name:
                old_util = a.utility
                a.utility *= self.STAGNATION_INITIATOR_PENALTY
                floor = self.INTERACT_UTILITY_FLOOR if a.group == "interact" else self.MOVE_UTILITY_FLOOR
                a.utility = max(a.utility, floor)
                print(f"  üìç STAGNATION PENALTY: {action_name} {old_util:.3f} ‚Üí {a.utility:.3f}")
                break
        
        self.stagnation_initiator_action = None

    def check_productive_change(self, context_state):
        current_map = int(context_state[2])
        current_battle = context_state[3] > 0.5
        current_pos = (context_state[0], context_state[1])
        
        productive = False
        reason = ""
        
        if self.last_map_id is not None and current_map != self.last_map_id:
            productive = True
            reason = "map change"
        
        if self.last_battle_state is not None and current_battle != self.last_battle_state:
            productive = True
            reason = "battle change"
        
        if self.position_at_mode_swap is not None:
            dist = np.sqrt((current_pos[0] - self.position_at_mode_swap[0])**2 + 
                          (current_pos[1] - self.position_at_mode_swap[1])**2)
            if dist > 0.03:
                productive = True
                reason = f"moved {dist*255:.1f} tiles"
        
        self.last_map_id = current_map
        self.last_battle_state = current_battle
        
        return productive, reason

    def on_productive_change(self, reason):
        self.move_to_interact_threshold = self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD
        self.interact_to_move_threshold = self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD
        self.swap_chain_count = 0
        self.state_stagnation_count = 0
        self.stagnation_initiator_action = None
        self.unproductive_swap_count = 0
        self.swap_chain_active = False
        self.utilities_before_swapping = {}

    def on_mode_swap(self, from_mode, to_mode):
        self.swap_chain_count += 1
        self.frames_in_current_mode = 0
        self.unproductive_swap_count += 1
        
        if self.unproductive_swap_count >= self.UNPRODUCTIVE_SWAP_THRESHOLD:
            self._reset_highest_to_third(to_mode)
            self.unproductive_swap_count = 0
        
        if to_mode == "interact":
            self.interact_to_move_threshold = min(
                self.MAX_THRESHOLD,
                self.interact_to_move_threshold + self.THRESHOLD_INCREMENT
            )
        else:
            self.move_to_interact_threshold = min(
                self.MAX_THRESHOLD,
                self.move_to_interact_threshold + self.THRESHOLD_INCREMENT
            )

    def _reset_highest_to_third(self, mode):
        if mode == "battle":
            return
        
        group = "move" if mode == "move" else "interact"
        group_actions = [a for a in self.actions() if a.group == group]
        
        if len(group_actions) < 3:
            return
        
        sorted_actions = sorted(group_actions, key=lambda a: a.utility, reverse=True)
        highest = sorted_actions[0]
        third_util = sorted_actions[2].utility
        
        old_util = highest.utility
        highest.utility = third_util * 0.9
        
        floor = self.INTERACT_UTILITY_FLOOR if group == "interact" else self.MOVE_UTILITY_FLOOR
        highest.utility = max(highest.utility, floor)

    def determine_control_mode(self, context_state, raw_position=None):
        in_battle = context_state[3] > 0.5
        
        if in_battle:
            return "battle"
        
        self.frames_in_current_mode += 1
        position_stagnation = self.get_position_stagnation()
        
        productive, reason = self.check_productive_change(context_state)
        if productive:
            self.on_productive_change(reason)
        
        state_is_static = self.check_state_stagnation(context_state)
        
        if state_is_static:
            self.apply_stagnation_initiator_penalty()
            
            if self.control_mode == "move":
                self.control_mode = "interact"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.on_mode_swap("move", "interact")
            else:
                self.control_mode = "move"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.on_mode_swap("interact", "move")
            
            self.state_stagnation_count = 0
            return self.control_mode
        
        # NEW: Check if current tile needs interaction probing
        if raw_position is not None:
            raw_x, raw_y = raw_position
        else:
            raw_x = int(context_state[0] * 255)
            raw_y = int(context_state[1] * 255)
        
        current_map = int(context_state[2])
        tile_needs_probing = self.should_interact_at_tile(raw_x, raw_y, current_map)
        untried_directions = self.get_untried_directions(raw_x, raw_y, current_map)
        
        # If tile needs probing and has untried directions, favor interact mode
        if tile_needs_probing and untried_directions and self.control_mode == "move":
            # Only switch if we've been in move mode for a bit
            if self.frames_in_current_mode >= 3:
                self.control_mode = "interact"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.frames_in_current_mode = 0
                return self.control_mode
        
        # Standard mode swap logic
        if self.control_mode == "move":
            if position_stagnation >= self.move_to_interact_threshold:
                self.control_mode = "interact"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.on_mode_swap("move", "interact")
        
        elif self.control_mode == "interact":
            # If tile is exhausted or all directions tried, go back to move
            if not tile_needs_probing or not untried_directions:
                if self.frames_in_current_mode >= 5:
                    self.control_mode = "move"
                    self.position_at_mode_swap = (context_state[0], context_state[1])
                    self.frames_in_current_mode = 0
                    return self.control_mode
            
            if self.frames_in_current_mode >= self.interact_to_move_threshold:
                self.control_mode = "move"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.on_mode_swap("interact", "move")
        
        return self.control_mode

    # =========================================================================
    # EXPLORATION TRACKING
    # =========================================================================
    
    def update_exploration_tracking(self, context_state, prev_context_state, raw_position=None, prev_raw_position=None):
        current_map = int(context_state[2])
        
        if raw_position is not None:
            raw_x, raw_y = raw_position
        else:
            raw_x, raw_y = int(context_state[0] * 255), int(context_state[1] * 255)
        
        current_pos = (raw_x, raw_y)
        
        # Handle map change
        if self.current_map_id is not None and current_map != self.current_map_id:
            prev_map = self.current_map_id
            
            if prev_context_state is not None and prev_raw_position is not None:
                self.record_transition(
                    prev_raw_position, prev_map, current_map,
                    int(prev_context_state[5]), 
                    'interact' if self.last_action == 'A' else 'walk'
                )
            
            # Create transition ban for the way back
            if prev_raw_position is not None:
                # The direction that leads BACK is opposite of how we entered
                entry_direction = int(context_state[5]) if prev_context_state is not None else 0
                # We want to ban the direction that would take us back
                opposite_dir = (entry_direction + 2) % 4  # Opposite direction
                self.create_transition_ban(current_map, current_pos, opposite_dir)
            
            self.on_map_change(current_map)
        
        self.current_map_id = current_map
        
        self.record_visited_tile(raw_x, raw_y, current_map)
        self.accumulate_temp_debt(current_map)
        
        # Update transition ban state
        self.update_transition_ban(current_map, current_pos)
        self.check_ban_lift_conditions(current_map)
        
        if prev_context_state is not None and prev_raw_position is not None:
            self.detect_obstruction(prev_context_state, context_state, raw_position, prev_raw_position)
        
        # Check interaction verification
        self.check_interaction_verification(context_state, prev_context_state)
        
        self.last_direction = int(context_state[5])

    def on_map_change(self, new_map):
        self.save_exploration_memory()
        
        self.control_mode = "move"
        self.frames_in_current_mode = 0
        
        memory = self.get_current_map_memory(new_map)
        visited_count = len(memory['visited_tiles'])
        obs_count = len(memory['obstructions'])
        transitions = len(memory.get('transitions', []))
        
        # Count interaction stats
        tile_interactions = memory.get('tile_interactions', {})
        exhausted_count = sum(1 for t in tile_interactions.values() if t.get('exhausted', False))
        probed_count = len(tile_interactions)
        
        print(f"  üó∫Ô∏è MAP CHANGE ‚Üí {new_map}: {visited_count} visited, {obs_count} obs, {transitions} transitions")
        print(f"     Tiles probed: {probed_count}, exhausted: {exhausted_count}")

    # =========================================================================
    # REPETITION & PATTERN HANDLING
    # =========================================================================
    
    def track_consecutive_action(self, action_name):
        if action_name == self.current_repeated_action:
            self.consecutive_action_count += 1
        else:
            self.current_repeated_action = action_name
            self.consecutive_action_count = 1

    def get_learning_multiplier(self, action_name):
        if action_name != self.current_repeated_action:
            return 1.0
        
        if self.consecutive_action_count < self.LEARNING_SLOWDOWN_START:
            return 1.0
        
        progress = (self.consecutive_action_count - self.LEARNING_SLOWDOWN_START) / \
                   (self.LEARNING_SLOWDOWN_MAX - self.LEARNING_SLOWDOWN_START)
        progress = min(1.0, progress)
        
        multiplier = 1.0 - (0.95 * progress)
        return max(0.05, multiplier)

    def get_nth_highest_utility(self, group, n=3):
        actions = [a for a in self.actions() if a.group == group]
        
        if len(actions) < n:
            return self.INTERACT_UTILITY_FLOOR if group == "interact" else self.MOVE_UTILITY_FLOOR
        
        utilities = sorted([a.utility for a in actions], reverse=True)
        return utilities[n-1]

    def detect_pattern(self):
        if len(self.action_history) < 6:
            return None, 0
        
        recent = list(self.action_history)[-self.PATTERN_CHECK_WINDOW:]
        
        for pattern_len in range(1, self.PATTERN_MAX_LENGTH + 1):
            if len(recent) < pattern_len * self.PATTERN_MIN_REPEATS:
                continue
            
            candidate = tuple(recent[-pattern_len:])
            repeat_count = 0
            idx = len(recent) - pattern_len
            
            while idx >= 0:
                segment = tuple(recent[idx:idx + pattern_len])
                if segment == candidate:
                    repeat_count += 1
                    idx -= pattern_len
                else:
                    break
            
            if repeat_count >= self.PATTERN_MIN_REPEATS:
                return candidate, repeat_count
        
        return None, 0

    def apply_pattern_penalty(self):
        pattern, repeat_count = self.detect_pattern()
        
        if pattern is None:
            self.detected_pattern = None
            self.pattern_repeat_count = 0
            return
        
        self.detected_pattern = pattern
        self.pattern_repeat_count = repeat_count
        
        pattern_actions = set(pattern)
        
        for action_name in pattern_actions:
            third_util = self.get_nth_highest_utility(
                "interact" if action_name in ["A", "B", "Start", "Select"] else "move", 
                n=3
            )
            
            for a in self.actions():
                if a.action == action_name:
                    a.utility = third_util * 0.9
                    floor = self.INTERACT_UTILITY_FLOOR if a.group == "interact" else self.MOVE_UTILITY_FLOOR
                    a.utility = max(a.utility, floor)
                    break

    def apply_repetition_penalty(self):
        if self.current_repeated_action is None:
            return
        
        repeated_perceptron = None
        for a in self.actions():
            if a.action == self.current_repeated_action:
                repeated_perceptron = a
                break
        
        if repeated_perceptron is None:
            return
        
        if self.consecutive_action_count >= self.HARD_RESET_THRESHOLD:
            third_util = self.get_nth_highest_utility(repeated_perceptron.group, n=3)
            repeated_perceptron.utility = third_util * 0.9
            
            floor = self.INTERACT_UTILITY_FLOOR if repeated_perceptron.group == "interact" else self.MOVE_UTILITY_FLOOR
            repeated_perceptron.utility = max(repeated_perceptron.utility, floor)
            
            self.consecutive_action_count = 0
            
        elif self.consecutive_action_count >= self.PENALTY_THRESHOLD:
            repeated_perceptron.utility *= 0.7
            
            floor = self.INTERACT_UTILITY_FLOOR if repeated_perceptron.group == "interact" else self.MOVE_UTILITY_FLOOR
            repeated_perceptron.utility = max(repeated_perceptron.utility, floor)

    # =========================================================================
    # ENTITY & LEARNING
    # =========================================================================
    
    def spawn_innate_entities(self, learning_state):
        if self.innate_entities_spawned:
            return
        
        for etype, indices in [("sense_menu", [5, 6]), ("sense_battle", [3, 4]), 
                                ("sense_movement", [0, 1]), ("sense_map_transition", [2])]:
            entity = Perceptron("entity", entity_type=etype)
            entity.ensure_weights(len(learning_state))
            entity.weights = np.zeros(len(learning_state))
            for idx in indices:
                entity.weights[idx] = 0.5 if len(indices) > 1 else 1.0
            self.add(entity)
        
        self.innate_entities_spawned = True

    def enforce_utility_floors(self):
        for a in self.actions():
            if a.group == "move":
                a.utility = max(a.utility, self.MOVE_UTILITY_FLOOR)
            elif a.group == "interact":
                a.utility = max(a.utility, self.INTERACT_UTILITY_FLOOR)

    def get_spawn_threshold_adaptive(self, error_type='combined', percentile=50):
        if error_type == 'numeric':
            history = self.numeric_error_history
        elif error_type == 'visual':
            history = self.visual_error_history
        else:
            history = self.error_history
        
        if len(history) < 100:
            return 0.0005
        
        return max(0.001, np.percentile(history, percentile))

    def stagnation_level(self, window=10):
        if len(self.prev_learning_states) < window:
            return 0.0
        
        recent = list(self.prev_learning_states)[-window:]
        diffs = [np.linalg.norm(recent[i] - recent[i-1]) for i in range(1, len(recent))]
        return 1.0 - np.tanh(np.mean(diffs) * 2.0)

    def predict_future_error(self, state, action, context_state, raw_position=None):
        if self.entities():
            predictions = [e.predict(state) * e.utility for e in self.entities()]
            entity_novelty = np.mean(predictions)
        else:
            entity_novelty = 0.5
        
        combined = entity_novelty * 0.7 + action.utility * 0.3
        
        current_map = int(context_state[2])
        
        if raw_position is not None:
            location = self.get_location_key(raw_position[0], raw_position[1], current_map)
        else:
            location = self.get_location_key(context_state[0] * 255, context_state[1] * 255, current_map)
        
        map_debt = self.map_novelty_debt.get(current_map, 0.0)
        temp_debt = self.get_temp_debt(current_map)
        loc_debt = self.location_novelty.get(location, 0.0)
        
        total_debt = map_debt + temp_debt + loc_debt * 0.5
        debt_penalty = 1.0 / (1.0 + total_debt * 5.0)
        combined *= debt_penalty
        
        if action.action == self.current_repeated_action and self.consecutive_action_count > self.LEARNING_SLOWDOWN_START:
            repetition_penalty = 1.0 / (1.0 + (self.consecutive_action_count - self.LEARNING_SLOWDOWN_START) * 0.15)
            combined *= repetition_penalty
        
        if self.detected_pattern and action.action in self.detected_pattern:
            pattern_penalty = 1.0 / (1.0 + self.pattern_repeat_count * 0.2)
            combined *= pattern_penalty
        
        return combined + np.random.randn() * 0.05

    def _update_entity_cache(self):
        self._entity_norms_cache.clear()
        for i, e in enumerate(self.entities()):
            if e.weights is not None:
                norm = np.linalg.norm(e.weights)
                if norm > 1e-6:
                    self._entity_norms_cache[i] = norm
        self._cache_valid = True

    def merge_similar_entities(self, merge_threshold=0.80):
        entities = self.entities()
        if len(entities) < 5:
            return
        
        if not self._cache_valid:
            self._update_entity_cache()
        
        merged_indices = set()
        
        for i, e1 in enumerate(entities):
            if i in merged_indices or i not in self._entity_norms_cache:
                continue
            
            e1_norm = self._entity_norms_cache[i]
            cluster = [e1]
            
            for j, e2 in enumerate(entities):
                if j <= i or j in merged_indices or j not in self._entity_norms_cache:
                    continue
                
                e2_norm = self._entity_norms_cache[j]
                similarity = np.dot(e1.weights, e2.weights) / (e1_norm * e2_norm)
                
                if similarity > merge_threshold:
                    cluster.append(e2)
                    merged_indices.add(j)
            
            if len(cluster) > 1:
                total_utility = sum(e.utility for e in cluster)
                merged_weights = sum(e.weights * e.utility for e in cluster) / total_utility
                e1.weights = merged_weights
                e1.utility = min(total_utility, 2.0)
                self._cache_valid = False

    def compute_multi_modal_error(self, state, next_state):
        vel_x = abs(next_state[0] - state[0])
        vel_y = abs(next_state[1] - state[1])
        map_trans = abs(next_state[2] - state[2])
        battle_start = abs(next_state[3] - state[3])
        battle_end = abs(next_state[4] - state[4])
        menu_open = abs(next_state[5] - state[5])
        menu_close = abs(next_state[6] - state[6])
        dir_change = abs(next_state[7] - state[7])
        visual = np.linalg.norm(next_state[8:] - state[8:])
        
        weighted = (vel_x*0.5 + vel_y*0.5 + map_trans*10.0 + battle_start*5.0 + 
                   battle_end*3.0 + menu_open*2.0 + menu_close*1.5 + dir_change*0.3 + visual*2.0)
        
        numeric = vel_x + vel_y + map_trans + battle_start + battle_end + menu_open + menu_close + dir_change
        
        return weighted, numeric, visual

    def learn(self, learning_state, next_learning_state, context_state, next_context_state, dead=False,
              raw_position=None, next_raw_position=None):
        if learning_state.shape != next_learning_state.shape:
            max_dim = max(learning_state.shape[0], next_learning_state.shape[0])
            learning_state = np.pad(learning_state, (0, max(0, max_dim - learning_state.shape[0])))
            next_learning_state = np.pad(next_learning_state, (0, max(0, max_dim - next_learning_state.shape[0])))
        
        if not self.innate_entities_spawned:
            self.spawn_innate_entities(learning_state)
        
        prev_context = self.prev_context_states[-1] if self.prev_context_states else None
        
        prev_raw = None
        if hasattr(self, '_last_raw_position'):
            prev_raw = self._last_raw_position
        
        self.update_exploration_tracking(context_state, prev_context, 
                                         raw_position=raw_position, 
                                         prev_raw_position=prev_raw)
        
        self._last_raw_position = raw_position
        
        weighted_error, numeric_error, visual_error = self.compute_multi_modal_error(learning_state, next_learning_state)
        
        self.error_history.append(weighted_error)
        self.numeric_error_history.append(numeric_error)
        self.visual_error_history.append(visual_error)
        
        current_map = int(context_state[2])
        
        if raw_position is not None:
            location = self.get_location_key(raw_position[0], raw_position[1], current_map)
        else:
            location = self.get_location_key(context_state[0] * 255, context_state[1] * 255, current_map)
        
        self.visited_maps[current_map] = self.visited_maps.get(current_map, 0) + 1
        self.location_memory[location] = self.location_memory.get(location, 0) + 1
        
        map_visits = self.visited_maps[current_map]
        if map_visits > 10:
            self.map_novelty_debt[current_map] = self.map_novelty_debt.get(current_map, 0.0) + 0.05 * (map_visits - 10)
        
        loc_visits = self.location_memory[location]
        if loc_visits > 15:
            self.location_novelty[location] = self.location_novelty.get(location, 0.0) + 0.1 * (loc_visits - 15)
        
        if map_visits > 30:
            weighted_error *= 0.5
        if loc_visits > 25:
            weighted_error *= 0.7
        
        stagnation = self.stagnation_level()
        
        learning_mult = self.get_learning_multiplier(self.last_action) if self.last_action else 1.0
        if self.detected_pattern and self.last_action in self.detected_pattern:
            learning_mult *= 0.5
        
        for p in self.perceptrons:
            mult = learning_mult if (p.kind == "action" and p.action == self.last_action) else 1.0
            if p.kind == "action" and self.detected_pattern and p.action in self.detected_pattern:
                mult *= 0.5
            p.update(learning_state, weighted_error * mult, stagnation=stagnation)
        
        self.apply_repetition_penalty()
        self.apply_pattern_penalty()
        self.enforce_utility_floors()
        
        if prev_context is not None and np.linalg.norm(context_state[:2] - prev_context[:2]) > 0.001:
            if self.last_action:
                for a in self.actions():
                    if a.action == self.last_action:
                        if raw_position is not None:
                            boost = 1.15 if self.is_near_map_edge(raw_position[0], raw_position[1]) else 1.08
                        else:
                            boost = 1.08
                        a.utility = min(a.utility * boost, 2.0)
                        break
        
        if self.timestep % self.SAVE_INTERVAL == 0:
            self.save_exploration_memory()
        
        if self.timestep % 500 == 0 and len(self.entities()) < 50:
            if numeric_error > self.get_spawn_threshold_adaptive('numeric') or visual_error > self.get_spawn_threshold_adaptive('visual'):
                new_entity = Perceptron("entity", entity_type="learned")
                new_entity.ensure_weights(len(learning_state))
                new_entity.weights = learning_state.copy() * 0.05 + np.random.randn(len(learning_state)) * 0.001
                self.add(new_entity)
        
        if self.timestep % 200 == 0:
            self.merge_similar_entities()
        
        self.perceptrons = [p for p in self.perceptrons if p.kind == "action" or (p.utility > 0.05 and (p.weights is None or np.linalg.norm(p.weights) < 100))]
        
        self.action_history.append(self.last_action)

    def log_state(self, learning_state, context_state):
        self.prev_learning_states.append(learning_state)
        self.prev_context_states.append(context_state)

    def update_position(self, x, y):
        self.last_positions.append((int(x), int(y)))
    
    def get_tile_interaction_stats(self, map_id):
        """Get stats about tile interaction probing for logging."""
        memory = self.get_current_map_memory(map_id)
        tile_interactions = memory.get('tile_interactions', {})
        
        total_tiles = len(tile_interactions)
        exhausted_tiles = sum(1 for t in tile_interactions.values() if t.get('exhausted', False))
        tiles_with_success = sum(1 for t in tile_interactions.values() 
                                  if any(t.get('direction_successes', {}).get(d, 0) > 0 for d in range(4)))
        
        return {
            'probed': total_tiles,
            'exhausted': exhausted_tiles,
            'with_success': tiles_with_success
        }

In [19]:
# ============================================================================
# CELL 4: Action Selection - Tile-Based Interaction Probing
# ============================================================================

GBA_ACTIONS = ["Up", "Down", "Left", "Right", "A", "B", "Start", "Select"]

ACTION_DELTAS = {
    "UP": (0, -1),
    "DOWN": (0, 1),
    "LEFT": (-1, 0),
    "RIGHT": (1, 0)
}

# Direction int to action name
DIRECTION_TO_ACTION = {
    0: "DOWN",
    1: "UP",
    2: "LEFT",
    3: "RIGHT"
}

# Action name to direction int
ACTION_TO_DIRECTION = {
    "DOWN": 0,
    "UP": 1,
    "LEFT": 2,
    "RIGHT": 3
}

def manhattan_distance(pos1, pos2):
    return abs(pos1[0] - pos2[0]) + abs(pos1[1] - pos2[1])


def anticipatory_action(brain, learning_state, context_state, 
                       exploration_weight=1.3, min_interact_prob=0.15,
                       raw_position=None):
    """
    Action selection with tile-based interaction probing:
    
    MOVEMENT:
    - Prioritize unvisited tiles (novelty bonus)
    - Avoid known obstructions
    - Consider transition attraction when map is well-explored
    - Respect transition bans
    
    INTERACTION:
    - On new tiles, try A in all 4 directions (one at a time)
    - Track success rate per tile/direction
    - Mark tiles exhausted when all directions tried with no success
    - Never interact on exhausted tiles
    
    Plus existing features:
    - Strict mode filtering
    - Menu trap B-boost
    - Pattern/repetition penalties
    """
    actions_list = brain.actions()
    if not actions_list:
        return Perceptron("action", action="UP", group="move")

    mode = brain.determine_control_mode(context_state, raw_position=raw_position)
    current_map = int(context_state[2])
    current_dir = int(context_state[5])
    
    if raw_position is not None:
        raw_x, raw_y = raw_position
    else:
        raw_x, raw_y = int(context_state[0] * 255), int(context_state[1] * 255)
    
    current_pos = (raw_x, raw_y)
    
    # Get exploration memory
    memory = brain.get_current_map_memory(current_map)
    visited_tiles = memory['visited_tiles']
    obstructions = memory['obstructions']
    
    # Get tile interaction state
    tile_needs_probing = brain.should_interact_at_tile(raw_x, raw_y, current_map)
    untried_directions = brain.get_untried_directions(raw_x, raw_y, current_map)
    best_interact_direction = brain.get_best_interaction_direction(raw_x, raw_y, current_map)
    
    # Get transition info
    transition_attraction, best_transition = brain.get_transition_attraction(current_map)
    coverage = brain.get_exploration_coverage(current_map)
    
    # Get group weights for battle mode
    move_group_weight = brain.get_group_weight("move")
    interact_group_weight = brain.get_group_weight("interact")
    total_group_weight = move_group_weight + interact_group_weight + 1e-9
    move_group_prob = move_group_weight / total_group_weight
    
    # === STRICT MODE FILTERING ===
    if mode == "battle":
        if np.random.random() < move_group_prob:
            allowed = [a for a in actions_list if a.group == "move"]
        else:
            allowed = [a for a in actions_list if a.group == "interact"]
        all_actions = actions_list
        
    elif mode == "interact":
        allowed = [a for a in actions_list if a.group == "interact"]
        all_actions = None
        
    else:  # move
        allowed = [a for a in actions_list if a.group == "move"]
        all_actions = None

    # === SCORE ACTIONS ===
    action_scores = []
    
    for a in allowed:
        predicted = brain.predict_future_error(learning_state, a, context_state, raw_position=raw_position)
        
        # --- MOVE ACTIONS ---
        if a.group == "move":
            if mode == "move":
                predicted *= exploration_weight
            
            dx, dy = ACTION_DELTAS.get(a.action, (0, 0))
            target_tile = (raw_x + dx, raw_y + dy)
            action_direction = ACTION_TO_DIRECTION.get(a.action, -1)
            
            # BONUS: Unvisited tile (main novelty driver)
            if target_tile not in visited_tiles:
                predicted *= brain.UNVISITED_TILE_BONUS
            
            # PENALTY: Known obstruction
            if target_tile in obstructions:
                predicted *= brain.OBSTRUCTION_PENALTY
            
            # PENALTY: Transition ban
            if brain.is_position_banned(current_map, raw_x, raw_y, action_direction):
                predicted *= 0.05  # Heavy penalty for banned transitions
            
            # BONUS: Moving toward transition when map is well-explored
            if transition_attraction > 0.3 and best_transition and coverage > 0.5:
                trans_pos = best_transition['position']
                if isinstance(trans_pos, list):
                    trans_pos = tuple(trans_pos)
                
                current_dist = manhattan_distance(current_pos, trans_pos)
                new_dist = manhattan_distance(target_tile, trans_pos)
                
                if new_dist < current_dist:
                    predicted *= (1.0 + transition_attraction)
            
            # Small random factor for exploration variety
            predicted *= (0.9 + np.random.random() * 0.2)
        
        # --- INTERACT ACTIONS ---
        elif a.group == "interact":
            predicted = max(predicted, min_interact_prob)
            
            # === MENU TRAP B-BOOST ===
            if a.action == 'B':
                predicted *= brain.menu_trap_b_boost
            
            # === A-PRESS LOGIC ===
            if a.action == 'A':
                if tile_needs_probing:
                    # Check if current facing direction is untried
                    if current_dir in untried_directions:
                        # Strong boost - we want to try this direction!
                        predicted *= 3.0
                    elif best_interact_direction is not None:
                        # There's a direction with known success
                        if current_dir == best_interact_direction:
                            success_rate = brain.get_direction_success_rate(
                                raw_x, raw_y, current_map, current_dir
                            )
                            if success_rate and success_rate > 0.1:
                                predicted *= (1.5 + success_rate)
                        else:
                            # Not facing the best direction, mild penalty
                            predicted *= 0.7
                    else:
                        # All directions tried, none successful - don't bother
                        predicted *= 0.3
                else:
                    # Tile is exhausted - strong penalty for A
                    predicted *= 0.1
            
            # === START/SELECT ===
            if a.action in ['Start', 'Select']:
                # Generally low priority unless stuck
                if brain.state_stagnation_count > 10:
                    predicted *= 1.5
                else:
                    predicted *= 0.5
        
        action_scores.append((a, predicted))

    # === SELECT BEST ===
    if action_scores:
        best_action = max(action_scores, key=lambda x: x[1])[0]
        best_score = max(s for _, s in action_scores)
        
        if best_score > 0.01:
            brain.record_action_execution(best_action.action)
            brain.track_consecutive_action(best_action.action)
            
            # If this is an A-press, start interaction verification
            if best_action.action == 'A' and tile_needs_probing:
                brain.start_interaction_verification(raw_x, raw_y, current_map, current_dir)
            
            return best_action
    
    # === FALLBACKS ===
    
    # Battle fallback
    if mode == "battle" and all_actions:
        all_scores = [(a, brain.predict_future_error(learning_state, a, context_state, raw_position=raw_position)) 
                      for a in all_actions]
        if all_scores:
            best_action = max(all_scores, key=lambda x: x[1])[0]
            brain.record_action_execution(best_action.action)
            brain.track_consecutive_action(best_action.action)
            return best_action
    
    # Move fallback: prefer unvisited directions
    if mode == "move":
        for a in allowed:
            dx, dy = ACTION_DELTAS.get(a.action, (0, 0))
            target = (raw_x + dx, raw_y + dy)
            if target not in visited_tiles and target not in obstructions:
                brain.record_action_execution(a.action)
                brain.track_consecutive_action(a.action)
                return a
    
    # Generic fallback
    if allowed:
        best = max(allowed, key=lambda a: a.utility)
        brain.record_action_execution(best.action)
        brain.track_consecutive_action(best.action)
        return best
    
    best = max(actions_list, key=lambda a: a.utility)
    brain.record_action_execution(best.action)
    brain.track_consecutive_action(best.action)
    return best

In [20]:
# ============================================================================
# CELL 6: Main Loop - Tile-Based Interaction Probing
# ============================================================================

brain = Brain()

# Action perceptrons
for b in ["UP", "DOWN", "LEFT", "RIGHT"]:
    brain.add(Perceptron("action", action=b, group="move"))
for b in ["A", "B", "Start", "Select"]:
    brain.add(Perceptron("action", action=b, group="interact"))

exploration_weight = 1.3
prev_context_state = None
prev_raw_position = None

print("="*70)
print("AI CONTROL - v7.0 (Tile-Based Interaction Probing)")
print("="*70)
print("STATE FORMAT FROM LUA:")
print("  [x, y, map_id, in_battle, menu_flag, direction]")
print("  x, y: 0-255 tile coordinates")
print("  direction: 0=DOWN, 1=UP, 2=LEFT, 3=RIGHT")
print("="*70)
print("NEW: TILE-BASED INTERACTION PROBING:")
print("  - On each new tile, try A in all 4 directions")
print("  - Track success rate per direction (handles moving NPCs)")
print("  - Mark tile exhausted when all directions tried, none successful")
print("  - Never waste A-presses on exhausted tiles")
print("="*70)
print("MOVEMENT:")
print(f"  - Unvisited tile bonus: {brain.UNVISITED_TILE_BONUS}x")
print(f"  - Obstruction penalty: {brain.OBSTRUCTION_PENALTY}x")
print("  - Random exploration with novelty preference")
print("="*70)
print("TRANSITION BAN SYSTEM:")
print(f"  - Ban vicinity radius: {brain.BAN_VICINITY_RADIUS} tiles")
print(f"  - Coverage lift threshold: {brain.BAN_COVERAGE_LIFT_THRESHOLD:.0%}")
print(f"  - Timeout: {brain.BAN_TIMEOUT_STEPS} steps")
print("="*70)
print("EXISTING FEATURES:")
print(f"  - Action confirmation ({brain.ACTION_CONFIRM_FRAMES} frames)")
print(f"  - Pattern detection up to {brain.PATTERN_MAX_LENGTH} actions")
print(f"  - Menu trap B-boost (max {brain.B_BOOST_MAX}x)")
print(f"  - State stagnation threshold: {brain.STATE_STAGNATION_THRESHOLD}")
print("="*70)
print(f"PERSISTENT MEMORY: {brain.EXPLORATION_MEMORY_FILE}")
print("="*70)

while True:
    # Read state
    context_state, palette_state, tile_state, dead, raw_position = read_game_state()
    
    raw_x, raw_y = raw_position
    in_battle = context_state[3]
    current_map = int(context_state[2])
    current_dir = int(context_state[5])
    
    brain.update_position(raw_x, raw_y)

    derived = compute_derived_features(context_state, prev_context_state)
    learning_state = build_learning_state(derived, palette_state, tile_state, in_battle)
    
    brain.log_state(learning_state, context_state)
    
    # === ACTION EXECUTION CONFIRMATION ===
    brain.confirm_action_executed(context_state, prev_context_state)

    if brain.should_send_new_action():
        action = anticipatory_action(
            brain, learning_state, context_state,
            exploration_weight=exploration_weight,
            raw_position=raw_position
        )

        if action is not None:
            write_action(action.action)
            brain.last_action = action.action
            brain.set_pending_action(action.action)
            
            brain.update_menu_trap_tracking(context_state, action.action, raw_position=raw_position)
        else:
            write_action("NONE")
    else:
        if brain.pending_action:
            write_action(brain.pending_action)

    # === LOGGING ===
    if brain.timestep % 100 == 0:
        memory = brain.get_current_map_memory(current_map)
        
        visited_count = len(memory['visited_tiles'])
        obs_count = len(memory['obstructions'])
        interactables = len(memory['interactable_objects'])
        coverage = brain.get_exploration_coverage(current_map)
        transitions = memory.get('transitions', [])
        
        # Tile interaction stats
        tile_stats = brain.get_tile_interaction_stats(current_map)
        
        # Current tile state
        tile_needs_probing = brain.should_interact_at_tile(raw_x, raw_y, current_map)
        untried_dirs = brain.get_untried_directions(raw_x, raw_y, current_map)
        
        dir_name = brain.DIRECTION_NAMES.get(current_dir, '?')
        
        print(f"\n{'='*70}")
        print(f"Step {brain.timestep} | Map {current_map} | Pos ({raw_x}, {raw_y}) facing {dir_name}")
        print(f"  Mode: {brain.control_mode} | Battle: {int(in_battle)}")
        
        # Exploration status
        print(f"\n  üìä EXPLORATION:")
        print(f"     Visited: {visited_count} | Obstructions: {obs_count} | Coverage: {coverage:.0%}")
        print(f"     Interactables found: {interactables}")
        
        # Tile interaction probing
        print(f"\n  üéØ TILE PROBING:")
        print(f"     Tiles probed: {tile_stats['probed']} | Exhausted: {tile_stats['exhausted']} | With success: {tile_stats['with_success']}")
        
        if tile_needs_probing:
            untried_names = [brain.DIRECTION_NAMES.get(d, '?') for d in untried_dirs]
            print(f"     Current tile: NEEDS PROBING - untried: {untried_names}")
        else:
            print(f"     Current tile: EXHAUSTED or fully probed")
        
        # Show success rates for current tile if any
        tile_state_data = brain.get_tile_interaction_state(raw_x, raw_y, current_map)
        success_info = []
        for d in range(4):
            attempts = tile_state_data['direction_attempts'].get(d, 0)
            successes = tile_state_data['direction_successes'].get(d, 0)
            if attempts > 0:
                rate = successes / attempts
                success_info.append(f"{brain.DIRECTION_NAMES.get(d, '?')}:{successes}/{attempts}({rate:.0%})")
        if success_info:
            print(f"     Direction success: {', '.join(success_info)}")
        
        # Transitions
        if transitions:
            print(f"\n  üö™ TRANSITIONS: {len(transitions)} known")
            for t in transitions[:3]:
                pos = t['position']
                if isinstance(pos, list):
                    pos = tuple(pos)
                banned = "üö´" if brain.is_transition_banned(current_map, pos, t['direction']) else ""
                print(f"     ({pos[0]},{pos[1]}) ‚Üí Map {t['destination_map']} (used {t['use_count']}x) {banned}")
        
        # Transition bans
        if current_map in brain.transition_bans:
            ban = brain.transition_bans[current_map]
            ban_tile = ban['banned_tile']
            ban_dir = brain.DIRECTION_NAMES.get(ban['banned_direction'], '?')
            vicinity = "ACTIVE" if ban['vicinity_active'] else "positional only"
            steps_since = brain.timestep - ban['created_at']
            print(f"\n  üö´ TRANSITION BAN:")
            print(f"     Tile {ban_tile} facing {ban_dir} | Vicinity: {vicinity}")
            print(f"     Age: {steps_since}/{brain.BAN_TIMEOUT_STEPS} steps")
        
        # Transition attraction
        attraction, best_trans = brain.get_transition_attraction(current_map)
        if attraction > 0.1 and best_trans:
            print(f"\n  ‚≠ê TRANSITION ATTRACTION: Map {best_trans['destination_map']} = {attraction:.2f}")
        
        # Debt info
        map_debt = brain.map_novelty_debt.get(current_map, 0.0)
        temp_debt = brain.get_temp_debt(current_map)
        if map_debt > 0.1 or temp_debt > 0.1:
            print(f"\n  üí≥ DEBT: permanent={map_debt:.2f}, temp={temp_debt:.2f}")
        
        # Menu trap status
        if brain.menu_trap_b_boost > 1.0:
            print(f"\n  üîí MENU TRAP: B boost {brain.menu_trap_b_boost:.2f}x ({brain.menu_trap_frames} frames)")
        
        # Pending action
        if brain.pending_action:
            print(f"\n  ‚è≥ Pending: {brain.pending_action} ({brain.pending_action_frames}/{brain.ACTION_CONFIRM_FRAMES})")
        
        # Pending interaction verification
        if brain.pending_interaction_verify:
            info = brain.pending_interaction_verify
            print(f"  üîç Verifying interaction: ({info['x']},{info['y']}) dir {info['direction']} - {brain.interaction_verify_countdown} frames left")
        
        # Utilities
        action_utils = sorted([(a.action, a.utility) for a in brain.actions()], key=lambda x: x[1], reverse=True)
        print(f"\n  ‚ö° Utilities: {' '.join([f'{k}:{v:.2f}' for k,v in action_utils])}")
        
        # Warnings
        if brain.state_stagnation_count > 10:
            print(f"\n  ‚ö†Ô∏è STAGNATION: {brain.state_stagnation_count}/{brain.STATE_STAGNATION_THRESHOLD}")
        if brain.detected_pattern:
            pattern_str = '-'.join(str(a) for a in brain.detected_pattern)
            print(f"  üîÑ PATTERN ({len(brain.detected_pattern)}): {pattern_str} x{brain.pattern_repeat_count}")

    # === MILESTONES ===
    if brain.timestep % 500 == 0 and brain.timestep > 0:
        total_visited = sum(len(m['visited_tiles']) for m in brain.exploration_memory.values())
        total_obs = sum(len(m['obstructions']) for m in brain.exploration_memory.values())
        total_interactables = sum(len(m['interactable_objects']) for m in brain.exploration_memory.values())
        total_transitions = sum(len(m.get('transitions', [])) for m in brain.exploration_memory.values())
        
        # Tile probing stats across all maps
        total_probed = sum(len(m.get('tile_interactions', {})) for m in brain.exploration_memory.values())
        total_exhausted = sum(
            sum(1 for t in m.get('tile_interactions', {}).values() if t.get('exhausted', False))
            for m in brain.exploration_memory.values()
        )
        
        print(f"\n{'#'*70}")
        print(f"# MILESTONE {brain.timestep}")
        print(f"# Maps: {len(brain.exploration_memory)} | Tiles visited: {total_visited}")
        print(f"# Obstructions: {total_obs} | Interactables: {total_interactables}")
        print(f"# Transitions: {total_transitions}")
        print(f"# Tiles probed: {total_probed} | Exhausted: {total_exhausted}")
        print(f"{'#'*70}")

    time.sleep(0.02)

    # Learn
    next_context, next_palette, next_tiles, dead, next_raw_position = read_game_state()
    next_in_battle = next_context[3]
    next_derived = compute_derived_features(next_context, context_state)
    next_learning_state = build_learning_state(next_derived, next_palette, next_tiles, next_in_battle)

    brain.learn(learning_state, next_learning_state, context_state, next_context, dead=dead, 
                raw_position=raw_position, next_raw_position=next_raw_position)

    prev_context_state = context_state.copy()
    prev_raw_position = raw_position
    brain.timestep += 1

  Loaded exploration memory: 1 maps
AI CONTROL - v7.0 (Tile-Based Interaction Probing)
STATE FORMAT FROM LUA:
  [x, y, map_id, in_battle, menu_flag, direction]
  x, y: 0-255 tile coordinates
  direction: 0=DOWN, 1=UP, 2=LEFT, 3=RIGHT
NEW: TILE-BASED INTERACTION PROBING:
  - On each new tile, try A in all 4 directions
  - Track success rate per direction (handles moving NPCs)
  - Mark tile exhausted when all directions tried, none successful
  - Never waste A-presses on exhausted tiles
MOVEMENT:
  - Unvisited tile bonus: 1.5x
  - Obstruction penalty: 0.25x
  - Random exploration with novelty preference
TRANSITION BAN SYSTEM:
  - Ban vicinity radius: 3 tiles
  - Coverage lift threshold: 60%
  - Timeout: 300 steps
EXISTING FEATURES:
  - Action confirmation (3 frames)
  - Pattern detection up to 10 actions
  - Menu trap B-boost (max 3.0x)
  - State stagnation threshold: 20
PERSISTENT MEMORY: C:\Users\natmaw\Documents\Boston Stuff\CS 5100 Foundations of AI\cogai\exploration_memory.json

Ste

KeyboardInterrupt: 