In [None]:
# ============================================================================
# CELL 1: State Management & Utilities
# ============================================================================
# CHANGES:
# 1. read_game_state now handles BOTH key formats:
#    - Long keys: "state", "palette", "tiles" (old format)
#    - Short keys: "s", "p", "t" (Lua teaching/AI format)
# 2. Added EXPLORATION_MEMORY_FILE and MODEL_CHECKPOINT_FILE paths
# 3. Added TAUGHT_BATTLE_TRANSITIONS_FILE path for battle Markov
# ============================================================================

from pathlib import Path
import json
import numpy as np
import time
from collections import deque

# Base directory for every JSON file path defined below.
BASE_PATH = Path("C:/Users/HP/Documents/cogai/")
ACTION_FILE = BASE_PATH / "action.json"          # written by write_action()
STATE_FILE = BASE_PATH / "game_state.json"       # read by read_game_state()
TAUGHT_TRANSITIONS_FILE = BASE_PATH / "taught_transitions.json"  # default for Brain.load_taught_transitions()
TAUGHT_BATTLE_TRANSITIONS_FILE = BASE_PATH / "taught_battle_transitions.json"
EXPLORATION_MEMORY_FILE = BASE_PATH / "exploration_memory.json"
MODEL_CHECKPOINT_FILE = BASE_PATH / "model_checkpoint.json"
TAUGHT_EXPLORATION_FILE = BASE_PATH / "taught_exploration_memory.json"
TAUGHT_NAV_TARGETS_FILE = BASE_PATH / "taught_nav_targets.json"

# === MARKOV SIMILARITY WEIGHTS ===
# Brain.compute_markov_similarity() combines three sub-scores with these
# weights; Brain.get_markov_action() accepts a match only when the combined
# score reaches MARKOV_FAMILIARITY_THRESHOLD.
MARKOV_IMMEDIATE_WEIGHT = 0.5
MARKOV_SEQUENTIAL_WEIGHT = 0.3
MARKOV_PARTIAL_WEIGHT = 0.2
MARKOV_FAMILIARITY_THRESHOLD = 0.6

# Sequential sub-score awarded when the last 8 / 5 / 3 recent actions match.
MARKOV_SEQ_FULL_WEIGHT = 1.0
MARKOV_SEQ_MEDIUM_WEIGHT = 0.6
MARKOV_SEQ_SHORT_WEIGHT = 0.3

# Position bonus by Manhattan distance: 0, <= 2, <= MARKOV_POS_MAX_DIST.
# Taught frames farther away than MARKOV_POS_MAX_DIST are skipped entirely.
MARKOV_POS_EXACT_BONUS = 0.35
MARKOV_POS_NEAR_BONUS = 0.25
MARKOV_POS_FAR_BONUS = 0.1
MARKOV_POS_MAX_DIST = 5

# === BATTLE MARKOV WEIGHTS ===
# NOTE(review): not referenced in the visible part of the file; presumably
# consumed by the battle Markov methods further down - confirm.
BATTLE_MARKOV_ACTION_SEQ_WEIGHT = 0.70
BATTLE_MARKOV_PALETTE_WEIGHT = 0.20
BATTLE_MARKOV_MENU_STATE_WEIGHT = 0.10
BATTLE_MARKOV_THRESHOLD_LOW = 0.35
BATTLE_MARKOV_THRESHOLD_HIGH = 0.45

# Vector lengths that read_game_state() pads or trims its outputs to.
EXPECTED_STATE_DIM = 6
PALETTE_DIM = 768
TILE_DIM = 600

def normalize_game_state(raw_state):
    """Return a scaled copy of the six-field context vector.

    Vectors with fewer than six entries are handed back untouched (the very
    same object). Otherwise a copy is made and its first six slots become:
    slots 0 and 1 divided by 255, slot 2 clipped to [0, 255], slots 3 and 4
    reduced to 0.0/1.0 flags, and slot 5 taken modulo 4 as an integer.
    """
    if len(raw_state) < 6:
        return raw_state

    x, y, map_id, battle, menu, facing = raw_state[:6]
    replacements = (
        x / 255.0,
        y / 255.0,
        np.clip(map_id, 0, 255),
        1.0 if battle > 0 else 0.0,
        1.0 if menu > 0 else 0.0,
        int(facing) % 4,
    )

    scaled = raw_state.copy()
    for slot, value in enumerate(replacements):
        scaled[slot] = value
    return scaled

def compute_derived_features(current, prev):
    """Build the 8-element frame-to-frame feature vector.

    Layout: [vel_x, vel_y, map_changed, battle_started, battle_ended,
    menu_opened, menu_closed, direction_changed]. The two velocities are plain
    differences of slots 0 and 1; the remaining six are 0.0/1.0 flags.
    Returns eight zeros when there is no previous frame.
    """
    if prev is None:
        return np.zeros(8)

    def flag(condition):
        return 1.0 if condition else 0.0

    return np.array([
        current[0] - prev[0],
        current[1] - prev[1],
        flag(abs(current[2] - prev[2]) > 0.5),
        flag(current[3] > prev[3]),
        flag(current[3] < prev[3]),
        flag(current[4] > prev[4]),
        flag(current[4] < prev[4]),
        flag(current[5] != prev[5]),
    ])

def build_learning_state(derived, palette, tiles, in_battle):
    """Assemble the learning vector and add a small Gaussian jitter.

    In battle (in_battle > 0.5) the vector is derived + palette; otherwise it
    is derived + tiles + palette, so the two cases differ in length. Noise is
    drawn from np.random.randn and scaled by 0.0001.
    """
    parts = [derived, palette] if in_battle > 0.5 else [derived, tiles, palette]
    clean = np.concatenate(parts)
    jitter = np.random.randn(len(clean)) * 0.0001
    return clean + jitter

def _pad_or_trim(arr, target_dim):
    """Force a 1-D array to exactly target_dim entries.

    Shorter arrays are zero-padded at the end, longer ones are cut to their
    first target_dim entries, and arrays already the right size are returned
    as-is (same object).
    """
    size = arr.shape[0]
    if size == target_dim:
        return arr
    if size > target_dim:
        return arr[:target_dim]
    return np.pad(arr, (0, target_dim - size))

def parse_game_state_data(data):
    """Pull (state, palette, tiles, dead) out of a game-state dict.

    Each list is looked up under its long key first ("state", "palette",
    "tiles") and then under its short key ("s", "p", "t"); a missing or
    empty value falls through to the next candidate and finally to [].
    "dead" is coerced to bool and defaults to False.
    """
    def pick(long_key, short_key):
        return data.get(long_key) or data.get(short_key) or []

    return (
        pick("state", "s"),
        pick("palette", "p"),
        pick("tiles", "t"),
        bool(data.get("dead", False)),
    )

def _blank_game_state():
    """Return the all-zero fallback tuple used when no usable state is available."""
    return (
        np.zeros(EXPECTED_STATE_DIM),
        np.zeros(PALETTE_DIM),
        np.zeros(TILE_DIM),
        False,
        (0, 0),
    )


def read_game_state(max_retries=3):
    """Read and decode STATE_FILE into fixed-size arrays.

    Args:
        max_retries: How many read attempts to make. Parse errors and
            transient OS errors (for example the file being locked while it
            is rewritten) are retried after a 1 ms pause.

    Returns:
        (context_state, palette_state, tile_state, dead, raw_position) where
        the three arrays are padded/trimmed to EXPECTED_STATE_DIM,
        PALETTE_DIM and TILE_DIM, `dead` is a bool, and `raw_position` is the
        un-normalised integer (x, y) taken from the first two state entries.
        If the file is missing, stays unreadable, or any unexpected error
        occurs, the all-zero fallback tuple is returned instead. This
        function never raises and never returns None.
    """
    for attempt in range(max_retries):
        try:
            with open(STATE_FILE, "r") as f:
                data = json.loads(f.read())

            raw, palette_raw, tiles_raw, dead = parse_game_state_data(data)

            # Keep the integer coordinates: normalisation below divides them by 255.
            raw_x = int(raw[0]) if len(raw) > 0 else 0
            raw_y = int(raw[1]) if len(raw) > 1 else 0
            raw_position = (raw_x, raw_y)

            context_state = normalize_game_state(np.array(raw, dtype=float))
            palette_state = np.array(palette_raw, dtype=float) if palette_raw else np.zeros(PALETTE_DIM)
            tile_state = np.array(tiles_raw, dtype=float) if tiles_raw else np.zeros(TILE_DIM)

            context_state = _pad_or_trim(context_state, EXPECTED_STATE_DIM)
            palette_state = _pad_or_trim(palette_state, PALETTE_DIM)
            tile_state = _pad_or_trim(tile_state, TILE_DIM)

            return context_state, palette_state, tile_state, dead, raw_position

        except FileNotFoundError:
            # No state file yet: nothing to retry.
            return _blank_game_state()
        except (json.JSONDecodeError, ValueError, OSError):
            # Usually a half-written or momentarily locked file; try again shortly.
            if attempt < max_retries - 1:
                time.sleep(0.001)
                continue
            return _blank_game_state()
        except Exception:
            # Deliberate best-effort: any other malformed payload yields the blank state.
            return _blank_game_state()

    # Reached only when max_retries <= 0; still honour the tuple contract.
    return _blank_game_state()

def write_action(action_name):
    """Write {"action": <name>} to ACTION_FILE as JSON.

    A non-empty name is upper-cased first; falsy values (None, "") are
    written unchanged. Any failure while writing is reported on stdout and
    otherwise ignored.
    """
    payload = {"action": action_name.upper() if action_name else action_name}
    try:
        with open(ACTION_FILE, "w") as out:
            json.dump(payload, out)
            out.flush()
    except Exception as e:
        print(f"[ERROR] Failed to write action: {e}")

In [None]:
# ============================================================================
# CELL 2: Perceptron Classes
# ============================================================================
# CHANGES:
# 1. Added cluster_activations (deque maxlen=50) for clustering comparison
# 2. Entity perceptrons record into cluster_activations on every predict()
# ============================================================================

class Perceptron:
    """Linear unit with two eligibility traces and kind-specific bookkeeping.

    `kind` selects the extra behaviour:
      * "entity": predict() damps the activation by `familiarity` and records
        |raw activation| in both history deques; update() raises familiarity
        and logs a prediction-error sample.
      * "action": update() also adjusts and clamps `utility`.
      * anything else: plain dot-product predict and weight update only.

    Weights are created lazily with the dimension of the first state seen and
    are never resized afterwards. When a state of a different length arrives,
    only the overlapping prefix is used.
    """

    def __init__(self, kind, action=None, group=None, entity_type=None):
        self.kind = kind
        self.action = action
        self.group = group
        self.entity_type = entity_type

        self.utility = 1.0
        self.weights = None  # allocated by ensure_weights() on first use

        self.eligibility_fast = 0.0
        self.eligibility_slow = 0.0

        self.familiarity = 0.0
        self.activation_history = deque(maxlen=10)
        self.cluster_activations = deque(maxlen=50)  # longer history for clustering

        self.learning_rate = 0.01
        self.prediction_errors = deque(maxlen=50)

    def ensure_weights(self, dim):
        """Allocate small random weights of length `dim` if none exist yet."""
        if self.weights is None:
            self.weights = np.random.randn(dim) * 0.001

    def predict(self, state):
        """Return the activation for `state`.

        Uses the dot product over the overlapping prefix of weights and state.
        Entity units scale the result by 1 / (1 + sqrt(familiarity * 0.5)) and
        append |raw activation| to both activation deques on every call.
        """
        self.ensure_weights(len(state))

        if len(self.weights) != len(state):
            min_dim = min(len(self.weights), len(state))
            raw_activation = np.dot(self.weights[:min_dim], state[:min_dim])
        else:
            raw_activation = np.dot(self.weights, state)

        if self.kind == "entity":
            novelty_factor = 1.0 / (1.0 + np.sqrt(self.familiarity * 0.5))
            decayed_activation = raw_activation * novelty_factor
            self.activation_history.append(abs(raw_activation))
            self.cluster_activations.append(abs(raw_activation))
            return decayed_activation
        else:
            return raw_activation

    def adapt_learning_rate(self):
        """Nudge the learning rate once 50 error samples are available.

        Mean error below 0.1 shrinks the rate by 1% (floor 0.001); mean error
        above 0.5 grows it by 1% (ceiling 0.05).
        """
        if len(self.prediction_errors) >= 50:
            avg_error = np.mean(self.prediction_errors)

            if avg_error < 0.1:
                self.learning_rate = max(0.001, self.learning_rate * 0.99)
            elif avg_error > 0.5:
                self.learning_rate = min(0.05, self.learning_rate * 1.01)

    def update(self, state, error, gamma_fast=0.5, gamma_slow=0.95, stagnation=0.0):
        """Apply one learning step driven by `error`.

        Args:
            state: Input vector (NumPy array).
            error: Scalar error signal multiplying the weight change.
            gamma_fast: Decay of the fast eligibility trace.
            gamma_slow: Decay of the slow eligibility trace.
            stagnation: For action units, values above 0.5 force a utility
                decay whenever error > 0.01.
        """
        self.ensure_weights(len(state))

        # States differ in length (battle vs. overworld). Train only the
        # overlapping prefix and leave the remaining weights untouched, rather
        # than shrinking the weight vector and losing what was learned.
        overlap = min(len(self.weights), len(state))
        state = state[:overlap]

        self.eligibility_fast = gamma_fast * self.eligibility_fast + 1.0
        self.eligibility_slow = gamma_slow * self.eligibility_slow + 1.0

        self.adapt_learning_rate()

        fast_update = 0.7 * self.learning_rate * error * state * self.eligibility_fast
        slow_update = 0.3 * self.learning_rate * error * state * self.eligibility_slow
        self.weights[:overlap] += fast_update + slow_update

        if self.kind == "action":
            if error > 0.01:
                if stagnation > 0.5:
                    self.utility *= 0.97
                elif error > 0.2:
                    self.utility = min(self.utility * 1.02, 2.0)
                else:
                    self.utility *= 0.995

            # Movement actions keep a higher utility floor than the others.
            if self.group == "move":
                self.utility = np.clip(self.utility, 0.1, 2.0)
            else:
                self.utility = np.clip(self.utility, 0.01, 2.0)

        if self.kind == "entity" and len(self.activation_history) > 0:
            recent_avg = np.mean(self.activation_history)
            if recent_avg > 0.1:
                self.familiarity += 0.03

        if self.kind == "entity":
            # predict() also appends to the activation deques as a side effect.
            prediction = self.predict(state)
            self.prediction_errors.append(abs(prediction - error))


class ControlSwapPerceptron(Perceptron):
    """Perceptron variant that scores whether the control mode should be swapped."""

    def __init__(self):
        super().__init__(kind="control_swap")
        self.swap_history = deque(maxlen=100)  # (swapped, novelty_gained) pairs
        self.confidence = 0.0

    def should_swap(self, state, movement_stagnation):
        """Decide whether to swap control mode.

        Args:
            state: Current learning-state vector.
            movement_stagnation: Stagnation measure; squashed with tanh(x / 5).

        Returns:
            (swap, strength): `swap` is True when
            0.7 * (weights . state) + 0.3 * tanh(stagnation / 5) exceeds 0.5;
            `strength` is the absolute value of that combined score.
            Returns (False, 0.0) while no weights have been learned yet.
        """
        if self.weights is None:
            return False, 0.0

        # State length varies between frames; use the shared prefix, as
        # Perceptron.predict does, instead of letting np.dot raise.
        overlap = min(len(self.weights), len(state))
        swap_score = np.dot(self.weights[:overlap], state[:overlap])
        stagnation_factor = np.tanh(movement_stagnation / 5.0)
        combined_score = swap_score * 0.7 + stagnation_factor * 0.3

        return combined_score > 0.5, abs(combined_score)

    def record_swap_outcome(self, state, swapped, novelty_gained):
        """Log a swap decision and refresh `confidence`.

        Once at least 20 outcomes are stored, confidence becomes the fraction
        of the latest 20 in which a swap happened and gained novelty > 0.2.
        `state` is accepted for interface symmetry but not used.
        """
        self.swap_history.append((swapped, novelty_gained))

        if len(self.swap_history) >= 20:
            recent = list(self.swap_history)[-20:]
            successful = sum(1 for swap, nov in recent if swap and nov > 0.2)
            self.confidence = successful / 20.0

In [None]:
# ============================================================================
# CELL 3 PART 1: Brain Class - Initialization (All State Variables)
# ============================================================================
# CHANGES:
# 1. Battle thread state variables
# 2. Cross-map navigation state variables
#    - map_graph, nav_map_chain, nav_chain_index
#    - nav_paused, nav_paused_reason, nav_cross_map_target
# ============================================================================

class Brain:
    def __init__(self):
        """Set every piece of Brain state to its starting value.

        Apart from plain attribute assignments this constructor does two
        things: it calls self.load_exploration_memory() right after the
        direction lookup tables are defined, and it builds one
        ControlSwapPerceptron.

        NOTE(review): load_exploration_memory() runs before most attributes
        below exist, and anything it stores in an attribute that is assigned
        later in this method (e.g. visited_maps, transition_bans) would be
        reset - confirm it only touches attributes defined above the call.
        """
        self.perceptrons = []
        
        # Bounded rolling histories.
        self.prev_learning_states = deque(maxlen=50)
        self.prev_context_states = deque(maxlen=10)
        self.last_positions = deque(maxlen=30)
        self.action_history = deque(maxlen=100)
        
        self.control_mode = "move"
        self.timestep = 0
        self.last_action = None
        self.last_direction = 0
        
        # Lower bounds applied to action utilities (see blend_from_taught).
        self.MOVE_UTILITY_FLOOR = 0.05
        self.INTERACT_UTILITY_FLOOR = 0.15
        
        # === PERSISTENT EXPLORATION MEMORY ===
        # Same path as the module-level EXPLORATION_MEMORY_FILE constant.
        self.EXPLORATION_MEMORY_FILE = BASE_PATH / "exploration_memory.json"
        self.exploration_memory = {}
        self.current_map_id = None
        self.SAVE_INTERVAL = 100
        
        # Direction code <-> name <-> (dx, dy) lookup tables; DOWN is +y.
        self.DIRECTION_NAMES = {0: "DOWN", 1: "UP", 2: "LEFT", 3: "RIGHT"}
        self.DIRECTION_TO_INT = {"DOWN": 0, "UP": 1, "LEFT": 2, "RIGHT": 3}
        self.INT_TO_ACTION = {0: "DOWN", 1: "UP", 2: "LEFT", 3: "RIGHT"}
        
        self.DIRECTION_DELTAS_INT = {0: (0, 1), 1: (0, -1), 2: (-1, 0), 3: (1, 0)}
        self.ACTION_DELTAS = {"UP": (0, -1), "DOWN": (0, 1), "LEFT": (-1, 0), "RIGHT": (1, 0)}
        self.DELTA_TO_DIRECTION = {(0, 1): 0, (0, -1): 1, (-1, 0): 2, (1, 0): 3}
        
        self.load_exploration_memory()
        
        # === MARKOV TRANSITION SYSTEM (OVERWORLD) ===
        self.taught_transitions = []
        self.taught_batches = []
        self.taught_metadata = {}
        self.markov_enabled = True
        self.markov_action_count = 0
        self.curiosity_action_count = 0
        self.last_markov_score = 0.0
        self.last_markov_action = None
        
        # === BATTLE THREAD STATE ===
        self.battle_transitions = []
        self.battle_sequences = []
        self.battle_metadata = {}
        self.battle_loaded = False
        self.battle_action_count = 0
        self.battle_markov_action_count = 0
        self.current_battle_id = 0
        self.battle_frame_count = 0
        self.last_battle_markov_score = 0.0
        self.last_battle_markov_action = None
        self.in_battle_last_frame = False
        self.battle_action_history = deque(maxlen=100)  # separate history, battle actions only
        
        # === TAUGHT MODEL REFERENCE (read-only, for stagnation blending) ===
        # Filled by load_taught_reference(); read by blend_from_taught().
        self.taught_reference = {
            'utilities': {},
            'weights': {},
            'loaded': False
        }
        
        # === BLEND SYSTEM ===
        self.blend_tier = 0
        self.last_blend_timestep = 0
        self.BLEND_COOLDOWN = 50
        self.blend_count = 0
        
        # tier -> (AI share, taught share) used by blend_from_taught().
        self.BLEND_RATIOS = {
            1: (0.80, 0.20),
            2: (0.60, 0.40),
            3: (0.40, 0.60)
        }
        
        self.BLEND_TIER_TRIGGERS = {
            1: {'pattern_repeats': 3, 'pos_stagnation': 8, 'consecutive': 12},
            2: {'pattern_repeats': 6, 'pos_stagnation': 15, 'consecutive': 15},
            3: {'pattern_repeats': 10, 'state_stagnation_mult': 2.0}
        }
        
        # === ACTION EXECUTION CONFIRMATION ===
        self.pending_action = None
        self.pending_action_frames = 0
        self.ACTION_CONFIRM_FRAMES = 3
        self.last_confirmed_action = None
        
        # === TILE INTERACTION PROBING ===
        self.INTERACTION_VERIFY_FRAMES = 8
        self.MIN_SUCCESS_RATE_THRESHOLD = 0.1
        self.pending_interaction_verify = None
        self.interaction_verify_countdown = 0
        
        # === MENU ESCAPE B-BOOST ===
        self.menu_trap_frames = 0
        self.menu_trap_b_boost = 1.0
        self.menu_trap_position = None
        self.B_BOOST_INCREMENT = 0.15
        self.B_BOOST_MAX = 3.0
        self.MENU_TRAP_THRESHOLD = 5
        self.original_b_utility = None
        
        # === ADAPTIVE MODE SWAPPING ===
        self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD = 15
        self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD = 25
        self.move_to_interact_threshold = self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD
        self.interact_to_move_threshold = self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD
        self.THRESHOLD_INCREMENT = 15
        self.MAX_THRESHOLD = 150
        self.frames_in_current_mode = 0
        self.swap_chain_count = 0
        self.position_at_mode_swap = None
        self.last_map_id = None
        self.last_battle_state = None
        
        # === UNPRODUCTIVE MODE SWAP TRACKING ===
        self.UNPRODUCTIVE_SWAP_THRESHOLD = 3
        self.unproductive_swap_count = 0
        self.utilities_before_swapping = {}
        self.swap_chain_active = False
        
        # === STATE STAGNATION DETECTION ===
        self.STATE_STAGNATION_THRESHOLD = 20
        self.state_stagnation_count = 0
        self.last_context_state_hash = None
        self.stagnation_initiator_action = None
        self.STAGNATION_INITIATOR_PENALTY = 0.7
        
        # === "BOTH" MODE THRESHOLDS ===
        self.BOTH_MODE_STAGNATION_THRESHOLD = 35
        self.BOTH_MODE_SWAP_THRESHOLD = 5
        
        # === TURN AS PROGRESS TRACKING ===
        self.last_direction_for_progress = None
        self.direction_change_counts_as_progress = True
        
        # === NOVELTY WEIGHTS ===
        self.UNVISITED_TILE_BONUS = 1.5
        self.OBSTRUCTION_PENALTY = 0.25
        
        # === TRANSITION SYSTEM ===
        self.TRANSITION_ATTRACTION_WEIGHT = 0.6
        self.TEMP_DEBT_ACCUMULATION = 0.5
        self.TEMP_DEBT_DECAY = 0.02
        self.TEMP_DEBT_MAX = 15.0
        
        # === DEBT CAPS AND DECAY ===
        self.MAX_MAP_DEBT = 10.0
        self.MAX_LOCATION_DEBT = 5.0
        self.DEBT_DECAY_RATE = 0.005
        
        # === TRANSITION BAN SYSTEM ===
        self.transition_bans = {}
        self.BAN_VICINITY_RADIUS = 3
        self.BAN_COVERAGE_LIFT_THRESHOLD = 0.6
        self.BAN_TIMEOUT_STEPS = 300
        
        # Multi-scale memory
        self.visited_maps = {}
        self.map_novelty_debt = {}
        self.location_memory = {}
        self.location_novelty = {}
        self.action_execution_count = {}
        
        self.swap_perceptron = ControlSwapPerceptron()
        self.error_history = deque(maxlen=1000)
        self.numeric_error_history = deque(maxlen=1000)
        self.visual_error_history = deque(maxlen=1000)
        self._entity_norms_cache = {}
        self._cache_valid = False
        self.innate_entities_spawned = False
        
        # === REPETITION CORRECTION ===
        self.consecutive_action_count = 0
        self.current_repeated_action = None
        self.LEARNING_SLOWDOWN_START = 3
        self.LEARNING_SLOWDOWN_MAX = 10
        self.PENALTY_THRESHOLD = 12
        self.HARD_RESET_THRESHOLD = 18
        
        # === PATTERN DETECTION ===
        self.PATTERN_CHECK_WINDOW = 50
        self.PATTERN_MIN_REPEATS = 3
        self.PATTERN_MAX_LENGTH = 10
        self.detected_pattern = None
        self.pattern_repeat_count = 0

        # === PROBE ACTION CACHE ===
        self._cached_probe_action = None
        self._cached_probe_dir = None
        self._probe_cache_position = None

        # === NAVIGATION MODE ===
        self.nav_active = False
        self.nav_path = []
        self.nav_path_index = 0
        self.nav_target = None
        self.nav_target_list = []
        self.nav_target_index = 0
        self.nav_struck_targets = set()
        self.nav_steps_taken = 0
        self.nav_stagnation_count = 0
        self.nav_last_position = None
        
        self.KNOWN_AREA_TRIGGER = 20
        self.known_area_counter = 0
        self.NAV_STAGNATION_LIMIT = 8
        self.NAV_MAX_STEPS = 100
        self.NAV_CURIOSITY_WINDOW = 5
        self.nav_curiosity_countdown = 0
        self.NAV_LEARNING_DAMPENING = 0.3

        # === CROSS-MAP NAVIGATION ===
        self.nav_map_chain = []           # ordered list of map_ids to traverse [current, ..., target_map]
        self.nav_chain_index = 0          # which step in the chain we're on
        self.nav_cross_map_target = None  # the final target (pos, map_id) across maps
        self.nav_cross_map_target_data = None  # metadata about the final target (score, type, etc.)
        self.nav_paused = False           # True when waiting for transition discovery
        self.nav_paused_reason = ""       # why navigation is paused
        self.nav_paused_target_map = None # which map we need a transition to
        self.NAV_PAUSE_CHECK_INTERVAL = 50  # how often to recheck for new transitions while paused
        self.nav_pause_check_countdown = 0  # countdown to next recheck
        self.NAV_CROSS_MAP_REFRESH_INTERVAL = 40  # how often to recalculate cross-map path
        self.nav_cross_map_refresh_countdown = 0   # countdown to next refresh
        self._map_graph = {}              # map_id -> set of (dest_map_id, transition_dict) — rebuilt on demand
        self._map_graph_dirty = True      # flag to rebuild graph when transitions change

        # === ENTITY SPAWNING & CLUSTERING ===
        self.ENTITY_INITIAL_CAPACITY = 20
        self.entity_capacity = self.ENTITY_INITIAL_CAPACITY
        self.ENTITY_CAPACITY_GROWTH = 1.5
        self.ENTITY_CLUSTER_SIMILARITY = 0.85
        self.ENTITY_MIN_ACTIVATIONS = 10
        self.entity_spawn_count = 0
        self.entity_merge_count = 0

        # === TAUGHT NAVIGATION TARGETS ===
        self.taught_nav_targets = {}
        self.taught_nav_global_order = []
        self.nav_visited_targets = set()
        self.taught_nav_loaded = False

# ============================================================================
# CELL 3 PART 2: Taught Reference, Blend System, Markov (Overworld + Battle)
# ============================================================================
# CHANGES:
# 1. Added load_taught_battle_transitions() method
# 2. Added compute_battle_markov_similarity() method
# 3. Added get_battle_markov_action() method
# ============================================================================

    # =========================================================================
    # TAUGHT MODEL REFERENCE
    # =========================================================================
    
    def load_taught_reference(self, filepath):
        """
        Load taught model as a READ-ONLY reference for stagnation blending.
        Does NOT overwrite AI's own utilities or weights.
        """
        try:
            if not Path(filepath).exists():
                print(f"  No taught reference model found at {filepath}")
                return
            
            with open(filepath, 'r') as f:
                model = json.load(f)
            
            if "perceptrons" not in model:
                print(f"  ‚ö†Ô∏è Taught reference model empty or invalid")
                return
            
            for saved_action in model["perceptrons"].get("actions", []):
                action_name = saved_action.get("action")
                if action_name:
                    self.taught_reference['utilities'][action_name] = saved_action.get("utility", 1.0)
                    
                    if saved_action.get("weights_nonzero"):
                        dim = saved_action.get("weights_shape", 1376)
                        w = np.zeros(dim)
                        for idx, val in saved_action["weights_nonzero"]:
                            if idx < dim:
                                w[idx] = val
                        self.taught_reference['weights'][action_name] = w
            
            self.taught_reference['loaded'] = True
            
            print(f"  üìñ Taught reference loaded:")
            print(f"     Actions: {list(self.taught_reference['utilities'].keys())}")
            print(f"     Utilities: {', '.join(f'{k}:{v:.3f}' for k, v in self.taught_reference['utilities'].items())}")
            print(f"     Weights available: {list(self.taught_reference['weights'].keys())}")
            
        except Exception as e:
            print(f"  ‚ö†Ô∏è Error loading taught reference: {e}")
    
    def blend_from_taught(self, tier):
        """
        Blend AI's current utilities (and optionally weights) toward taught values.
        
        tier 1 (light):  80% AI / 20% taught ‚Äî utilities only
        tier 2 (medium): 60% AI / 40% taught ‚Äî utilities only
        tier 3 (hard):   40% AI / 60% taught ‚Äî utilities + weights
        """
        if not self.taught_reference['loaded']:
            return
        
        if tier not in self.BLEND_RATIOS:
            return
        
        if self.timestep - self.last_blend_timestep < self.BLEND_COOLDOWN:
            return
        
        ai_weight, taught_weight = self.BLEND_RATIOS[tier]
        blend_weights = (tier == 3)
        
        blended_actions = []
        
        for a in self.actions():
            if a.action not in self.taught_reference['utilities']:
                continue
            
            taught_util = self.taught_reference['utilities'][a.action]
            old_util = a.utility
            
            a.utility = ai_weight * a.utility + taught_weight * taught_util
            
            if taught_util > 1.0:
                a.utility = max(a.utility, taught_util * 0.5)
            
            floor = self.INTERACT_UTILITY_FLOOR if a.group == "interact" else self.MOVE_UTILITY_FLOOR
            a.utility = max(a.utility, floor)
            a.utility = min(a.utility, 2.0)
            
            blended_actions.append(f"{a.action}:{old_util:.3f}‚Üí{a.utility:.3f}")
            
            if blend_weights and a.action in self.taught_reference['weights']:
                taught_w = self.taught_reference['weights'][a.action]
                if a.weights is not None:
                    min_dim = min(len(a.weights), len(taught_w))
                    a.weights[:min_dim] = (
                        ai_weight * a.weights[:min_dim] + 
                        taught_weight * taught_w[:min_dim]
                    )
        
        self.last_blend_timestep = self.timestep
        self.blend_tier = tier
        self.blend_count += 1
        
        tier_names = {1: "LIGHT", 2: "MEDIUM", 3: "HARD"}
        print(f"  üîÄ BLEND [{tier_names.get(tier, '?')}] ({ai_weight:.0%} AI / {taught_weight:.0%} taught)"
              f" | Blend #{self.blend_count}")
        for ba in blended_actions:
            print(f"     {ba}")
        if blend_weights:
            print(f"     + Weights blended for: {list(self.taught_reference['weights'].keys())}")

    # =========================================================================
    # OVERWORLD MARKOV TRANSITION SYSTEM
    # =========================================================================
    
    def load_taught_transitions(self, filepath=None):
        filepath = filepath or TAUGHT_TRANSITIONS_FILE
        try:
            if Path(filepath).exists():
                with open(filepath, 'r') as f:
                    data = json.load(f)
                
                self.taught_transitions = []
                self.taught_batches = data.get('batches', [])
                
                for batch in self.taught_batches:
                    batch_type = batch.get('batch_type', 'steady')
                    trigger_action = batch.get('trigger_action')
                    
                    for frame in batch.get('frames', []):
                        transition = {
                            'state': frame.get('state', {}),
                            'action': frame.get('action'),
                            'recent_actions': frame.get('recent_actions', []),
                            'frame_offset': frame.get('frame_offset', 0),
                            'batch_type': batch_type,
                            'trigger_action': trigger_action
                        }
                        self.taught_transitions.append(transition)
                
                self.taught_metadata = data.get('metadata', {})
                
                print(f"  üìö Loaded taught transitions:")
                print(f"     Batches: {len(self.taught_batches)}")
                print(f"     Frames: {len(self.taught_transitions)}")
                print(f"     Action changes: {self.taught_metadata.get('action_changes', 0)}")
                print(f"     Maps visited: {self.taught_metadata.get('maps_visited', [])}")
            else:
                self.taught_transitions = []
                self.taught_batches = []
                self.taught_metadata = {}
                print(f"  No taught transitions file found at {filepath}")
        except Exception as e:
            print(f"  Error loading taught transitions: {e}")
            self.taught_transitions = []
            self.taught_batches = []
            self.taught_metadata = {}
    
    def extract_partial_context(self, context_state, raw_position=None):
        raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
        raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
        current_map = int(context_state[2])
        
        movement_blocked = self.get_position_stagnation() > 3
        
        near_transition = False
        memory = self.get_current_map_memory(current_map)
        for t in memory.get('transitions', []):
            t_pos = tuple(t['position']) if isinstance(t['position'], list) else t['position']
            if abs(raw_x - t_pos[0]) + abs(raw_y - t_pos[1]) <= 2:
                near_transition = True
                break
        
        tile_probed = not self.should_interact_at_tile(raw_x, raw_y, current_map)
        
        return {
            'in_battle': context_state[3] > 0.5,
            'in_menu': context_state[4] > 0.5,
            'movement_blocked': movement_blocked,
            'near_transition': near_transition,
            'tile_probed': tile_probed
        }
    
    def compute_markov_similarity(self, context_state, raw_position=None, taught_frames=None):
        """
        Find the taught frame that best matches the current situation.

        Frames come from taught_frames when supplied, otherwise from
        self.taught_transitions; only in the latter case are frames recorded
        on a different map skipped. Frames without an action (or with "NONE")
        and frames farther than MARKOV_POS_MAX_DIST (Manhattan) are skipped.

        Each remaining frame is scored as a weighted sum of:
          immediate  - 0.25 base + position bonus + 0.2 for same facing
                       + 0.1 each for matching battle / menu flags
          sequential - longest matching suffix (8, 5 or 3) of recent actions
          partial    - fraction of battle / menu flags that agree
        The total is multiplied by 1.2 for "action_change" batches and by 1.1
        when frame_offset is 0.

        Returns (best_score, best_action, best_index), or (0.0, None, -1)
        when there is nothing to match.
        """
        check_map = taught_frames is None
        frames = self.taught_transitions if check_map else taught_frames

        if not frames:
            return 0.0, None, -1

        if raw_position:
            raw_x, raw_y = raw_position[0], raw_position[1]
        else:
            raw_x = int(context_state[0] * 255)
            raw_y = int(context_state[1] * 255)
        current_map = int(context_state[2])
        current_dir = int(context_state[5])
        in_battle = context_state[3] > 0.5
        in_menu = context_state[4] > 0.5

        current_actions = list(self.action_history)
        current_partial = self.extract_partial_context(context_state, raw_position)

        def suffix_matches(recent, length):
            # True when both histories hold at least `length` actions and end identically.
            return (
                len(current_actions) >= length
                and len(recent) >= length
                and current_actions[-length:] == recent[-length:]
            )

        best_score, best_action, best_idx = 0.0, None, -1

        for idx, transition in enumerate(frames):
            t_state = transition.get('state', {})
            t_action = transition.get('action')
            if not t_action or t_action == "NONE":
                continue

            if check_map and t_state.get('map_id') != current_map:
                continue

            # --- immediate score -------------------------------------------
            pos_dist = abs(raw_x - t_state.get('x', 0)) + abs(raw_y - t_state.get('y', 0))
            if pos_dist == 0:
                pos_bonus = MARKOV_POS_EXACT_BONUS
            elif pos_dist <= 2:
                pos_bonus = MARKOV_POS_NEAR_BONUS
            elif pos_dist <= MARKOV_POS_MAX_DIST:
                pos_bonus = MARKOV_POS_FAR_BONUS
            else:
                continue

            immediate_score = 0.25 + pos_bonus
            if t_state.get('direction') == current_dir:
                immediate_score += 0.2

            t_in_battle = t_state.get('in_battle', 0) == 1
            t_in_menu = t_state.get('in_menu', 0) == 1
            if t_in_battle == in_battle:
                immediate_score += 0.1
            if t_in_menu == in_menu:
                immediate_score += 0.1

            # --- sequential score: longest matching action suffix wins ------
            t_recent = transition.get('recent_actions', [])
            sequential_score = 0.0
            if t_recent and current_actions:
                if suffix_matches(t_recent, 8):
                    sequential_score = MARKOV_SEQ_FULL_WEIGHT
                if sequential_score < MARKOV_SEQ_MEDIUM_WEIGHT and suffix_matches(t_recent, 5):
                    sequential_score = MARKOV_SEQ_MEDIUM_WEIGHT
                if sequential_score < MARKOV_SEQ_SHORT_WEIGHT and suffix_matches(t_recent, 3):
                    sequential_score = MARKOV_SEQ_SHORT_WEIGHT

            # --- partial score ----------------------------------------------
            agreements = (
                t_in_battle == current_partial['in_battle'],
                t_in_menu == current_partial['in_menu'],
            )
            partial_score = sum(1 for agreed in agreements if agreed) / 2

            total_score = (
                MARKOV_IMMEDIATE_WEIGHT * immediate_score +
                MARKOV_SEQUENTIAL_WEIGHT * sequential_score +
                MARKOV_PARTIAL_WEIGHT * partial_score
            )

            if transition.get('batch_type', 'steady') == "action_change":
                total_score *= 1.2
            if transition.get('frame_offset', 0) == 0:
                total_score *= 1.1

            if total_score > best_score:
                best_score, best_action, best_idx = total_score, t_action, idx

        return best_score, best_action, best_idx
    
    def get_markov_action(self, context_state, raw_position=None, taught_frames=None):
        """
        Ask the overworld Markov matcher for an action suggestion.

        Args:
            context_state: Current context vector, forwarded unchanged to
                compute_markov_similarity.
            raw_position: Optional raw position, forwarded unchanged.
            taught_frames: Optional frames to match against; when None,
                self.taught_transitions is used instead.

        Returns:
            (matched, action, score). matched is True only when the score
            reaches MARKOV_FAMILIARITY_THRESHOLD; otherwise action is None.
            The score is 0.0 when matching is disabled or no frames exist.
        """
        if not self.markov_enabled:
            return False, None, 0.0

        if taught_frames is None:
            candidates = self.taught_transitions
        else:
            candidates = taught_frames
        if not candidates:
            return False, None, 0.0

        similarity, suggestion, _ = self.compute_markov_similarity(
            context_state, raw_position, taught_frames=candidates
        )
        # The score is recorded even when the match is rejected below.
        self.last_markov_score = similarity

        if similarity >= MARKOV_FAMILIARITY_THRESHOLD:
            self.last_markov_action = suggestion
            return True, suggestion, similarity
        return False, None, similarity

    # =========================================================================
    # BATTLE MARKOV SYSTEM
    # =========================================================================
    
    def load_taught_battle_transitions(self, filepath=None):
        """
        Load battle demonstration frames from a taught-battle-transitions JSON file.

        This method only reads the file. When the file is missing or cannot
        be parsed, the battle data is reset to empty and battle_loaded is
        set to False instead of raising.

        Args:
            filepath: Path (or str) of the JSON file. Falls back to
                TAUGHT_BATTLE_TRANSITIONS_FILE when not given.

        Side effects:
            Sets self.battle_transitions (JSON key 'flat_frames'),
            self.battle_sequences ('battle_sequences'),
            self.battle_metadata ('metadata') and self.battle_loaded,
            and prints a short summary.
        """
        def _reset_battle_data():
            # Shared by the "missing file" and "load failed" paths.
            self.battle_transitions = []
            self.battle_sequences = []
            self.battle_metadata = {}
            self.battle_loaded = False

        filepath = filepath or TAUGHT_BATTLE_TRANSITIONS_FILE
        try:
            if not Path(filepath).exists():
                _reset_battle_data()
                print(f"  ‚öîÔ∏è No battle transitions file found at {filepath}")
                print(f"     Battle fallback: A-button only")
                return

            # JSON is UTF-8 by specification; without an explicit encoding
            # open() would decode with the platform default instead.
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.battle_transitions = data.get('flat_frames', [])
            self.battle_sequences = data.get('battle_sequences', [])
            self.battle_metadata = data.get('metadata', {})
            self.battle_loaded = True

            print(f"  ‚öîÔ∏è Loaded battle transitions:")
            print(f"     Flat frames: {len(self.battle_transitions)}")
            print(f"     Battle sequences: {len(self.battle_sequences)}")
            print(f"     Total battle frames: {self.battle_metadata.get('total_battle_frames', 0)}")
            print(f"     Battles recorded: {self.battle_metadata.get('battles_recorded', 0)}")
            print(f"     Avg battle length: {self.battle_metadata.get('avg_battle_length', 0)}")
            outcomes = self.battle_metadata.get('outcomes', {})
            if outcomes:
                print(f"     Outcomes: {outcomes}")
            common_seqs = self.battle_metadata.get('most_common_sequences', [])
            if common_seqs:
                # Only the first three entries are reported.
                for seq in common_seqs[:3]:
                    print(f"     Common seq: {seq.get('sequence', [])} x{seq.get('count', 0)} ({seq.get('context', '')})")

        except Exception as e:
            # Best-effort load: any failure leaves the matcher with no battle data.
            print(f"  ‚ö†Ô∏è Error loading battle transitions: {e}")
            _reset_battle_data()
    
    def compute_battle_markov_similarity(self, context_state, palette_state=None):
        """
        Score every taught battle frame against the current battle situation.

        Neither map id nor position is used here. Each frame's score is a
        weighted sum of three signals:
        - recent-action suffix match (BATTLE_MARKOV_ACTION_SEQ_WEIGHT)
        - palette similarity (BATTLE_MARKOV_PALETTE_WEIGHT)
        - menu-flag agreement (BATTLE_MARKOV_MENU_STATE_WEIGHT)

        Args:
            context_state: Context vector; index 4 above 0.5 is treated as
                "in menu".
            palette_state: Optional palette vector, compared against a
                frame's 'palette_snapshot' when the frame has one.

        Returns:
            (score, action, index) of the best-scoring frame, or
            (0.0, None, -1) when there are no frames or no frame scores
            above zero.
        """
        frames = self.battle_transitions
        if not frames:
            return 0.0, None, -1

        history = list(self.battle_action_history)
        menu_open = context_state[4] > 0.5

        # Longest suffix first; the first length that matches fixes the score.
        suffix_levels = ((8, 1.0), (5, 0.6), (3, 0.3))
        # The history does not change while scanning, so slice it once.
        history_tails = {length: history[-length:] for length, _ in suffix_levels}

        top_score, top_action, top_index = 0.0, None, -1

        for index, frame in enumerate(frames):
            taught_action = frame.get('action')
            taught_recent = frame.get('recent_actions', [])
            taught_state = frame.get('state', {})
            batch_type = frame.get('batch_type', 'steady')

            # Frames without a usable action can never be suggested.
            if not taught_action or taught_action == "NONE":
                continue

            # --- Primary signal: recent-action suffix match ---
            sequence_score = 0.0
            if taught_recent and history:
                for length, weight in suffix_levels:
                    if len(history) < length or len(taught_recent) < length:
                        continue
                    if history_tails[length] == taught_recent[-length:]:
                        sequence_score = weight
                        break

            # --- Secondary signal: palette similarity ---
            palette_score = 0.0
            if palette_state is not None and 'palette_snapshot' in frame:
                snapshot = np.array(frame['palette_snapshot'], dtype=float)
                if len(snapshot) > 0 and len(palette_state) > 0:
                    # Compare only the overlapping prefix of the two vectors.
                    shared = min(len(snapshot), len(palette_state))
                    distance = np.linalg.norm(snapshot[:shared] - palette_state[:shared])
                    # Smaller distance maps to a score closer to 1.0.
                    palette_score = 1.0 / (1.0 + distance * 0.01)
            else:
                # Nothing to compare against: use a neutral score.
                palette_score = 0.5

            # --- Tertiary signal: menu flag agreement ---
            taught_menu_open = taught_state.get('in_menu', 0) == 1
            menu_score = 1.0 if taught_menu_open == menu_open else 0.0

            combined = (
                BATTLE_MARKOV_ACTION_SEQ_WEIGHT * sequence_score +
                BATTLE_MARKOV_PALETTE_WEIGHT * palette_score +
                BATTLE_MARKOV_MENU_STATE_WEIGHT * menu_score
            )

            # Frames recorded at an action change get a 15% boost.
            if batch_type == "action_change":
                combined *= 1.15

            # Frames with frame_offset 0 (or no offset at all) get a 10% boost.
            if frame.get('frame_offset', 0) == 0:
                combined *= 1.1

            # Strict comparison keeps the earliest frame on ties.
            if combined > top_score:
                top_score, top_action, top_index = combined, taught_action, index

        return top_score, top_action, top_index
    
    def get_battle_markov_action(self, context_state, palette_state=None):
        """
        Suggest a battle action from the taught battle frames.

        Args:
            context_state: Context vector forwarded to
                compute_battle_markov_similarity.
            palette_state: Optional palette vector, forwarded unchanged.

        Returns:
            (matched, action, score)
              matched: True when the score reaches the threshold and an
                       action was found
              action: the suggested action string, or None when unmatched
              score: the similarity score (0.0 when no battle data is loaded)
        """
        if not (self.battle_loaded and self.battle_transitions):
            return False, None, 0.0

        similarity, suggestion, _ = self.compute_battle_markov_similarity(
            context_state, palette_state
        )
        # The score is recorded even when the suggestion is rejected below.
        self.last_battle_markov_score = similarity

        # Both battle thresholds sit below the overworld familiarity
        # threshold; the stricter one applies once more than 200 frames exist.
        if len(self.battle_transitions) > 200:
            required = BATTLE_MARKOV_THRESHOLD_HIGH
        else:
            required = BATTLE_MARKOV_THRESHOLD_LOW

        if similarity >= required and suggestion:
            self.last_battle_markov_action = suggestion
            return True, suggestion, similarity
        return False, None, similarity


# ============================================================================
# CELL 3 PART 3: Action Confirmation, Exploration Memory, Tile Probing
# ============================================================================
# NO CHANGES from original Cell 3 Part 1/2 — just reorganized boundary
# ============================================================================

    # =========================================================================
    # ACTION EXECUTION CONFIRMATION
    # =========================================================================
    
    def set_pending_action(self, action_name):
        """Store action_name as the action awaiting confirmation and restart its frame counter."""
        self.pending_action, self.pending_action_frames = action_name, 0
    
    def confirm_action_executed(self, context_state, prev_context_state):
        """
        Decide whether the pending action can be considered executed.

        A direction action counts as executed when position (indices 0/1) or
        facing (index 5) changed. A button action counts as executed when the
        menu value (index 4) moved by more than 0.1, or the battle (index 3)
        or map (index 2) value changed. Independently of that, the action is
        confirmed once ACTION_CONFIRM_FRAMES calls have elapsed.

        Args:
            context_state: Current context vector.
            prev_context_state: Previous context vector, or None.

        Returns:
            True when nothing is pending or the pending action was just
            confirmed (pending state is then cleared); False otherwise.
        """
        pending = self.pending_action
        if pending is None:
            return True

        self.pending_action_frames += 1

        observed = False
        if prev_context_state is not None:
            now, before = context_state, prev_context_state
            if pending in ("UP", "DOWN", "LEFT", "RIGHT"):
                moved = now[0] != before[0] or now[1] != before[1]
                turned = now[5] != before[5]
                observed = moved or turned
            elif pending in ("A", "B", "Start", "Select"):
                menu_delta = abs(now[4] - before[4]) > 0.1
                battle_delta = now[3] != before[3]
                map_delta = now[2] != before[2]
                observed = menu_delta or battle_delta or map_delta

        if not (observed or self.pending_action_frames >= self.ACTION_CONFIRM_FRAMES):
            return False

        self.last_confirmed_action = pending
        self.pending_action = None
        self.pending_action_frames = 0
        return True
    
    def should_send_new_action(self):
        """Return True when no action is pending or the pending one has waited ACTION_CONFIRM_FRAMES frames."""
        if self.pending_action is None:
            return True
        return self.pending_action_frames >= self.ACTION_CONFIRM_FRAMES

    # =========================================================================
    # EXPLORATION MEMORY PERSISTENCE
    # =========================================================================
    
    def load_exploration_memory(self):
        """
        Rebuild self.exploration_memory from self.EXPLORATION_MEMORY_FILE.

        Top-level JSON keys have the form 'map_<id>'; the prefix is removed
        and the remainder converted to an int map id. Each value goes through
        _deserialize_map_memory. A missing file or any error while loading
        leaves self.exploration_memory as an empty dict.
        """
        try:
            if not self.EXPLORATION_MEMORY_FILE.exists():
                self.exploration_memory = {}
                return

            with open(self.EXPLORATION_MEMORY_FILE, 'r') as handle:
                stored = json.load(handle)

            restored = {}
            self.exploration_memory = restored
            for key, payload in stored.items():
                restored[int(key.replace('map_', ''))] = self._deserialize_map_memory(payload)
            print(f"  Loaded exploration memory: {len(self.exploration_memory)} maps")
        except Exception as e:
            # Best-effort load: start from empty memory on any failure.
            print(f"  Error loading exploration memory: {e}")
            self.exploration_memory = {}

    def _deserialize_map_memory(self, map_data):
        memory = {
            'visited_tiles': set(tuple(t) for t in map_data.get('visited_tiles', [])),
            'obstructions': set(tuple(t) for t in map_data.get('obstructions', [])),
            'interactable_objects': map_data.get('interactable_objects', []),
            'last_visited_timestep': map_data.get('last_visited_timestep', 0),
            'transitions': map_data.get('transitions', []),
            'temp_debt': map_data.get('temp_debt', 0.0),
            'tile_interactions': {}
        }
        for tile_key, tile_data in map_data.get('tile_interactions', {}).items():
            memory['tile_interactions'][tile_key] = {
                'directions_tried': set(tile_data.get('directions_tried', [])),
                'direction_attempts': {int(k): v for k, v in tile_data.get('direction_attempts', {}).items()},
                'direction_successes': {int(k): v for k, v in tile_data.get('direction_successes', {}).items()},
                'exhausted': tile_data.get('exhausted', False)
            }
        return memory

    def save_exploration_memory(self):
        """
        Write self.exploration_memory to self.EXPLORATION_MEMORY_FILE as JSON.

        Each map is stored under the key 'map_<id>' after passing through
        _serialize_map_memory. Errors are reported with a printed message
        rather than raised.

        The JSON text is built completely before the file is opened:
        opening with 'w' truncates the file immediately, so a serialization
        error raised afterwards would destroy the previously saved memory.
        """
        try:
            data = {f'map_{mid}': self._serialize_map_memory(md) for mid, md in self.exploration_memory.items()}
            # May raise (e.g. on a non-JSON-serializable value) while the
            # existing file is still untouched.
            payload = json.dumps(data, indent=2)
            with open(self.EXPLORATION_MEMORY_FILE, 'w') as f:
                f.write(payload)
        except Exception as e:
            print(f"  Error saving exploration memory: {e}")

    def _serialize_map_memory(self, map_data):
        serialized_ti = {}
        for tile_key, td in map_data.get('tile_interactions', {}).items():
            serialized_ti[tile_key] = {
                'directions_tried': list(td.get('directions_tried', set())),
                'direction_attempts': {str(k): v for k, v in td.get('direction_attempts', {}).items()},
                'direction_successes': {str(k): v for k, v in td.get('direction_successes', {}).items()},
                'exhausted': td.get('exhausted', False)
            }
        return {
            'visited_tiles': list(map_data['visited_tiles']),
            'obstructions': list(map_data['obstructions']),
            'interactable_objects': map_data['interactable_objects'],
            'last_visited_timestep': map_data['last_visited_timestep'],
            'transitions': map_data.get('transitions', []),
            'temp_debt': map_data.get('temp_debt', 0.0),
            'tile_interactions': serialized_ti
        }

    def get_current_map_memory(self, map_id):
        """
        Return the exploration record for map_id, creating it on first use.

        A new record starts with empty tile sets and lists, zero temp debt,
        and 'last_visited_timestep' set to the current self.timestep.
        """
        try:
            return self.exploration_memory[map_id]
        except KeyError:
            fresh = {
                'visited_tiles': set(),
                'obstructions': set(),
                'interactable_objects': [],
                'last_visited_timestep': self.timestep,
                'transitions': [],
                'temp_debt': 0.0,
                'tile_interactions': {},
            }
            self.exploration_memory[map_id] = fresh
            return fresh

    def record_visited_tile(self, x, y, map_id):
        """Add the integer tile (x, y) to the map's visited set and stamp the map with the current timestep."""
        tile = (int(x), int(y))
        record = self.get_current_map_memory(map_id)
        record['visited_tiles'].add(tile)
        record['last_visited_timestep'] = self.timestep

    def record_obstruction(self, x, y, map_id, direction):
        """
        Mark the tile one step from (x, y) in the given direction as blocked.

        The step comes from self.DIRECTION_DELTAS_INT; an unknown direction
        maps to (0, 0), so (x, y) itself is recorded in that case.
        """
        step_x, step_y = self.DIRECTION_DELTAS_INT.get(direction, (0, 0))
        blocked_tile = (int(x + step_x), int(y + step_y))
        self.get_current_map_memory(map_id)['obstructions'].add(blocked_tile)

    # =========================================================================
    # TILE-BASED INTERACTION PROBING
    # =========================================================================
    
    def get_tile_interaction_key(self, x, y):
        """Return the tile_interactions dict key for a tile: '<int x>_<int y>'."""
        column, row = int(x), int(y)
        return "{}_{}".format(column, row)
    
    def get_tile_interaction_state(self, x, y, map_id):
        """
        Return the interaction-probing record for tile (x, y) on map_id.

        A missing record is created with no directions tried, zeroed attempt
        and success counters for directions 0-3, and 'exhausted' False.
        """
        record = self.get_current_map_memory(map_id)
        key = self.get_tile_interaction_key(x, y)
        per_tile = record['tile_interactions']
        if key in per_tile:
            return per_tile[key]

        per_tile[key] = {
            'directions_tried': set(),
            'direction_attempts': dict.fromkeys(range(4), 0),
            'direction_successes': dict.fromkeys(range(4), 0),
            'exhausted': False,
        }
        return per_tile[key]
    
    def should_interact_at_tile(self, x, y, map_id):
        """
        Decide whether probing tile (x, y) for interactions is still worthwhile.

        Returns:
            False for an exhausted tile; True while fewer than four
            directions have been tried; otherwise True only when some
            direction's success ratio is at least MIN_SUCCESS_RATE_THRESHOLD.
        """
        state = self.get_tile_interaction_state(x, y, map_id)
        if state['exhausted']:
            return False
        if len(state['directions_tried']) < 4:
            return True

        tries = state['direction_attempts']
        wins = state['direction_successes']
        return any(
            tries.get(d, 0) > 0
            and wins.get(d, 0) / tries.get(d, 0) >= self.MIN_SUCCESS_RATE_THRESHOLD
            for d in range(4)
        )
    
    def get_untried_directions(self, x, y, map_id):
        """Return, in ascending order, the directions 0-3 not yet tried at tile (x, y)."""
        already_tried = self.get_tile_interaction_state(x, y, map_id)['directions_tried']
        remaining = []
        for direction in range(4):
            if direction not in already_tried:
                remaining.append(direction)
        return remaining
    
    def get_best_interaction_direction(self, x, y, map_id):
        """
        Pick the direction to probe next at tile (x, y).

        Returns:
            The lowest untried direction when one exists. Otherwise the
            direction with the highest success ratio, provided that ratio is
            above zero (the lowest direction wins ties). None when no
            direction has ever succeeded.
        """
        state = self.get_tile_interaction_state(x, y, map_id)
        pending = self.get_untried_directions(x, y, map_id)
        if pending:
            return pending[0]

        tries = state['direction_attempts']
        wins = state['direction_successes']
        chosen = None
        top_ratio = 0.0
        for direction in range(4):
            count = tries.get(direction, 0)
            if not count > 0:
                continue
            ratio = wins.get(direction, 0) / count
            if ratio > top_ratio:
                top_ratio = ratio
                chosen = direction
        return chosen
    
    def get_best_probe_action(self, raw_x, raw_y, current_map, current_dir):
        """
        Choose the next probing action for the tile the agent stands on.

        The result is cached per (raw_x, raw_y, current_map, current_dir);
        a repeated call with the same key returns the cached pair without
        consulting the tile state again.

        Returns:
            (action, direction):
              ('A', current_dir) when the agent already faces the direction
              to probe; (self.INT_TO_ACTION[d], d) when it must first turn
              to direction d; (None, None) when there is nothing to probe.
        """
        key = (raw_x, raw_y, current_map, current_dir)
        if self._probe_cache_position == key:
            return self._cached_probe_action, self._cached_probe_dir

        def _face_then_probe(direction):
            # Press A when already facing the direction, otherwise turn to it.
            if current_dir == direction:
                return ('A', current_dir)
            return (self.INT_TO_ACTION[direction], direction)

        choice = (None, None)
        if self.should_interact_at_tile(raw_x, raw_y, current_map):
            pending = self.get_untried_directions(raw_x, raw_y, current_map)
            if pending:
                if current_dir in pending:
                    choice = ('A', current_dir)
                else:
                    choice = _face_then_probe(pending[0])
            else:
                preferred = self.get_best_interaction_direction(raw_x, raw_y, current_map)
                if preferred is not None:
                    choice = _face_then_probe(preferred)

        self._probe_cache_position = key
        self._cached_probe_action, self._cached_probe_dir = choice
        return choice
    
    def record_tile_interaction_attempt(self, x, y, map_id, direction, success):
        """
        Record one interaction probe at tile (x, y) facing `direction`.

        The direction is marked as tried and its attempt counter is bumped.
        On success the success counter is bumped too, and
        [int(x), int(y), direction name] is added to the map's
        'interactable_objects' unless already listed. The tile's exhaustion
        status is re-evaluated at the end in either case.
        """
        state = self.get_tile_interaction_state(x, y, map_id)
        state['directions_tried'].add(direction)

        tries = state['direction_attempts']
        tries[direction] = tries.get(direction, 0) + 1

        if success:
            wins = state['direction_successes']
            wins[direction] = wins.get(direction, 0) + 1

            known = self.get_current_map_memory(map_id)['interactable_objects']
            # Directions without a name fall back to their string form.
            dir_name = self.DIRECTION_NAMES.get(direction, str(direction))
            entry = [int(x), int(y), dir_name]
            if entry not in known:
                known.append(entry)
                print(f"  üéØ INTERACTABLE FOUND: ({x}, {y}) facing {dir_name}")

        self._check_tile_exhaustion(x, y, map_id)
    
    def _check_tile_exhaustion(self, x, y, map_id):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        if len(tile_state['directions_tried']) < 4:
            return
        if not any(tile_state['direction_successes'].get(d, 0) > 0 for d in range(4)):
            tile_state['exhausted'] = True
            print(f"  ‚úì Tile ({x}, {y}) exhausted - no interactions found")
    
    def get_direction_success_rate(self, x, y, map_id, direction):
        """Return successes / attempts for one direction at tile (x, y), or None when it was never attempted."""
        state = self.get_tile_interaction_state(x, y, map_id)
        tries = state['direction_attempts'].get(direction, 0)
        if tries == 0:
            return None
        wins = state['direction_successes'].get(direction, 0)
        return wins / tries
    
    def start_interaction_verification(self, x, y, map_id, direction):
        """Remember the probe being verified and arm the countdown with INTERACTION_VERIFY_FRAMES."""
        probe = dict(x=x, y=y, map_id=map_id, direction=direction)
        self.pending_interaction_verify = probe
        self.interaction_verify_countdown = self.INTERACTION_VERIFY_FRAMES
    
    def check_interaction_verification(self, context_state, prev_context_state):
        """
        Advance the pending interaction verification by one frame.

        Does nothing when no verification is pending. Otherwise the
        countdown is decremented. A probe counts as successful only when the
        previous state was outside battle and menu (indices 3 and 4 at most
        0.5) and one of these happened: the menu value moved by more than
        0.1, a battle started, or the integer map id changed. The attempt is
        recorded, and the pending probe cleared, on success or when the
        countdown reaches zero.
        """
        probe = self.pending_interaction_verify
        if probe is None:
            return

        self.interaction_verify_countdown -= 1

        success = False
        if prev_context_state is not None:
            was_free = prev_context_state[3] <= 0.5 and prev_context_state[4] <= 0.5
            if was_free:
                menu_moved = abs(context_state[4] - prev_context_state[4]) > 0.1
                entered_battle = context_state[3] > 0.5 and prev_context_state[3] <= 0.5
                left_map = int(context_state[2]) != int(prev_context_state[2])
                success = menu_moved or entered_battle or left_map

        if not (success or self.interaction_verify_countdown <= 0):
            return

        probe = self.pending_interaction_verify
        self.record_tile_interaction_attempt(
            probe['x'], probe['y'], probe['map_id'], probe['direction'], success
        )
        self.pending_interaction_verify = None


# ============================================================================
# CELL 3 PART 4: Transitions, Debt, Menu Trap, Navigation (with Cross-Map)
# ============================================================================
# CHANGES:
# 1. Added build_map_graph() — builds connectivity from transition data
# 2. Added find_map_path() — BFS across map graph
# 3. Added get_transition_to_map() — find the tile to reach a neighbor map
# 4. Modified start_navigation() — detects cross-map targets, builds chain
# 5. Added advance_map_chain() — called on map change during cross-map nav
# 6. Added pause/resume logic for missing transitions
# 7. Modified abort_navigation() — clears cross-map state
# 8. Modified on_map_change() — triggers chain advancement
# 9. record_transition() marks graph dirty
# ============================================================================

    # =========================================================================
    # TRANSITION SYSTEM
    # =========================================================================
    
    def record_transition(self, from_pos, from_map, to_map, direction, action_type):
        """
        Record that leaving from_map at from_pos in `direction` leads to to_map.

        When a transition with the same position and direction is already
        stored for from_map, only its 'use_count' and 'last_used' are
        updated. Otherwise a new transition is appended, the map graph is
        marked dirty and a message is printed.

        Args:
            from_pos: Position on from_map where the transition happened;
                a tuple or list.
            from_map: Map id the transition starts on.
            to_map: Destination map id.
            direction: Direction associated with the transition.
            action_type: Stored under the transition's 'action' key.
        """
        def _as_key(pos):
            # A position that went through JSON comes back as a list, and a
            # list never compares equal to a tuple. Normalise both sides so
            # a reloaded transition is still recognised as the same one.
            return tuple(pos) if isinstance(pos, list) else pos

        memory = self.get_current_map_memory(from_map)
        wanted = _as_key(from_pos)
        for t in memory['transitions']:
            if _as_key(t['position']) == wanted and t['direction'] == direction:
                t['use_count'] += 1
                t['last_used'] = self.timestep
                return
        memory['transitions'].append({
            'position': from_pos, 'direction': direction, 'action': action_type,
            'destination_map': to_map, 'use_count': 1, 'last_used': self.timestep
        })
        # Mark graph dirty so it rebuilds with the new transition
        self._map_graph_dirty = True
        print(f"  üö™ TRANSITION FOUND: Map {from_map} ({from_pos}) ‚Üí Map {to_map}")

    def get_transition_attraction(self, current_map):
        """
        Find the known transition out of current_map that is most worth taking.

        For every non-banned transition the attraction is half the debt
        difference plus half the coverage difference between the current map
        and the destination, where temp debt counts double. Transitions used
        fewer than three times get a 1.5x multiplier.

        Returns:
            (attraction, transition): the best attraction scaled by
            TRANSITION_ATTRACTION_WEIGHT together with its transition dict,
            or (0.0, None) when the map has no transitions. When no
            transition has attraction above zero the transition is None.
        """
        known = self.get_current_map_memory(current_map).get('transitions', [])
        if not known:
            return 0.0, None

        here_debt = self.map_novelty_debt.get(current_map, 0.0)
        here_temp = self.get_temp_debt(current_map)
        here_coverage = self.get_exploration_coverage(current_map)

        winner = None
        winning_pull = 0.0
        for candidate in known:
            if self.is_transition_banned(current_map, candidate['position'], candidate['direction']):
                continue

            target = candidate['destination_map']
            there_debt = self.map_novelty_debt.get(target, 0.0)
            there_temp = self.get_temp_debt(target)
            there_coverage = self.get_exploration_coverage(target)

            debt_gap = (here_debt + here_temp * 2.0) - (there_debt + there_temp * 2.0)
            coverage_gap = here_coverage - there_coverage
            pull = debt_gap * 0.5 + coverage_gap * 0.5

            # Rarely used transitions are favoured.
            if candidate['use_count'] < 3:
                pull *= 1.5

            if pull > winning_pull:
                winning_pull = pull
                winner = candidate

        return winning_pull * self.TRANSITION_ATTRACTION_WEIGHT, winner

    # =========================================================================
    # TRANSITION BAN SYSTEM
    # =========================================================================
    
    def create_transition_ban(self, map_id, tile_pos, direction_back):
        """
        Install (or replace) the transition ban for map_id.

        The ban stores the banned tile and direction, a vicinity radius of
        BAN_VICINITY_RADIUS that starts out inactive, and the creation
        timestep.
        """
        ban = {
            'banned_tile': tile_pos,
            'banned_direction': direction_back,
            'vicinity_radius': self.BAN_VICINITY_RADIUS,
            'vicinity_active': False,
            'created_at': self.timestep,
        }
        self.transition_bans[map_id] = ban
        print(f"  üö´ TRANSITION BAN: Map {map_id} at {tile_pos} facing {self.DIRECTION_NAMES.get(direction_back, '?')}")
    
    def is_transition_banned(self, map_id, position, direction):
        """
        Tell whether moving in `direction` from `position` on map_id is banned.

        A map without a ban is never banned. Otherwise the banned direction
        must match, and the position must either equal the banned tile or,
        once the ban's vicinity is active, lie within 'vicinity_radius'
        Manhattan distance of it. List positions are treated like tuples.
        """
        if map_id not in self.transition_bans:
            return False

        def _normalized(pos):
            return tuple(pos) if isinstance(pos, list) else pos

        ban = self.transition_bans[map_id]
        tile = _normalized(ban['banned_tile'])
        position = _normalized(position)

        if position == tile and direction == ban['banned_direction']:
            return True
        if not ban['vicinity_active']:
            return False

        manhattan = abs(position[0] - tile[0]) + abs(position[1] - tile[1])
        if manhattan <= ban['vicinity_radius'] and direction == ban['banned_direction']:
            return True
        return False
    
    def is_position_banned(self, map_id, x, y, direction):
        """Coordinate-pair form of is_transition_banned: checks the tile (x, y)."""
        tile = (x, y)
        return self.is_transition_banned(map_id, tile, direction)
    
    def update_transition_ban(self, map_id, current_pos):
        """
        Activate the ban's vicinity once the agent has moved away from the banned tile.

        Does nothing when map_id has no ban or the vicinity is already
        active. The vicinity becomes active when current_pos is at least
        3 tiles (Manhattan distance) from the banned tile.
        """
        if map_id not in self.transition_bans:
            return

        ban = self.transition_bans[map_id]
        tile = ban['banned_tile']
        if isinstance(tile, list):
            tile = tuple(tile)

        if ban['vicinity_active']:
            return

        manhattan = abs(current_pos[0] - tile[0]) + abs(current_pos[1] - tile[1])
        if manhattan >= 3:
            ban['vicinity_active'] = True
            print(f"  üö´ VICINITY BAN ACTIVE: Map {map_id}")
    
    def check_ban_lift_conditions(self, map_id):
        """
        Remove the transition ban on map_id once any lift condition holds.

        Conditions, checked in this order:
        1. the map has at least one transition that is not banned
        2. exploration coverage reached BAN_COVERAGE_LIFT_THRESHOLD
        3. the ban is at least BAN_TIMEOUT_STEPS timesteps old
        """
        if map_id not in self.transition_bans:
            return

        ban = self.transition_bans[map_id]
        known = self.get_current_map_memory(map_id).get('transitions', [])
        alternatives = [
            t for t in known
            if not self.is_transition_banned(map_id, t['position'], t['direction'])
        ]

        reason = None
        if alternatives:
            reason = "alternative transition found"
        elif self.get_exploration_coverage(map_id) >= self.BAN_COVERAGE_LIFT_THRESHOLD:
            reason = "coverage reached"
        elif self.timestep - ban['created_at'] >= self.BAN_TIMEOUT_STEPS:
            reason = "timeout"

        if reason is None:
            return

        del self.transition_bans[map_id]
        print(f"  ‚úÖ BAN LIFTED: Map {map_id} - {reason}")

    # =========================================================================
    # DEBT SYSTEMS
    # =========================================================================
    
    def get_temp_debt(self, map_id):
        """
        Return the map's temp debt as seen right now.

        For the current map the stored value is returned unchanged. For any
        other map the stored value is reduced by TEMP_DEBT_DECAY per
        timestep since the map was last visited, never dropping below 0.0.
        The stored value itself is not modified.
        """
        record = self.get_current_map_memory(map_id)
        stored = record.get('temp_debt', 0.0)
        if map_id != self.current_map_id:
            elapsed = self.timestep - record.get('last_visited_timestep', 0)
            decayed = stored - elapsed * self.TEMP_DEBT_DECAY
            return max(0.0, decayed)
        return stored

    def accumulate_temp_debt(self, map_id):
        """Raise the map's temp debt by TEMP_DEBT_ACCUMULATION, capped at TEMP_DEBT_MAX."""
        record = self.get_current_map_memory(map_id)
        raised = record.get('temp_debt', 0.0) + self.TEMP_DEBT_ACCUMULATION
        record['temp_debt'] = min(self.TEMP_DEBT_MAX, raised)

    def decay_all_debts(self):
        """
        Decay novelty debt for every map and location except the current ones.

        Each affected entry is multiplied by (1 - DEBT_DECAY_RATE) and
        removed once it falls below 0.1. The current location is derived
        from the newest entry of self.last_positions; when there is no
        current map or no recorded position, every location decays.
        """
        def _fade(table, protected_key):
            # Iterate over a snapshot of the keys because entries may be deleted.
            for key in list(table.keys()):
                if key != protected_key:
                    table[key] *= (1.0 - self.DEBT_DECAY_RATE)
                    if table[key] < 0.1:
                        del table[key]

        _fade(self.map_novelty_debt, self.current_map_id)

        here = None
        if self.current_map_id is not None and len(self.last_positions) > 0:
            newest = self.last_positions[-1]
            here = self.get_location_key(newest[0], newest[1], self.current_map_id)

        _fade(self.location_novelty, here)

    def get_exploration_coverage(self, map_id):
        """
        Return visited / (visited + obstructions) for map_id.

        Yields 0.0 when nothing was visited or when fewer than 10 tiles
        (visited plus obstructions) are known.
        """
        record = self.get_current_map_memory(map_id)
        seen = len(record['visited_tiles'])
        blocked = len(record['obstructions'])
        known = seen + blocked
        if seen == 0 or known < 10:
            return 0.0
        return seen / known

    def detect_obstruction(self, prev_context, context_state, raw_position, prev_raw_position):
        """
        Detect a blocked move: the last action was a direction but the position did not change.

        When blocked, an obstruction is recorded from raw_position using the
        map id (index 2) and facing direction (index 5) of context_state.

        Returns:
            True when an obstruction was recorded, False otherwise
            (including when there is no previous context or position).
        """
        no_history = prev_context is None or prev_raw_position is None
        if no_history or self.last_action not in ('UP', 'DOWN', 'LEFT', 'RIGHT'):
            return False

        if raw_position == prev_raw_position:
            here_x, here_y = raw_position[0], raw_position[1]
            self.record_obstruction(here_x, here_y, int(context_state[2]), int(context_state[5]))
            return True
        return False

    # =========================================================================
    # MENU TRAP B-BOOST
    # =========================================================================
    
    def update_menu_trap_tracking(self, context_state, action_taken, raw_position=None):
        """
        Track button presses that leave the context state unchanged and build up the B boost.

        The position is raw_position when given, otherwise it is derived
        from context_state indices 0 and 1 scaled by 255. Moving away from
        the tracked trap position resets the boost. While the context-state
        hash equals self.last_context_state_hash, each A/B/Start/Select
        press counts as one trap frame; past MENU_TRAP_THRESHOLD frames the
        B action's utility is remembered once and the boost grows by
        B_BOOST_INCREMENT, capped at B_BOOST_MAX.
        """
        if raw_position:
            current_pos = raw_position
        else:
            current_pos = (round(context_state[0] * 255), round(context_state[1] * 255))

        if self.menu_trap_position is not None and current_pos != self.menu_trap_position:
            self.reset_menu_trap_boost()
            return

        state_unchanged = self.get_context_state_hash(context_state) == self.last_context_state_hash
        if not state_unchanged:
            # The state moved on; drop tracking unless still on the trap tile.
            if current_pos != self.menu_trap_position:
                self.reset_menu_trap_boost()
            return

        if action_taken not in ("A", "B", "Start", "Select"):
            return

        self.menu_trap_frames += 1
        self.menu_trap_position = current_pos
        if not self.menu_trap_frames > self.MENU_TRAP_THRESHOLD:
            return

        if self.original_b_utility is None:
            # Remember B's utility the first time the boost kicks in.
            for candidate in self.actions():
                if candidate.action == 'B':
                    self.original_b_utility = candidate.utility
                    break
        self.menu_trap_b_boost = min(self.B_BOOST_MAX, self.menu_trap_b_boost + self.B_BOOST_INCREMENT)

    def reset_menu_trap_boost(self):
        """
        Clear all menu-trap tracking state.

        When a boost above 1.0 is active and B's original utility was
        remembered, that utility is restored on the first 'B' action before
        the counters, boost, trap position and remembered utility are reset.
        """
        boosted = self.menu_trap_b_boost > 1.0 and self.original_b_utility is not None
        if boosted:
            for candidate in self.actions():
                if candidate.action != 'B':
                    continue
                candidate.utility = self.original_b_utility
                break

        self.menu_trap_frames = 0
        self.menu_trap_b_boost = 1.0
        self.menu_trap_position = None
        self.original_b_utility = None

    # =========================================================================
    # STANDARD METHODS
    # =========================================================================
    
    def add(self, p):
        """Append perceptron ``p`` to ``self.perceptrons`` and mark the cache stale."""
        registry = self.perceptrons
        registry.append(p)
        # Any cached view derived from the perceptron list is now out of date.
        self._cache_valid = False

    def actions(self):
        """Return a new list of the perceptrons whose ``kind`` is ``"action"``."""
        selected = []
        for perceptron in self.perceptrons:
            if perceptron.kind == "action":
                selected.append(perceptron)
        return selected

    def entities(self):
        """Return a new list of the perceptrons whose ``kind`` is ``"entity"``."""
        selected = []
        for perceptron in self.perceptrons:
            if perceptron.kind == "entity":
                selected.append(perceptron)
        return selected

    def get_location_key(self, x, y, map_id, bin_size=5):
        """Return ``(map_id, x_bin, y_bin)`` with coordinates snapped down to a grid.

        Each coordinate is floor-divided by ``bin_size``, truncated to int and
        multiplied back, so every point inside one ``bin_size`` cell maps to
        the same key.
        """
        map_key = int(map_id)
        x_bin = int(x // bin_size) * bin_size
        y_bin = int(y // bin_size) * bin_size
        return (map_key, x_bin, y_bin)

    def is_near_map_edge(self, x, y):
        """Return a truthy value when ``x`` or ``y`` is below 10 or above 245."""
        x_outside = x < 10 or x > 245
        return x_outside or y < 10 or y > 245

    def record_action_execution(self, action_name):
        """Increment the execution counter for ``action_name``.

        Falsy names (``None``, empty string) are ignored.
        """
        if not action_name:
            return
        counts = self.action_execution_count
        counts[action_name] = counts.get(action_name, 0) + 1

    def get_position_stagnation(self):
        """Count earlier entries of ``last_positions`` equal to the latest one.

        Every earlier matching entry is counted, not only an unbroken run at
        the end of the history. Returns 0 when fewer than two positions are
        stored.
        """
        history = list(self.last_positions)
        if len(history) < 2:
            return 0

        *earlier, latest = history
        repeats = 0
        for past in earlier:
            if past == latest:
                repeats += 1
        return repeats

    def get_group_weight(self, group):
        """Return the summed ``utility`` of all actions whose ``group`` equals ``group``."""
        total = 0
        for action in self.actions():
            if action.group == group:
                total += action.utility
        return total

    # =========================================================================
    # MAP CONNECTIVITY GRAPH
    # =========================================================================

    def build_map_graph(self):
        """Return the map connectivity graph, rebuilding it only when stale.

        The graph has the shape ``{map_id: [(dest_map_id, transition), ...]}``
        and is assembled from the ``'transitions'`` list of every entry in
        ``self.exploration_memory``. Transitions without a
        ``'destination_map'`` (missing or ``None``) are left out, and maps
        with no usable transition get no key at all.

        While ``_map_graph_dirty`` is False the cached ``_map_graph`` is
        returned untouched; after a rebuild the flag is cleared.
        """
        if not self._map_graph_dirty:
            return self._map_graph

        rebuilt = {}
        for map_id, memory in self.exploration_memory.items():
            outgoing = [
                (transition.get('destination_map'), transition)
                for transition in memory.get('transitions', [])
                if transition.get('destination_map') is not None
            ]
            if outgoing:
                rebuilt[map_id] = outgoing

        self._map_graph_dirty = False
        self._map_graph = rebuilt
        return rebuilt

    def find_map_path(self, from_map, to_map):
        """Find the shortest chain of maps from ``from_map`` to ``to_map``.

        Runs a breadth-first search over the graph returned by
        ``build_map_graph``.

        Returns:
            ``[from_map, ..., to_map]`` for the first shortest route found,
            ``[from_map]`` when both maps are the same, or ``[]`` when
            ``from_map`` has no outgoing edges or no route exists.
        """
        if from_map == to_map:
            return [from_map]

        graph = self.build_map_graph()
        if from_map not in graph:
            return []

        from collections import deque as bfs_deque

        # Each discovered map remembers the map it was reached from; the start
        # map points at a private sentinel so that walking back knows where to stop.
        root = object()
        reached_from = {from_map: root}
        pending = bfs_deque([from_map])

        while pending:
            here = pending.popleft()
            for neighbor, _transition in graph.get(here, []):
                if neighbor == to_map:
                    route = [neighbor]
                    step = here
                    while step is not root:
                        route.append(step)
                        step = reached_from[step]
                    route.reverse()
                    return route
                if neighbor not in reached_from:
                    reached_from[neighbor] = here
                    pending.append(neighbor)

        return []

    def get_transition_to_map(self, from_map, to_map):
        """Pick the most-used, non-banned transition on ``from_map`` leading to ``to_map``.

        Transitions are read from the ``'transitions'`` list of the map's
        memory. Candidates rejected by ``is_transition_banned`` are ignored.
        Among the rest the one with the highest ``'use_count'`` (default 0)
        wins; on a tie the earliest in the list is kept.

        Returns:
            The chosen transition dict, or ``None`` when nothing qualifies.
        """
        memory = self.get_current_map_memory(from_map)

        chosen = None
        chosen_uses = -1
        for candidate in memory.get('transitions', []):
            if not (candidate.get('destination_map') == to_map):
                continue

            # Positions may be stored as lists; the ban check gets a tuple.
            raw_pos = candidate['position']
            tile = tuple(raw_pos) if isinstance(raw_pos, list) else raw_pos
            if self.is_transition_banned(from_map, tile, candidate['direction']):
                continue

            uses = candidate.get('use_count', 0)
            if uses > chosen_uses:
                chosen, chosen_uses = candidate, uses

        return chosen

    def get_cross_map_status(self):
        """Return a dict describing cross-map navigation, for logging.

        With an empty ``nav_map_chain`` the result is ``{'active': False}``.
        Otherwise it carries the chain, the chain index and length, the map at
        the current index (``None`` if the index is past the end), the last
        map of the chain, the final target and the pause flag and reason.
        """
        chain = self.nav_map_chain
        if not chain:
            return {'active': False}

        index = self.nav_chain_index
        here = chain[index] if index < len(chain) else None

        status = {'active': True}
        status['chain'] = chain
        status['chain_index'] = index
        status['chain_length'] = len(chain)
        status['current_map'] = here
        status['target_map'] = chain[-1]
        status['final_target'] = self.nav_cross_map_target
        status['paused'] = self.nav_paused
        status['paused_reason'] = self.nav_paused_reason
        return status

    # =========================================================================
    # NAVIGATION SYSTEM - A* Pathfinding + Cross-Map + Taught Targets
    # =========================================================================
    
    def load_taught_nav_targets(self, filepath=None):
        """Load human-curated navigation targets from a JSON file.

        Args:
            filepath: Path of the JSON file; defaults to
                ``TAUGHT_NAV_TARGETS_FILE`` when falsy.

        The file is expected to hold a ``'targets_by_map'`` mapping (string
        map ids to lists of targets) and a ``'global_order'`` list. Map ids
        are converted to ``int``.

        Results are stored on ``self.taught_nav_targets``,
        ``self.taught_nav_global_order`` and ``self.taught_nav_loaded``.
        When the file is missing or anything goes wrong while reading it, the
        three attributes are reset to ``{}``, ``[]`` and ``False`` and a
        message is printed; no exception is propagated.
        """
        filepath = filepath or TAUGHT_NAV_TARGETS_FILE
        try:
            if Path(filepath).exists():
                # JSON is read as UTF-8 explicitly: without it the decoding
                # depends on the platform locale, which can fail or corrupt
                # non-ASCII text.
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                # Parse into locals first so a malformed file never leaves a
                # half-filled mapping on the instance.
                parsed_targets = {
                    int(map_key): targets
                    for map_key, targets in data.get('targets_by_map', {}).items()
                }
                parsed_order = data.get('global_order', [])
                total = sum(len(t) for t in parsed_targets.values())
                order_count = len(parsed_order)

                self.taught_nav_targets = parsed_targets
                self.taught_nav_global_order = parsed_order
                self.taught_nav_loaded = True

                maps = list(self.taught_nav_targets.keys())
                print(f"  üéØ Loaded taught nav targets:")
                print(f"     Total targets: {total}")
                print(f"     Maps with targets: {maps}")
                print(f"     Global order entries: {order_count}")
            else:
                self.taught_nav_targets = {}
                self.taught_nav_global_order = []
                self.taught_nav_loaded = False
                print(f"  No taught nav targets found at {filepath}")
        except Exception as e:
            # Deliberately best-effort: a bad file must not stop the agent.
            print(f"  Error loading taught nav targets: {e}")
            self.taught_nav_targets = {}
            self.taught_nav_global_order = []
            self.taught_nav_loaded = False

    def _astar(self, start, goal, map_id):
        """A* search over the visited tiles of ``map_id``, skipping obstructions.

        Movement is 4-directional with unit step cost and a Manhattan-distance
        heuristic. ``start`` and ``goal`` are truncated to integer tuples.

        If ``goal`` itself was never visited, the search aims for the visited,
        unobstructed neighbour of the goal that is closest to ``start``
        (first one wins on ties).

        Returns:
            The list of tiles from start to the (possibly substituted) goal,
            ``[start]`` when they coincide, or ``[]`` when ``start`` was never
            visited, no substitute goal exists, or no route is found.
        """
        import heapq

        memory = self.get_current_map_memory(map_id)
        walkable = memory['visited_tiles']
        blocked = memory['obstructions']
        steps = [(0, 1), (0, -1), (1, 0), (-1, 0)]

        def manhattan(a, b):
            return abs(a[0] - b[0]) + abs(a[1] - b[1])

        origin = (int(start[0]), int(start[1]))
        target = (int(goal[0]), int(goal[1]))

        if origin not in walkable:
            return []

        if target not in walkable:
            # Aim for the nearest known, unobstructed tile next to the goal.
            around_goal = [(target[0] + dx, target[1] + dy) for dx, dy in steps]
            usable = [tile for tile in around_goal if tile in walkable and tile not in blocked]
            if not usable:
                return []
            target = min(usable, key=lambda tile: manhattan(tile, origin))

        if origin == target:
            return [origin]

        # Heap entries are (estimated total cost, cost so far, tile).
        frontier = [(manhattan(target, origin), 0, origin)]
        reached_from = {}
        cheapest = {origin: 0}
        expanded = set()

        while frontier:
            _estimate, cost, node = heapq.heappop(frontier)

            if node == target:
                route = [node]
                while node in reached_from:
                    node = reached_from[node]
                    route.append(node)
                return route[::-1]

            if node in expanded:
                continue
            expanded.add(node)

            for dx, dy in steps:
                nxt = (node[0] + dx, node[1] + dy)
                if nxt in expanded or nxt not in walkable or nxt in blocked:
                    continue
                step_cost = cost + 1
                if not (step_cost < cheapest.get(nxt, float('inf'))):
                    continue
                cheapest[nxt] = step_cost
                reached_from[nxt] = node
                heapq.heappush(frontier, (step_cost + manhattan(target, nxt), step_cost, nxt))

        return []

    def _get_frontier_tiles(self, map_id):
        """List the unexplored tiles bordering the visited area of ``map_id``.

        A tile qualifies when it is 4-adjacent to a visited tile, is neither
        visited nor a recorded obstruction, and both coordinates lie in
        ``0..255``. Each tile appears once.
        """
        memory = self.get_current_map_memory(map_id)
        seen = memory['visited_tiles']
        blocked = memory['obstructions']
        offsets = [(0, 1), (0, -1), (1, 0), (-1, 0)]

        def open_neighbors():
            for vx, vy in seen:
                for dx, dy in offsets:
                    cell = (vx + dx, vy + dy)
                    if cell in seen or cell in blocked:
                        continue
                    if 0 <= cell[0] <= 255 and 0 <= cell[1] <= 255:
                        yield cell

        return list(set(open_neighbors()))

    def _score_nav_target(self, target, current_pos, map_id):
        """Score a fallback navigation target; higher is more attractive.

        The score is 2.0 per 4-adjacent tile that is in range (``0..255``),
        unvisited and unobstructed, minus 0.05 per step of Manhattan distance
        from ``current_pos``, minus 100.0 if the target is in
        ``nav_struck_targets``.
        """
        memory = self.get_current_map_memory(map_id)
        seen = memory['visited_tiles']
        blocked = memory['obstructions']
        tx, ty = target

        around = [(tx + dx, ty + dy) for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]]
        open_sides = sum(
            1
            for cell in around
            if cell not in seen and cell not in blocked
            and 0 <= cell[0] <= 255 and 0 <= cell[1] <= 255
        )

        score = open_sides * 2.0
        distance = abs(current_pos[0] - tx) + abs(current_pos[1] - ty)
        score -= distance * 0.05
        if target in self.nav_struck_targets:
            score -= 100.0
        return score

    def _get_taught_targets_for_map(self, map_id, current_pos):
        """Return the still-open taught targets on ``map_id``, nearest first.

        Targets whose ``'order'`` (default 0) is in ``nav_visited_targets`` or
        whose position is in ``nav_struck_targets`` are dropped. The rest are
        ordered by Manhattan distance from ``current_pos``; equal distances
        keep their file order.

        Returns:
            A list of ``(position_tuple, target_dict)`` pairs; empty when no
            taught targets are loaded or none exist for this map.
        """
        if not self.taught_nav_loaded:
            return []

        map_targets = self.taught_nav_targets.get(map_id, [])
        if not map_targets:
            return []

        open_targets = []
        for info in map_targets:
            if info.get('order', 0) in self.nav_visited_targets:
                continue
            pos = tuple(info['position'])
            if pos in self.nav_struck_targets:
                continue
            distance = abs(current_pos[0] - pos[0]) + abs(current_pos[1] - pos[1])
            open_targets.append((distance, pos, info))

        open_targets.sort(key=lambda item: item[0])
        return [(pos, info) for _distance, pos, info in open_targets]

    def _get_next_taught_target_any_map(self, current_pos, current_map):
        """Return the first open taught target in global order, on any map.

        Entries of ``taught_nav_global_order`` are scanned in list order.
        An entry is skipped when its ``'order'`` (default -1) is in
        ``nav_visited_targets`` or its position (default ``[0, 0]``) is in
        ``nav_struck_targets``. ``current_pos`` and ``current_map`` are
        accepted but not used by the selection.

        Returns:
            ``(position, target_data, target_map_id)`` where ``target_data``
            is the per-map target with the same ``'order'`` if one exists,
            otherwise the global-order entry itself; ``(None, None, None)``
            when nothing is loaded or every entry is skipped.
        """
        nothing = (None, None, None)
        if not self.taught_nav_loaded or not self.taught_nav_global_order:
            return nothing

        for entry in self.taught_nav_global_order:
            order = entry.get('order', -1)
            if order in self.nav_visited_targets:
                continue

            target_map = entry.get('map_id')
            pos = tuple(entry.get('position', [0, 0]))
            if pos in self.nav_struck_targets:
                continue

            # Prefer the richer per-map record when one shares this order.
            per_map = self.taught_nav_targets.get(target_map, [])
            target_data = next((t for t in per_map if t.get('order') == order), entry)
            return pos, target_data, target_map

        return nothing

    def build_nav_target_list(self, current_pos, map_id):
        """Build the ranked list of navigation targets, best score first.

        Each entry is ``(position, score, type_label, target_map)``. Sources
        are tried in this order and the first that yields anything wins:

        1. Taught targets on the current map, scored by
           ``forward_progress_score * 10`` minus ``0.02`` per step of distance.
        2. The next taught target on another map. If a map chain and a usable
           transition exist, the single entry is that transition tile; if no
           chain exists at all, a ``(0, 0)`` placeholder entry labelled
           ``cross_map_need_<map>`` is returned. In both cases
           ``self._pending_cross_map`` is filled in for ``start_navigation``.
        3. Frontier tiles plus non-banned, non-struck transitions of the
           current map. Transitions get a bonus of up to 3.0 for leading to a
           less-explored destination.
        """
        def as_tile(raw):
            # Stored positions may be lists; targets are compared as tuples.
            return tuple(raw) if isinstance(raw, list) else raw

        def score_of(entry):
            return entry[1]

        # --- 1. Taught targets on this map ---------------------------------
        local_taught = self._get_taught_targets_for_map(map_id, current_pos)
        if local_taught:
            ranked = []
            for pos, info in local_taught:
                distance = abs(current_pos[0] - pos[0]) + abs(current_pos[1] - pos[1])
                score = info.get('forward_progress_score', 0.5) * 10.0
                score -= distance * 0.02
                label = f"taught_{info.get('progress_type', 'unknown')}"
                ranked.append((pos, score, label, map_id))
            ranked.sort(key=score_of, reverse=True)
            return ranked

        # --- 2. Taught target on another map -------------------------------
        far_pos, far_data, far_map = self._get_next_taught_target_any_map(current_pos, map_id)
        wants_other_map = far_pos is not None and far_map is not None and far_map != map_id
        if wants_other_map:
            chain = self.find_map_path(map_id, far_map)

            if chain and len(chain) > 1:
                # Head for the tile that leads to the next map in the chain.
                hop = self.get_transition_to_map(map_id, chain[1])
                if hop:
                    hop_tile = as_tile(hop['position'])
                    score = 15.0
                    score -= len(chain) * 0.5  # longer chains rank slightly lower
                    self._pending_cross_map = {
                        'final_target': far_pos,
                        'final_map': far_map,
                        'target_data': far_data,
                        'map_chain': chain,
                        'transition': hop
                    }
                    return [(hop_tile, score, f"cross_map_to_{far_map}", far_map)]

            elif not chain:
                # No known route yet: hand start_navigation a placeholder so it
                # can pause and let exploration look for transitions.
                self._pending_cross_map = {
                    'final_target': far_pos,
                    'final_map': far_map,
                    'target_data': far_data,
                    'map_chain': [],
                    'transition': None
                }
                return [((0, 0), 5.0, f"cross_map_need_{far_map}", far_map)]

        # --- 3. Fallback: frontier tiles and local transitions --------------
        ranked = [
            (tile, self._score_nav_target(tile, current_pos, map_id), 'frontier', map_id)
            for tile in self._get_frontier_tiles(map_id)
        ]

        memory = self.get_current_map_memory(map_id)
        for edge in memory.get('transitions', []):
            tile = as_tile(edge['position'])
            if self.is_transition_banned(map_id, tile, edge['direction']):
                continue
            score = self._score_nav_target(tile, current_pos, map_id)
            dest_coverage = self.get_exploration_coverage(edge['destination_map'])
            score += 3.0 * (1.0 - dest_coverage)
            if tile in self.nav_struck_targets:
                continue
            ranked.append((tile, score, 'transition', map_id))

        ranked.sort(key=score_of, reverse=True)
        return ranked

    def start_navigation(self, current_pos, map_id):
        """Enter navigation mode: build the target list and path to a first target.

        The target list comes from ``build_nav_target_list``. If that call
        left a cross-map context in ``_pending_cross_map``:

        * with a map chain and a transition, the agent paths to the transition
          tile; on success cross-map navigation starts and True is returned,
          otherwise the cross-map state is cleared and ordinary same-map
          navigation is attempted;
        * with an empty chain, navigation is marked active but paused so that
          exploration can look for transitions, and True is returned.

        Without a cross-map context the result of
        ``_navigate_to_next_target`` is returned. An empty target list gives
        False.
        """
        self._pending_cross_map = None  # cleared first; the builder may set it
        self.nav_target_list = self.build_nav_target_list(current_pos, map_id)
        if not self.nav_target_list:
            return False

        cross = self._pending_cross_map
        if cross:
            self._pending_cross_map = None
            chain = cross['map_chain']
            hop = cross['transition']

            if chain and hop:
                # A route across maps is known: adopt it.
                self.nav_map_chain = chain
                self.nav_chain_index = 0
                self.nav_cross_map_target = cross['final_target']
                self.nav_cross_map_target_data = cross['target_data']
                self.nav_paused = False

                raw_pos = hop['position']
                t_pos = tuple(raw_pos) if isinstance(raw_pos, list) else raw_pos
                route = self._astar(current_pos, t_pos, map_id)

                if route and len(route) > 1:
                    self.nav_active = True
                    self.nav_path = route
                    self.nav_path_index = 1
                    self.nav_target = t_pos
                    self.nav_steps_taken = 0
                    self.nav_stagnation_count = 0
                    self.nav_last_position = current_pos
                    self.nav_curiosity_countdown = 0

                    chain_str = ' ‚Üí '.join(str(m) for m in self.nav_map_chain)
                    print(f"  üß≠üåç CROSS-MAP NAV START: {chain_str}")
                    print(f"     Final target: ({cross['final_target'][0]}, {cross['final_target'][1]}) on map {cross['final_map']}")
                    print(f"     Immediate: ‚Üí transition at ({t_pos[0]}, {t_pos[1]}) ‚Üí map {self.nav_map_chain[1]}")
                    self.nav_cross_map_refresh_countdown = self.NAV_CROSS_MAP_REFRESH_INTERVAL
                    return True

                # The transition tile is unreachable: drop the cross-map plan
                # and fall through to same-map navigation.
                self._clear_cross_map_state()

            elif not chain:
                # No route known yet: stay "active" but paused while exploring.
                self.nav_map_chain = []
                self.nav_chain_index = 0
                self.nav_cross_map_target = cross['final_target']
                self.nav_cross_map_target_data = cross['target_data']
                self.nav_paused = True
                self.nav_paused_reason = f"no path to map {cross['final_map']}"
                self.nav_paused_target_map = cross['final_map']
                self.nav_pause_check_countdown = self.NAV_PAUSE_CHECK_INTERVAL
                self.nav_active = True

                print(f"  üß≠‚è∏Ô∏è CROSS-MAP NAV PAUSED: no path to map {cross['final_map']}")
                print(f"     Exploring to find transitions...")
                return True

        # Ordinary same-map navigation, starting from the best-ranked target.
        self.nav_target_index = 0
        return self._navigate_to_next_target(current_pos, map_id)

    def advance_map_chain(self, new_map_id, current_pos):
        """Continue cross-map navigation after the map changed to ``new_map_id``.

        The new map is located in ``nav_map_chain`` (first match) and
        ``nav_chain_index`` is updated.

        * On the last map of the chain the agent paths to the final target;
          failure (or a missing target) clears the cross-map state.
        * On an intermediate map the agent paths to the transition leading to
          the next map; if the transition is unknown or unreachable,
          navigation is paused instead of aborted.

        Returns:
            True if navigation continues (possibly paused); False when there
            is no chain, the map is not part of it, or the final target
            cannot be reached.
        """
        chain = self.nav_map_chain
        if not chain:
            return False

        try:
            position_in_chain = chain.index(new_map_id)
        except ValueError:
            # The agent left the planned route.
            print(f"  üß≠üåç CROSS-MAP: landed on unexpected map {new_map_id}, aborting chain")
            self._clear_cross_map_state()
            return False

        self.nav_chain_index = position_in_chain

        def follow(route, destination):
            self.nav_path = route
            self.nav_path_index = 1
            self.nav_target = destination
            self.nav_steps_taken = 0
            self.nav_stagnation_count = 0
            self.nav_last_position = current_pos

        def pause(reason, waiting_for):
            self.nav_paused = True
            self.nav_paused_reason = reason
            self.nav_paused_target_map = waiting_for
            self.nav_pause_check_countdown = self.NAV_PAUSE_CHECK_INTERVAL

        # --- Final map: head for the actual target --------------------------
        if new_map_id == self.nav_map_chain[-1]:
            if not self.nav_cross_map_target:
                self._clear_cross_map_state()
                return False

            target_pos = self.nav_cross_map_target
            route = self._astar(current_pos, target_pos, new_map_id)
            if route and len(route) > 1:
                follow(route, target_pos)
                print(f"  üß≠üåç CROSS-MAP FINAL: arrived at map {new_map_id}, "
                      f"pathfinding to ({target_pos[0]}, {target_pos[1]})")
                return True

            print(f"  üß≠üåç CROSS-MAP: can't reach target on final map {new_map_id}")
            self._clear_cross_map_state()
            return False

        # --- Intermediate map: head for the transition to the next map ------
        next_map = self.nav_map_chain[position_in_chain + 1]
        transition = self.get_transition_to_map(new_map_id, next_map)

        if not transition:
            print(f"  üß≠üåç CROSS-MAP: no transition from map {new_map_id} to map {next_map}")
            pause(f"no transition to map {next_map}", next_map)
            return True

        raw_pos = transition['position']
        t_pos = tuple(raw_pos) if isinstance(raw_pos, list) else raw_pos
        route = self._astar(current_pos, t_pos, new_map_id)

        if route and len(route) > 1:
            follow(route, t_pos)
            remaining = len(self.nav_map_chain) - position_in_chain - 1
            print(f"  üß≠üåç CROSS-MAP STEP: map {new_map_id} ‚Üí transition to map {next_map} "
                  f"({remaining} maps remaining)")
            return True

        # The transition is known but not reachable yet: explore some more.
        print(f"  üß≠üåç CROSS-MAP: can't reach transition on map {new_map_id}")
        pause(f"can't reach transition to map {next_map}", next_map)
        return True

    def check_nav_pause_resume(self, current_pos, map_id):
        """Periodically test whether paused cross-map navigation can resume.

        The check only runs when navigation is paused and the pause countdown
        has expired; the countdown is then re-armed with
        ``NAV_PAUSE_CHECK_INTERVAL``. The map graph is marked dirty so newly
        discovered transitions are taken into account.

        The destination map is the last map of ``nav_map_chain`` (or the
        paused target map when the chain is empty), overridden by the
        ``'map_id'`` of the first global-order entry whose position equals
        ``nav_cross_map_target`` when target data is available.

        Args:
            current_pos: Current tile of the agent.
            map_id: Map the agent is currently on.

        Returns:
            True if a route and a reachable transition were found and
            navigation resumed; False if navigation is not paused, the
            countdown has not expired, or it has to stay paused.
        """
        if not self.nav_paused:
            return False

        self.nav_pause_check_countdown -= 1
        if self.nav_pause_check_countdown > 0:
            return False

        self.nav_pause_check_countdown = self.NAV_PAUSE_CHECK_INTERVAL

        target_map = self.nav_paused_target_map
        if target_map is None:
            return False

        # Rebuild graph with any new transitions
        self._map_graph_dirty = True

        final_map = self.nav_map_chain[-1] if self.nav_map_chain else target_map
        if self.nav_cross_map_target_data:
            # NOTE(review): entries are matched by position only, so two
            # targets with equal coordinates on different maps are ambiguous.
            for entry in self.taught_nav_global_order:
                if tuple(entry.get('position', [])) == self.nav_cross_map_target:
                    final_map_from_data = entry.get('map_id')
                    # Compare against None: a map id of 0 is falsy but is
                    # still a map id and must not be discarded.
                    if final_map_from_data is not None:
                        final_map = final_map_from_data
                    break

        new_path = self.find_map_path(map_id, final_map)
        if not (new_path and len(new_path) > 1):
            return False

        # A chain of maps exists; adopt it whether or not the first hop is
        # reachable right now.
        self.nav_map_chain = new_path
        self.nav_chain_index = 0

        next_map = new_path[1]
        transition = self.get_transition_to_map(map_id, next_map)

        if transition:
            t_pos = tuple(transition['position']) if isinstance(transition['position'], list) else transition['position']
            path = self._astar(current_pos, t_pos, map_id)

            if path and len(path) > 1:
                self.nav_path = path
                self.nav_path_index = 1
                self.nav_target = t_pos
                self.nav_steps_taken = 0
                self.nav_stagnation_count = 0
                self.nav_last_position = current_pos

                # The pause is lifted only once the resume is confirmed.
                self.nav_paused = False
                self.nav_paused_reason = ""
                self.nav_paused_target_map = None

                chain_str = ' ‚Üí '.join(str(m) for m in new_path)
                print(f"  üß≠‚ñ∂Ô∏è CROSS-MAP NAV RESUMED: {chain_str}")
                return True

        # Have the path but can't reach the transition yet. Keep a paused
        # target map so the next periodic check is not short-circuited by the
        # "target_map is None" guard above.
        self.nav_paused = True
        self.nav_paused_reason = f"can't reach transition to map {next_map}"
        self.nav_paused_target_map = next_map
        return False

    def refresh_cross_map_navigation(self, current_pos, current_map):
        """Recompute the cross-map route from the agent's current location.

        The destination map is the last entry of ``nav_map_chain``.

        * On the destination map the agent re-paths to the final target.
        * Otherwise the map graph is marked dirty, a new chain is searched and
          the agent paths to the transition leading to the next map. If no
          chain, no transition or no reachable transition exists, navigation
          is paused (and stays "active").

        Args:
            current_pos: Current tile of the agent.
            current_map: Map the agent is currently on.

        Returns:
            True if navigation was re-pathed or paused; False if there is no
            cross-map target, no chain, or the final target cannot be reached
            on the destination map.
        """
        if not self.nav_cross_map_target:
            return False

        final_target = self.nav_cross_map_target

        # The destination map is always kept as the last chain entry.
        final_map = self.nav_map_chain[-1] if self.nav_map_chain else None
        if final_map is None:
            return False

        # Are we already on the final map?
        if current_map == final_map:
            # Just re-A* to the actual target
            path = self._astar(current_pos, final_target, current_map)
            if path and len(path) > 1:
                self.nav_path = path
                self.nav_path_index = 1
                self.nav_target = final_target
                self.nav_stagnation_count = 0
                self.nav_last_position = current_pos
                self.nav_paused = False
                print(f"  üß≠üîÑ CROSS-MAP REFRESH: on final map {current_map}, re-pathing to target")
                return True
            else:
                print(f"  üß≠üîÑ CROSS-MAP REFRESH: on final map but can't reach target")
                return False

        # Rebuild map graph (picks up any new transitions)
        self._map_graph_dirty = True
        new_chain = self.find_map_path(current_map, final_map)

        if not new_chain:
            # No route from here: pause and explore. The chain keeps the
            # destination as its last entry (intermediate hops are unknown),
            # because later refreshes and pause checks read the destination
            # map from nav_map_chain[-1]. Collapsing the chain to
            # [current_map] would make them treat the current map as the
            # destination.
            self.nav_map_chain = [current_map, final_map]
            self.nav_chain_index = 0
            self.nav_paused = True
            self.nav_paused_reason = f"refresh: no path from map {current_map} to map {final_map}"
            self.nav_paused_target_map = final_map
            self.nav_pause_check_countdown = self.NAV_PAUSE_CHECK_INTERVAL
            print(f"  üß≠üîÑ CROSS-MAP REFRESH: no path from {current_map} to {final_map}, pausing")
            return True  # Still "active" but paused

        # Update the chain
        old_chain = self.nav_map_chain
        self.nav_map_chain = new_chain
        self.nav_chain_index = 0

        # A* to the transition for the next map in the new chain
        next_map = new_chain[1]
        transition = self.get_transition_to_map(current_map, next_map)

        if transition:
            t_pos = tuple(transition['position']) if isinstance(transition['position'], list) else transition['position']
            path = self._astar(current_pos, t_pos, current_map)

            if path and len(path) > 1:
                self.nav_path = path
                self.nav_path_index = 1
                self.nav_target = t_pos
                self.nav_stagnation_count = 0
                self.nav_last_position = current_pos
                self.nav_paused = False

                old_str = ' ‚Üí '.join(str(m) for m in old_chain)
                new_str = ' ‚Üí '.join(str(m) for m in new_chain)
                if old_chain != new_chain:
                    print(f"  üß≠üîÑ CROSS-MAP REFRESH: chain updated {old_str} ‚Üí {new_str}")
                else:
                    print(f"  üß≠üîÑ CROSS-MAP REFRESH: re-pathed on map {current_map}")
                return True
            else:
                # Transition known but not reachable yet: pause.
                self.nav_paused = True
                self.nav_paused_reason = f"refresh: can't reach transition to map {next_map}"
                self.nav_paused_target_map = next_map
                self.nav_pause_check_countdown = self.NAV_PAUSE_CHECK_INTERVAL
                print(f"  üß≠üîÑ CROSS-MAP REFRESH: can't reach transition to {next_map}, pausing")
                return True
        else:
            self.nav_paused = True
            self.nav_paused_reason = f"refresh: no transition to map {next_map}"
            self.nav_paused_target_map = next_map
            self.nav_pause_check_countdown = self.NAV_PAUSE_CHECK_INTERVAL
            print(f"  üß≠üîÑ CROSS-MAP REFRESH: no transition to {next_map}, pausing")
            return True

    def _clear_cross_map_state(self):
        """Put every cross-map navigation field back to its idle value.

        Empties the map chain and rewinds its index, forgets the cross-map
        target and its data, lifts any pause (flag, reason, paused-on map),
        and zeroes both the pause-check and the refresh countdowns.
        """
        # Route bookkeeping.
        self.nav_map_chain, self.nav_chain_index = [], 0
        self.nav_cross_map_target = self.nav_cross_map_target_data = None
        # Pause bookkeeping.
        self.nav_paused, self.nav_paused_reason = False, ""
        self.nav_paused_target_map = None
        # Countdown timers.
        self.nav_pause_check_countdown = self.nav_cross_map_refresh_countdown = 0

    def _navigate_to_next_target(self, current_pos, map_id):
        """Advance through the ranked target list until one is reachable.

        Starting at ``self.nav_target_index``, an entry is passed over when
        its target is already in ``self.nav_struck_targets``, when its type
        starts with ``'cross_map_need'`` (cross-map placeholder), or when
        A* yields no path longer than a single tile. The first usable entry
        becomes the active navigation target.

        Args:
            current_pos: Position handed to A* as the start.
            map_id: Map handed to A*.

        Returns:
            True once a path has been installed; False when the list is
            exhausted, in which case navigation is aborted first.
        """
        while self.nav_target_index < len(self.nav_target_list):
            candidate = self.nav_target_list[self.nav_target_index]
            target, score, target_type = candidate[0], candidate[1], candidate[2]

            # Struck targets and cross-map placeholders are never pathed to.
            unusable = (target in self.nav_struck_targets
                        or target_type.startswith('cross_map_need'))
            route = None if unusable else self._astar(current_pos, target, map_id)

            if not route or len(route) <= 1:
                self.nav_target_index += 1
                continue

            # Install the route; index 1 is the first tile after the start.
            self.nav_active = True
            self.nav_path, self.nav_path_index = route, 1
            self.nav_target = target
            self.nav_steps_taken = self.nav_stagnation_count = 0
            self.nav_last_position = current_pos
            self.nav_curiosity_countdown = 0

            print(f"  üß≠ NAV START: ‚Üí ({target[0]}, {target[1]}) [{target_type}] "
                  f"score={score:.1f} path={len(route)} steps")
            return True

        self.abort_navigation("no valid targets")
        return False

    def get_nav_action(self, current_pos):
        """Translate the next waypoint of the stored path into a button name.

        Returns None when navigation is inactive, has no path, is paused
        (so other behaviour can run), the path is used up, or the waypoint
        coincides with ``current_pos`` on both axes. If the agent already
        stands on the current waypoint the path index is advanced first.
        Horizontal movement takes priority over vertical movement.

        Returns:
            "RIGHT", "LEFT", "DOWN", "UP", or None.
        """
        if not (self.nav_active and self.nav_path) or self.nav_paused:
            return None

        if self.nav_path_index >= len(self.nav_path):
            return None

        waypoint = self.nav_path[self.nav_path_index]
        if current_pos == waypoint:
            # Already on this waypoint: move on to the following one.
            self.nav_path_index += 1
            if self.nav_path_index >= len(self.nav_path):
                return None
            waypoint = self.nav_path[self.nav_path_index]

        step_x = waypoint[0] - current_pos[0]
        step_y = waypoint[1] - current_pos[1]

        if step_x > 0:
            return "RIGHT"
        if step_x < 0:
            return "LEFT"
        if step_y > 0:
            return "DOWN"
        if step_y < 0:
            return "UP"
        return None

    def update_nav_state(self, current_pos, map_id):
        """Advance navigation bookkeeping by one step.

        Handles, in order: the paused state, the step counter, the periodic
        cross-map refresh, the step budget, stagnation detection, and
        arrival (Manhattan distance <= 1) at the current target.

        Returns:
            True while navigation should keep running (including while
            paused or while approaching a map transition); False when
            navigation is inactive, was aborted here, or has just arrived
            and opened the curiosity window.
        """
        if not self.nav_active:
            return False

        if self.nav_paused:
            # Stay "active" while paused; only poll for a resume.
            self.check_nav_pause_resume(current_pos, map_id)
            return True

        self.nav_steps_taken += 1

        # Cross-map routes are re-planned every refresh interval.
        if self.nav_map_chain:
            self.nav_cross_map_refresh_countdown -= 1
            if self.nav_cross_map_refresh_countdown <= 0:
                self.nav_cross_map_refresh_countdown = self.NAV_CROSS_MAP_REFRESH_INTERVAL
                self.refresh_cross_map_navigation(current_pos, map_id)

        if self.nav_steps_taken >= self.NAV_MAX_STEPS:
            self.abort_navigation("max steps reached")
            return False

        if current_pos != self.nav_last_position:
            self.nav_stagnation_count = 0
        else:
            self.nav_stagnation_count += 1
            if self.nav_stagnation_count >= self.NAV_STAGNATION_LIMIT:
                self.abort_navigation("stuck during pathfinding")
                return False
        self.nav_last_position = current_pos

        if not self.nav_target:
            return True

        manhattan = (abs(current_pos[0] - self.nav_target[0])
                     + abs(current_pos[1] - self.nav_target[1]))
        if manhattan > 1:
            return True

        # Mid-chain targets are map transitions: no curiosity window, the
        # map change itself is handled elsewhere.
        if self.nav_map_chain and self.nav_chain_index < len(self.nav_map_chain) - 1:
            print(f"  üß≠üåç Approaching transition at ({self.nav_target[0]}, {self.nav_target[1]})")
            return True

        if self.nav_curiosity_countdown != 0:
            return True

        self.nav_curiosity_countdown = self.NAV_CURIOSITY_WINDOW
        self._mark_taught_target_visited(self.nav_target)
        print(f"  üß≠ NAV ARRIVED: ({self.nav_target[0]}, {self.nav_target[1]}) "
              f"‚Äî curiosity window {self.NAV_CURIOSITY_WINDOW} steps")
        return False

    def _mark_taught_target_visited(self, position):
        """Record the order number of the taught target located at ``position``.

        Does nothing unless taught nav targets are loaded. The current map is
        searched first, then every other map key in ``taught_nav_targets``.
        The search stops at the first entry whose position matches; its
        ``order`` is added to ``nav_visited_targets`` only when it is >= 0.
        """
        if not self.taught_nav_loaded:
            return

        wanted = tuple(position)
        here = self.current_map_id

        # Current map first, then the rest (covers cross-map targets).
        search_order = [] if here is None else [here]
        search_order.extend(m for m in self.taught_nav_targets.keys() if m != here)

        for candidate_map in search_order:
            for taught in self.taught_nav_targets.get(candidate_map, []):
                if tuple(taught['position']) != wanted:
                    continue
                order = taught.get('order', -1)
                if order >= 0:
                    self.nav_visited_targets.add(order)
                    print(f"  üß≠ TARGET VISITED: order #{order} at ({wanted[0]}, {wanted[1]}) on map {candidate_map}")
                return

    def complete_nav_target(self, found_novelty):
        """Finish the curiosity window that follows arrival at a nav target.

        Args:
            found_novelty: Truthy when the curiosity window discovered
                something new; navigation then ends successfully.

        Without novelty the current target (if any) is struck, finished
        cross-map state is cleared, and the next entry of the target list is
        attempted from the last known position (``(0, 0)`` if none).
        """
        if found_novelty:
            # nav_target may already have been cleared; guard it here the
            # same way the strike branch below does, instead of raising
            # TypeError while formatting the log line.
            where = ""
            if self.nav_target:
                where = f" at ({self.nav_target[0]}, {self.nav_target[1]})"
            print(f"  üß≠ NAV SUCCESS: Novelty found{where}")
            self.abort_navigation("novelty found")
            return

        if self.nav_target:
            self.nav_struck_targets.add(self.nav_target)
            print(f"  üß≠ NAV STRIKE: ({self.nav_target[0]}, {self.nav_target[1]}) ‚Äî no novelty, trying next")

        # Arrived at the last map of a cross-map chain: the chain is done.
        if self.nav_map_chain and self.nav_chain_index >= len(self.nav_map_chain) - 1:
            self._clear_cross_map_state()

        self.nav_target_index += 1
        current_pos = self.nav_last_position or (0, 0)
        map_id = self.current_map_id

        # _navigate_to_next_target aborts on failure itself; this second
        # abort is kept as a harmless safety net.
        if not self._navigate_to_next_target(current_pos, map_id):
            self.abort_navigation("all targets exhausted")

    def abort_navigation(self, reason=""):
        """Leave navigation mode and wipe single-map and cross-map state.

        A "NAV END" line (with the cross-map chain, when one exists) is
        printed only if navigation was active at the time of the call.

        Args:
            reason: Short text included in the log line.
        """
        if self.nav_active:
            chain_note = ""
            if self.nav_map_chain:
                route = ' ‚Üí '.join(map(str, self.nav_map_chain))
                chain_note = f" [cross-map chain: {route}]"
            print(f"  üß≠ NAV END: {reason} (took {self.nav_steps_taken} steps){chain_note}")

        # Single-map state.
        self.nav_active = False
        self.nav_path, self.nav_path_index = [], 0
        self.nav_target = None
        self.nav_steps_taken = self.nav_stagnation_count = self.nav_curiosity_countdown = 0
        # Cross-map state.
        self._clear_cross_map_state()

    def update_known_area_counter(self, raw_x, raw_y, map_id):
        """Count consecutive steps spent on already-visited tiles.

        The counter grows by one when the integer tile ``(raw_x, raw_y)`` is
        in the map memory's ``visited_tiles`` and drops to zero otherwise.
        This method only maintains the counter; the threshold comparison is
        done in ``should_start_navigation``.
        """
        visited = self.get_current_map_memory(map_id)['visited_tiles']
        tile = (int(raw_x), int(raw_y))
        self.known_area_counter = self.known_area_counter + 1 if tile in visited else 0

    def should_start_navigation(self):
        """Return True when navigation is idle and the known-area counter
        has reached ``KNOWN_AREA_TRIGGER``; False otherwise."""
        return not (self.nav_active
                    or self.known_area_counter < self.KNOWN_AREA_TRIGGER)

    def is_nav_active(self):
        """Return the ``nav_active`` flag (stays set while navigation is paused)."""
        return self.nav_active

    def is_nav_paused(self):
        """Return the ``nav_paused`` flag."""
        return self.nav_paused

    def is_in_nav_curiosity_window(self):
        """Return True while the post-arrival curiosity countdown is above zero."""
        return self.nav_curiosity_countdown > 0

    def tick_nav_curiosity_window(self):
        """Count the curiosity window down by one step.

        Returns:
            True on the tick that brings the countdown to zero (or below);
            False while the window is still open or when none is running.
        """
        if self.nav_curiosity_countdown <= 0:
            return False
        self.nav_curiosity_countdown -= 1
        return self.nav_curiosity_countdown <= 0

    def get_nav_targets_status(self):
        """Summarise taught nav targets as a dict.

        Returns:
            A dict with keys ``loaded``, ``total``, ``visited`` and
            ``remaining``. All counts are zero when nothing is loaded;
            otherwise ``total`` sums the targets of every map and
            ``visited`` is the size of ``nav_visited_targets``.
        """
        status = {'loaded': False, 'total': 0, 'visited': 0, 'remaining': 0}
        if not self.taught_nav_loaded:
            return status

        total = sum(map(len, self.taught_nav_targets.values()))
        visited = len(self.nav_visited_targets)
        status.update(loaded=True, total=total, visited=visited,
                      remaining=total - visited)
        return status


# ============================================================================
# CELL 3 PART 5: Stagnation, Blend Triggers, Mode Swap, Repetition/Pattern
# ============================================================================
# NO CHANGES — reorganized from original Cell 3 Part 3
# ============================================================================

    # =========================================================================
    # BLEND TIER DETECTION
    # =========================================================================
    
    def get_blend_tier(self):
        """
        Classify current stagnation into a blend tier.

        Returns 0 (no blend), 1 (light), 2 (medium) or 3 (hard); a higher
        tier means more taught-model influence. Tier 3 looks at pattern
        repeats and state stagnation; tiers 2 and 1 look at pattern repeats,
        position stagnation and the consecutive-action count. The highest
        matching tier wins.
        """
        triggers = self.BLEND_TIER_TRIGGERS

        # Tier 3 (hard): extreme stagnation.
        hard = triggers[3]
        if self.detected_pattern and self.pattern_repeat_count >= hard['pattern_repeats']:
            return 3
        if self.state_stagnation_count >= self.STATE_STAGNATION_THRESHOLD * hard['state_stagnation_mult']:
            return 3

        # Tier 2 (medium), then tier 1 (light): same three checks each.
        for tier in (2, 1):
            limits = triggers[tier]
            if self.detected_pattern and self.pattern_repeat_count >= limits['pattern_repeats']:
                return tier
            if self.get_position_stagnation() >= limits['pos_stagnation']:
                return tier
            if self.consecutive_action_count >= limits['consecutive']:
                return tier

        return 0
    
    def try_blend_if_needed(self):
        """
        Blend from the taught model when stagnation calls for it.

        A blend runs only if the taught reference is loaded, the current
        tier is non-zero, and either the tier is higher than the stored
        ``blend_tier`` or at least ``BLEND_COOLDOWN`` timesteps have passed
        since the last blend.

        Returns True if a blend was performed, False otherwise.
        """
        if not self.taught_reference['loaded']:
            return False

        tier = self.get_blend_tier()
        if tier == 0:
            return False

        # Escalation bypasses the cooldown; otherwise the cooldown must be over.
        allowed = (tier > self.blend_tier
                   or (self.timestep - self.last_blend_timestep) >= self.BLEND_COOLDOWN)
        if not allowed:
            return False

        self.blend_from_taught(tier)
        return True

    # =========================================================================
    # MODE SWAP & STAGNATION  
    # =========================================================================
    
    def get_context_state_hash(self, context_state):
        """Reduce the first six context values to a hashable 6-tuple.

        Indices 0, 1 and 4 are rounded to two decimals; indices 2, 3 and 5
        are truncated to int.
        """
        rounded = {i: round(context_state[i], 2) for i in (0, 1, 4)}
        whole = {i: int(context_state[i]) for i in (2, 3, 5)}
        return (rounded[0], rounded[1], whole[2], whole[3], rounded[4], whole[5])

    def check_state_stagnation(self, context_state):
        """Track how long the hashed context state has stayed the same.

        On the first repeat the last action (if any) is remembered as the
        stagnation initiator; any change of hash resets both the counter and
        the initiator.

        Returns:
            True once the repeat counter reaches
            ``STATE_STAGNATION_THRESHOLD``.
        """
        fingerprint = self.get_context_state_hash(context_state)

        if fingerprint != self.last_context_state_hash:
            self.state_stagnation_count = 0
            self.stagnation_initiator_action = None
        else:
            self.state_stagnation_count += 1
            first_repeat = self.state_stagnation_count == 1
            if first_repeat and self.last_action:
                self.stagnation_initiator_action = self.last_action

        self.last_context_state_hash = fingerprint
        return self.state_stagnation_count >= self.STATE_STAGNATION_THRESHOLD

    def check_position_stagnation(self):
        """Return ``get_position_stagnation()`` unchanged.

        NOTE(review): despite the "check" name the value is passed through
        as-is; other methods in this class compare it against numeric
        thresholds, so it is presumably a count rather than a bool.
        """
        return self.get_position_stagnation()

    def should_force_random(self):
        """
        Decide whether the agent is stuck badly enough to force a random action.

        Any one of these is sufficient: position stagnation >= 8, the same
        action >= 15 times in a row, a detected pattern repeated >= 4 times,
        or state stagnation at twice ``STATE_STAGNATION_THRESHOLD``. When
        forcing, a blend from the taught model is attempted first.
        """
        signals = [
            self.get_position_stagnation() >= 8,
            self.consecutive_action_count >= 15,
            bool(self.detected_pattern and self.pattern_repeat_count >= 4),
            self.state_stagnation_count >= self.STATE_STAGNATION_THRESHOLD * 2,
        ]
        force = any(signals)

        if force:
            # The blend adjusts priorities; the random action that follows
            # breaks the immediate loop.
            self.try_blend_if_needed()

        return force

    def get_forced_random_action_name(self):
        """Pick a random action, avoiding the repeated one and the pattern.

        Candidates start as the four directions plus "A" and "B". The
        currently repeated action and every action in the detected pattern
        are excluded. If nothing is left, the four directions (minus the
        repeated action) are used instead.
        """
        directions = ["UP", "DOWN", "LEFT", "RIGHT"]
        repeated = self.current_repeated_action

        excluded = set()
        if repeated:
            excluded.add(repeated)
        if self.detected_pattern:
            excluded.update(self.detected_pattern)

        pool = [name for name in directions + ["A", "B"] if name not in excluded]

        if not pool:
            # Everything was excluded: fall back to plain movement.
            pool = [name for name in directions if name != repeated]

        if not pool:
            pool = list(directions)

        return random.choice(pool)

    def check_direction_change_progress(self, context_state):
        """Report whether the direction value changed since the previous call.

        Reads ``context_state[5]`` as an int, stores it for the next call,
        and returns False on the very first call (nothing to compare with).
        """
        facing = int(context_state[5])
        previous = self.last_direction_for_progress
        self.last_direction_for_progress = facing
        if previous is None:
            return False
        return facing != previous

    def apply_stagnation_initiator_penalty(self):
        """Halve the utility of the action that started the current stagnation.

        Only the first action perceptron whose name matches is touched, and
        its utility never drops below the floor of its group. The stored
        initiator is cleared afterwards whether or not a match was found.
        """
        culprit = self.stagnation_initiator_action
        if culprit is None:
            return

        match = next((a for a in self.actions() if a.action == culprit), None)
        if match is not None:
            before = match.utility
            if match.group == "interact":
                floor = self.INTERACT_UTILITY_FLOOR
            else:
                floor = self.MOVE_UTILITY_FLOOR
            match.utility = max(floor, before * 0.5)
            print(f"  üìç STAGNATION PENALTY: {culprit} {before:.3f} ‚Üí {match.utility:.3f}")

        self.stagnation_initiator_action = None

    def check_productive_change(self, context_state):
        """Detect progress since the previous call or the last mode swap.

        Progress is a map change, a battle-flag change, or a move of more
        than 0.03 (in context-state coordinates) away from the position
        stored at the last mode swap. Later checks overwrite the reason of
        earlier ones. Independently, a direction change can shave 5 off the
        state stagnation counter when that option is enabled.

        Returns:
            ``(productive, reason)``; ``reason`` is "" when nothing changed.
        """
        map_now = int(context_state[2])
        in_battle = context_state[3] > 0.5
        x_now, y_now = context_state[0], context_state[1]

        productive, reason = False, ""

        if self.last_map_id is not None and map_now != self.last_map_id:
            productive, reason = True, "map change"

        if self.last_battle_state is not None and in_battle != self.last_battle_state:
            productive, reason = True, "battle change"

        anchor = self.position_at_mode_swap
        if anchor is not None:
            dist = np.sqrt((x_now - anchor[0])**2 +
                           (y_now - anchor[1])**2)
            if dist > 0.03:
                # The message scales the distance by 255 and labels it as tiles.
                productive, reason = True, f"moved {dist*255:.1f} tiles"

        if self.direction_change_counts_as_progress and self.check_direction_change_progress(context_state):
            self.state_stagnation_count = max(0, self.state_stagnation_count - 5)

        self.last_map_id, self.last_battle_state = map_now, in_battle
        return productive, reason

    def on_productive_change(self, reason):
        """Reset stagnation bookkeeping after real progress.

        Restores both mode-swap thresholds to their defaults, zeroes the
        swap-chain, state-stagnation, unproductive-swap and known-area
        counters, forgets the stagnation initiator, and drops the blend
        tier back to 0 (logging ``reason`` when it was non-zero).
        """
        # Thresholds back to defaults.
        self.move_to_interact_threshold = self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD
        self.interact_to_move_threshold = self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD

        # Stagnation counters.
        for counter in ("swap_chain_count", "state_stagnation_count",
                        "unproductive_swap_count"):
            setattr(self, counter, 0)
        self.stagnation_initiator_action = None

        if self.blend_tier > 0:
            print(f"  ‚úÖ Blend tier reset: {self.blend_tier} ‚Üí 0 ({reason})")
            self.blend_tier = 0

        # Progress counts as leaving the known area.
        self.known_area_counter = 0

    def on_mode_swap(self, from_mode, to_mode):
        """Update swap bookkeeping after the control mode changed.

        Bumps the swap-chain and unproductive-swap counters and restarts the
        frames-in-mode counter. Once the unproductive count reaches
        ``UNPRODUCTIVE_SWAP_THRESHOLD`` the top utility of the new mode's
        group is knocked down and the count restarts. Finally the threshold
        for leaving ``to_mode`` is raised by ``THRESHOLD_INCREMENT``, capped
        at ``MAX_THRESHOLD``.

        Args:
            from_mode: Mode being left (not used by this method).
            to_mode: Mode being entered.
        """
        self.swap_chain_count += 1
        self.frames_in_current_mode = 0

        self.unproductive_swap_count += 1
        if self.unproductive_swap_count >= self.UNPRODUCTIVE_SWAP_THRESHOLD:
            self._reset_highest_to_third(to_mode)
            self.unproductive_swap_count = 0

        # Make it harder to bounce straight back out of the new mode.
        attr = ("interact_to_move_threshold" if to_mode == "interact"
                else "move_to_interact_threshold")
        raised = getattr(self, attr) + self.THRESHOLD_INCREMENT
        setattr(self, attr, min(self.MAX_THRESHOLD, raised))

    def _reset_highest_to_third(self, mode):
        """Pull the top-utility action of a group just below the third best.

        Nothing happens for the "battle" and "both" modes or when the group
        has fewer than three actions. Any mode other than "move" is mapped
        to the "interact" group. The new utility is 90% of the third-highest
        utility, but never below the group's floor.
        """
        if mode == "battle" or mode == "both":
            return

        group = "move" if mode == "move" else "interact"
        members = [a for a in self.actions() if a.group == group]
        if len(members) < 3:
            return

        ranked = sorted(members, key=lambda a: a.utility, reverse=True)
        top, third = ranked[0], ranked[2]
        if group == "interact":
            floor = self.INTERACT_UTILITY_FLOOR
        else:
            floor = self.MOVE_UTILITY_FLOOR
        top.utility = max(third.utility * 0.9, floor)

    def should_use_both_mode(self):
        """Return True when state stagnation or unproductive swaps exceed
        their respective "both"-mode thresholds."""
        if self.state_stagnation_count > self.BOTH_MODE_STAGNATION_THRESHOLD:
            return True
        return self.unproductive_swap_count > self.BOTH_MODE_SWAP_THRESHOLD

    def determine_control_mode(self, context_state, raw_position=None):
        """Choose the control mode for this step.

        Decision order:
          1. ``context_state[3] > 0.5`` -> "battle" (no bookkeeping at all).
          2. Productive change -> reset stagnation bookkeeping.
          3. Heavy stagnation / too many unproductive swaps -> "both".
          4. State stagnation -> penalise the initiator and flip between
             "move" and "interact".
          5. Tile-probing and position-stagnation rules for switching
             between "move" and "interact".

        Args:
            context_state: Indexable state; indices 0/1 are the position,
                2 the map id and 3 the battle flag.
            raw_position: Optional ``(x, y)`` tile position. When omitted it
                is derived as ``int(context_state[i] * 255)``.

        Returns:
            "battle", "both", "move" or "interact".
        """
        if context_state[3] > 0.5:
            return "battle"

        self.frames_in_current_mode += 1
        position_stagnation = self.get_position_stagnation()

        productive, reason = self.check_productive_change(context_state)
        if productive:
            self.on_productive_change(reason)

        if self.should_use_both_mode():
            return "both"

        if self.check_state_stagnation(context_state):
            self.apply_stagnation_initiator_penalty()
            # Capture the mode being left BEFORE overwriting it, so that
            # on_mode_swap receives a real (from, to) pair rather than the
            # new mode twice.
            previous_mode = self.control_mode
            new_mode = "interact" if previous_mode == "move" else "move"
            self.control_mode = new_mode
            self.position_at_mode_swap = (context_state[0], context_state[1])
            self.on_mode_swap(previous_mode, new_mode)
            self.state_stagnation_count = 0
            return self.control_mode

        raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
        raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
        current_map = int(context_state[2])

        tile_needs_probing = self.should_interact_at_tile(raw_x, raw_y, current_map)
        untried_directions = self.get_untried_directions(raw_x, raw_y, current_map)

        # A tile worth probing pulls us into interact mode after 3 frames of
        # moving; this switch does not go through on_mode_swap.
        if tile_needs_probing and untried_directions and self.control_mode == "move" and self.frames_in_current_mode >= 3:
            self.control_mode = "interact"
            self.position_at_mode_swap = (context_state[0], context_state[1])
            self.frames_in_current_mode = 0
            return self.control_mode

        if self.control_mode == "move" and position_stagnation >= self.move_to_interact_threshold:
            # Stuck while moving: try interacting.
            self.control_mode = "interact"
            self.position_at_mode_swap = (context_state[0], context_state[1])
            self.on_mode_swap("move", "interact")
        elif self.control_mode == "interact":
            if (not tile_needs_probing or not untried_directions) and self.frames_in_current_mode >= 5:
                # Nothing left to probe here: go back to moving without
                # going through on_mode_swap.
                self.control_mode = "move"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.frames_in_current_mode = 0
            elif self.frames_in_current_mode >= self.interact_to_move_threshold:
                # Interacted for too long: counted as a regular swap.
                self.control_mode = "move"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.on_mode_swap("interact", "move")

        return self.control_mode

    # =========================================================================
    # EXPLORATION TRACKING
    # =========================================================================
    
    def update_exploration_tracking(self, context_state, prev_context_state, raw_position=None, prev_raw_position=None):
        """Per-step exploration bookkeeping.

        On a map change the transition is recorded (when the previous state
        and position are known), a transition ban is created at the entry
        tile, and the map-change hook runs. Every step then records the
        visited tile, accumulates debt, updates/lifts bans, checks for
        obstructions and interaction verification, and decays debts every
        300 timesteps.

        Args:
            context_state: Current state; index 2 is the map id, 5 the
                direction.
            prev_context_state: Previous state, or None.
            raw_position: Current ``(x, y)`` tile, or None to derive it as
                ``int(context_state[i] * 255)``.
            prev_raw_position: Previous ``(x, y)`` tile, or None.
        """
        current_map = int(context_state[2])
        if raw_position:
            raw_x = raw_position[0]
        else:
            raw_x = int(context_state[0] * 255)
        if raw_position:
            raw_y = raw_position[1]
        else:
            raw_y = int(context_state[1] * 255)
        current_pos = (raw_x, raw_y)

        have_prev_state = prev_context_state is not None
        have_prev_pos = prev_raw_position is not None

        if self.current_map_id is not None and current_map != self.current_map_id:
            prev_map = self.current_map_id
            if have_prev_state and have_prev_pos:
                prev_facing = int(prev_context_state[5])
                # A transition right after pressing A counts as an interaction.
                how = 'interact' if self.last_action == 'A' else 'walk'
                self.record_transition(prev_raw_position, prev_map, current_map,
                                       prev_facing, how)
            if have_prev_pos:
                entry_dir = int(context_state[5]) if have_prev_state else 0
                # The ban direction is the entry direction shifted by 2 (mod 4).
                self.create_transition_ban(current_map, current_pos, (entry_dir + 2) % 4)
            self.on_map_change(current_map)

        self.current_map_id = current_map
        self.record_visited_tile(raw_x, raw_y, current_map)
        self.accumulate_temp_debt(current_map)
        self.update_transition_ban(current_map, current_pos)
        self.check_ban_lift_conditions(current_map)

        if have_prev_state and have_prev_pos:
            self.detect_obstruction(prev_context_state, context_state, raw_position, prev_raw_position)

        self.check_interaction_verification(context_state, prev_context_state)
        self.last_direction = int(context_state[5])

        if self.timestep % 300 == 0:
            self.decay_all_debts()

    def on_map_change(self, new_map):
        """React to entering ``new_map``.

        Saves exploration memory, returns to "move" mode, aborts any active
        navigation, clears the known-area counter and the struck-target set,
        then logs a short summary of what is already known about the map.
        """
        self.save_exploration_memory()
        self.control_mode, self.frames_in_current_mode = "move", 0

        # Paths and targets belong to the map we just left.
        if self.nav_active:
            self.abort_navigation("map changed")
        self.known_area_counter = 0
        self.nav_struck_targets.clear()

        memory = self.get_current_map_memory(new_map)
        tile_interactions = memory.get('tile_interactions', {})
        print(f"  üó∫Ô∏è MAP CHANGE ‚Üí {new_map}: {len(memory['visited_tiles'])} visited, {len(memory['obstructions'])} obs")
        exhausted = sum(1 for info in tile_interactions.values() if info.get('exhausted', False))
        print(f"     Tiles probed: {len(tile_interactions)}, exhausted: {exhausted}")

    # =========================================================================
    # REPETITION & PATTERN HANDLING
    # =========================================================================
    
    def track_consecutive_action(self, action_name):
        """Maintain the run length of the most recently repeated action.

        A different action starts a new run of length 1; the same action
        extends the current run by one.
        """
        if action_name != self.current_repeated_action:
            self.current_repeated_action = action_name
            self.consecutive_action_count = 1
            return
        self.consecutive_action_count += 1

    def get_learning_multiplier(self, action_name):
        """Return the learning-rate multiplier for ``action_name``.

        The multiplier is 1.0 unless ``action_name`` is the currently
        repeated action and its run length has reached
        ``LEARNING_SLOWDOWN_START``. From there it falls linearly to 0.05 as
        the run length approaches ``LEARNING_SLOWDOWN_MAX``.

        If ``LEARNING_SLOWDOWN_MAX`` is not greater than
        ``LEARNING_SLOWDOWN_START`` the slowdown is applied in full instead
        of dividing by zero (equal constants) or producing a multiplier
        above 1.0 (MAX below START).
        """
        if (action_name != self.current_repeated_action
                or self.consecutive_action_count < self.LEARNING_SLOWDOWN_START):
            return 1.0

        span = self.LEARNING_SLOWDOWN_MAX - self.LEARNING_SLOWDOWN_START
        if span <= 0:
            # Degenerate configuration: no ramp, so jump to full slowdown.
            progress = 1.0
        else:
            progress = min(1.0, (self.consecutive_action_count - self.LEARNING_SLOWDOWN_START) / span)
        return max(0.05, 1.0 - 0.95 * progress)

    def get_nth_highest_utility(self, group, n=3):
        """Return the n-th highest utility among the actions of ``group``.

        Falls back to the group's utility floor when the group has fewer
        than ``n`` actions.
        """
        ranked = [a.utility for a in self.actions() if a.group == group]
        ranked.sort(reverse=True)
        if n > len(ranked):
            if group == "interact":
                return self.INTERACT_UTILITY_FLOOR
            return self.MOVE_UTILITY_FLOOR
        return ranked[n - 1]

    def detect_pattern(self):
        if len(self.action_history) < 6:
            return None, 0
        recent = list(self.action_history)[-self.PATTERN_CHECK_WINDOW:]
        for pattern_len in range(1, self.PATTERN_MAX_LENGTH + 1):
            if len(recent) < pattern_len * self.PATTERN_MIN_REPEATS:
                continue
            candidate = tuple(recent[-pattern_len:])
            repeat_count, idx = 0, len(recent) - pattern_len
            while idx >= 0 and tuple(recent[idx:idx + pattern_len]) == candidate:
                repeat_count += 1
                idx -= pattern_len
            if repeat_count >= self.PATTERN_MIN_REPEATS:
                return candidate, repeat_count
        return None, 0

    def apply_pattern_penalty(self):
        """Detect a repeating action pattern and damp the actions in it.

        The detection result is stored in ``detected_pattern`` and
        ``pattern_repeat_count``. Each distinct action of the pattern has
        the utility of its first matching perceptron scaled by
        ``max(0.3, 1 - 0.15 * repeats)``, never dropping below the floor of
        its group.
        """
        pattern, repeats = self.detect_pattern()
        self.detected_pattern = pattern
        self.pattern_repeat_count = 0 if pattern is None else repeats
        if pattern is None:
            return

        scale = max(0.3, 1.0 - repeats * 0.15)

        for name in set(pattern):
            hit = next((a for a in self.actions() if a.action == name), None)
            if hit is None:
                continue
            if hit.group == "interact":
                floor = self.INTERACT_UTILITY_FLOOR
            else:
                floor = self.MOVE_UTILITY_FLOOR
            hit.utility = max(floor, hit.utility * scale)

    def apply_repetition_penalty(self):
        """Penalise the action that is being repeated back-to-back.

        Applies to the first perceptron matching the repeated action. At
        ``HARD_RESET_THRESHOLD`` its utility is set to the group floor and
        the run counter restarts; at ``PENALTY_THRESHOLD`` its utility is
        halved (not below the floor). Shorter runs are left alone.
        """
        repeated = self.current_repeated_action
        if repeated is None:
            return

        offender = next((a for a in self.actions() if a.action == repeated), None)
        if offender is None:
            return

        if offender.group == "interact":
            floor = self.INTERACT_UTILITY_FLOOR
        else:
            floor = self.MOVE_UTILITY_FLOOR

        if self.consecutive_action_count >= self.HARD_RESET_THRESHOLD:
            offender.utility = floor
            self.consecutive_action_count = 0
            print(f"  üî® HARD RESET: {offender.action} ‚Üí {floor:.3f}")
        elif self.consecutive_action_count >= self.PENALTY_THRESHOLD:
            offender.utility = max(offender.utility * 0.5, floor)


# ============================================================================
# CELL 3 PART 6: Entity Spawning/Clustering, Learning, Save/Load
# ============================================================================
# NO CHANGES — reorganized from original Cell 3 Part 3
# ============================================================================

    # =========================================================================
    # ENTITY SPAWNING & CLUSTERING
    # =========================================================================
    
    def spawn_entity_from_novelty(self, learning_state, context_state, raw_position=None):
        """
        Create a new entity perceptron seeded from the current learning state.

        The weights start as the learning state scaled to length 0.1 (or as
        tiny random values when the state has zero norm), and the utility
        starts at 1.0. No duplicate check is done here; the capacity check
        at the end may trigger clustering.

        ``context_state`` and ``raw_position`` are accepted but not used by
        this method.
        """
        newcomer = Perceptron("entity", entity_type=f"spawned_{self.entity_spawn_count}")
        dim = len(learning_state)
        newcomer.ensure_weights(dim)

        magnitude = np.linalg.norm(learning_state)
        if magnitude > 0:
            # Unit direction of the state, scaled down to 0.1.
            newcomer.weights = (learning_state / magnitude) * 0.1
        else:
            newcomer.weights = np.random.randn(dim) * 0.001

        newcomer.utility = 1.0
        self.add(newcomer)
        self.entity_spawn_count += 1

        self.check_entity_capacity()
    
    def check_entity_capacity(self):
        """
        Keep the entity count under control.

        Once the count reaches ``entity_capacity`` clustering is run. If the
        count afterwards is still at least 90% of what it was, the capacity
        is multiplied by ``ENTITY_CAPACITY_GROWTH`` (truncated to int).
        """
        before = len(self.entities())
        if before < self.entity_capacity:
            return

        self.cluster_entities()
        after = len(self.entities())

        # Clustering removed more than 10%: the current capacity is enough.
        if after < before * 0.9:
            return

        previous_cap = self.entity_capacity
        self.entity_capacity = int(previous_cap * self.ENTITY_CAPACITY_GROWTH)
        print(f"  üß© Entity capacity expanded: {previous_cap} ‚Üí {self.entity_capacity} "
              f"(clustering only reduced {before} ‚Üí {after})")
    
    def cluster_entities(self):
        """
        Merge spawned entity perceptrons whose activation histories are similar.

        Innate sense entities are never clustered, and spawned entities with
        fewer than ``ENTITY_MIN_ACTIVATIONS`` recorded activations are kept
        untouched. The remaining entities are grouped greedily: each entity that
        is not yet grouped collects every later entity whose zero-padded
        activation vector has a cosine similarity of at least
        ``ENTITY_CLUSTER_SIMILARITY`` with it.

        Each group is replaced by one new ``merged_<n>`` entity whose weights
        are the element-wise mean of the members' weights (truncated to the
        shortest member), whose utility is the group maximum, and whose
        familiarity and learning rate are the group means.

        Members whose ``weights`` is ``None`` are left out of the weight average
        (previously they crashed it), and a group without any usable weights is
        left unmerged.

        Side effects: rebuilds ``self.perceptrons``, clears ``self._cache_valid``,
        advances ``self.entity_merge_count`` and prints a summary line. Returns
        ``None``.
        """
        entities = self.entities()

        innate_types = {"sense_menu", "sense_battle", "sense_movement", "sense_map_transition"}
        spawned = [e for e in entities if e.entity_type not in innate_types]
        innate = [e for e in entities if e.entity_type in innate_types]

        if len(spawned) < 2:
            return

        clusterable = []
        too_young = []
        for e in spawned:
            if len(e.cluster_activations) >= self.ENTITY_MIN_ACTIVATIONS:
                clusterable.append(e)
            else:
                too_young.append(e)

        if len(clusterable) < 2:
            return

        # Zero-pad every activation history to a common length and compute each
        # norm once, instead of recomputing it for every pair in the loop below.
        max_len = max(len(e.cluster_activations) for e in clusterable)
        activation_vecs = []
        norms = []
        for e in clusterable:
            history = list(e.cluster_activations)
            vec = np.zeros(max_len)
            vec[:len(history)] = history
            activation_vecs.append(vec)
            norms.append(np.linalg.norm(vec))

        merged_indices = set()
        merge_groups = []

        for i in range(len(clusterable)):
            if i in merged_indices:
                continue
            norm_i = norms[i]
            if norm_i < 1e-10:
                # An all-zero history has no direction to compare against.
                continue
            vec_i = activation_vecs[i]
            group = [i]

            for j in range(i + 1, len(clusterable)):
                if j in merged_indices:
                    continue
                norm_j = norms[j]
                if norm_j < 1e-10:
                    continue

                cosine_sim = np.dot(vec_i, activation_vecs[j]) / (norm_i * norm_j)
                if cosine_sim >= self.ENTITY_CLUSTER_SIMILARITY:
                    group.append(j)
                    merged_indices.add(j)

            if len(group) > 1:
                merged_indices.add(i)
                merge_groups.append(group)

        if not merge_groups:
            return

        new_entities = []
        merged_ids = set()
        total_merged = 0

        for group in merge_groups:
            group_entities = [clusterable[idx] for idx in group]

            # Only members that actually carry weights can take part in the average.
            weighted = [e for e in group_entities if e.weights is not None]
            if not weighted:
                continue
            min_dim = min(len(e.weights) for e in weighted)
            if min_dim == 0:
                continue

            avg_weights = np.zeros(min_dim)
            for e in weighted:
                avg_weights += e.weights[:min_dim]
            avg_weights /= len(weighted)

            merged = Perceptron("entity", entity_type=f"merged_{self.entity_merge_count}")
            merged.weights = avg_weights
            merged.utility = max(e.utility for e in group_entities)
            merged.familiarity = np.mean([e.familiarity for e in group_entities])
            merged.learning_rate = np.mean([e.learning_rate for e in group_entities])

            new_entities.append(merged)
            self.entity_merge_count += 1
            total_merged += len(group_entities)

            # Identity, not equality, decides which originals are dropped.
            for member in group_entities:
                merged_ids.add(id(member))

        if not new_entities:
            # Every candidate group was skipped, so there is nothing to rebuild or report.
            return

        kept_spawned = [e for e in clusterable if id(e) not in merged_ids]

        self.perceptrons = (
            [p for p in self.perceptrons if p.kind == "action"] +
            innate +
            kept_spawned +
            too_young +
            new_entities
        )
        self._cache_valid = False

        print(f"  üß© CLUSTERED: {total_merged} entities ‚Üí {len(new_entities)} merged "
              f"| Total entities now: {len(self.entities())}")

    # =========================================================================
    # ENTITY & LEARNING
    # =========================================================================
    
    def spawn_innate_entities(self, learning_state):
        """
        Create the four built-in sense entities once.

        Each entity gets a zero weight vector as long as ``learning_state`` with
        a few fixed slots switched on: 0.5 per slot when the entity watches
        several slots, 1.0 when it watches a single one. Does nothing when
        ``self.innate_entities_spawned`` is already set.
        """
        if self.innate_entities_spawned:
            return

        dim = len(learning_state)
        sense_layout = (
            ("sense_menu", [5, 6]),
            ("sense_battle", [3, 4]),
            ("sense_movement", [0, 1]),
            ("sense_map_transition", [2]),
        )

        for sense_name, slots in sense_layout:
            sensor = Perceptron("entity", entity_type=sense_name)
            sensor.ensure_weights(dim)
            sensor.weights = np.zeros(dim)
            share = 0.5 if len(slots) > 1 else 1.0
            for slot in slots:
                sensor.weights[slot] = share
            self.add(sensor)

        self.innate_entities_spawned = True

    def enforce_utility_floors(self):
        """Raise every action's utility to at least the floor configured for its group."""
        for perceptron in self.actions():
            if perceptron.group == "move":
                minimum = self.MOVE_UTILITY_FLOOR
            else:
                minimum = self.INTERACT_UTILITY_FLOOR
            perceptron.utility = max(perceptron.utility, minimum)

    def get_spawn_threshold_adaptive(self, error_type='combined', percentile=50):
        """
        Return the error level above which novelty counts as worth spawning for.

        ``error_type`` selects the history to look at: ``'numeric'`` or
        ``'visual'``; any other value uses the combined ``error_history``.
        With fewer than 100 samples a fixed 0.0005 is returned; otherwise the
        requested percentile of the history, never below 0.001.
        """
        if error_type == 'numeric':
            samples = self.numeric_error_history
        elif error_type == 'visual':
            samples = self.visual_error_history
        else:
            samples = self.error_history

        if len(samples) < 100:
            return 0.0005
        return max(0.001, np.percentile(samples, percentile))

    def stagnation_level(self, window=10):
        """
        Measure how little the learning state moved over the last ``window`` steps.

        Returns 0.0 while fewer than ``window`` states are recorded. Otherwise
        returns ``1 - tanh(2 * mean step size)``, where a step size is the
        Euclidean distance between consecutive states over their shared length.
        """
        history = self.prev_learning_states
        if len(history) < window:
            return 0.0

        tail = list(history)[-window:]
        step_sizes = []
        for later, earlier in zip(tail[1:], tail[:-1]):
            shared = min(len(later), len(earlier))
            step_sizes.append(np.linalg.norm(later[:shared] - earlier[:shared]))

        return 1.0 - np.tanh(np.mean(step_sizes) * 2.0)

    def predict_future_error(self, state, action, context_state, raw_position=None):
        """
        Score how much there is to learn from taking ``action`` in ``state``.

        The base score blends the mean utility-weighted entity prediction (0.5
        when there are no entities) with the action's own utility, 70/30. It is
        then shrunk by the novelty debt of the current map and location, by
        repeating the same action past ``LEARNING_SLOWDOWN_START``, and by the
        action belonging to a detected pattern. A small Gaussian noise term is
        added to the returned value.
        """
        known_entities = self.entities()
        if known_entities:
            entity_novelty = np.mean([e.predict(state) * e.utility for e in self.entities()])
        else:
            entity_novelty = 0.5
        score = entity_novelty * 0.7 + action.utility * 0.3

        current_map = int(context_state[2])
        # Without a raw position, scale the first two context values by 255.
        coords = raw_position if raw_position else (context_state[0] * 255, context_state[1] * 255)
        loc = self.get_location_key(*coords, current_map)

        map_debt = min(self.map_novelty_debt.get(current_map, 0.0), self.MAX_MAP_DEBT)
        loc_debt = min(self.location_novelty.get(loc, 0.0), self.MAX_LOCATION_DEBT)
        total_debt = map_debt + self.get_temp_debt(current_map) + loc_debt * 0.5
        score *= 1.0 / (1.0 + total_debt * 5.0)

        repeating = action.action == self.current_repeated_action
        if repeating and self.consecutive_action_count > self.LEARNING_SLOWDOWN_START:
            excess = self.consecutive_action_count - self.LEARNING_SLOWDOWN_START
            score *= 1.0 / (1.0 + excess * 0.15)

        if self.detected_pattern and action.action in self.detected_pattern:
            score *= 1.0 / (1.0 + self.pattern_repeat_count * 0.2)

        return score + np.random.randn() * 0.05

    def compute_multi_modal_error(self, state, next_state):
        """
        Split the change between two states into three error measures.

        Returns ``(weighted, numeric, visual)``: ``numeric`` is the plain sum of
        absolute differences over the first (up to) eight components, ``visual``
        is the Euclidean norm of the difference from index 8 onward, and
        ``weighted`` applies per-component weights to the first part and adds
        twice the visual term.
        """
        channel_weights = (0.5, 0.5, 10.0, 5.0, 3.0, 2.0, 1.5, 0.3)

        head_len = min(8, len(state), len(next_state))
        deltas = []
        for i in range(head_len):
            deltas.append(abs(next_state[i] - state[i]))

        visual = np.linalg.norm(next_state[8:] - state[8:])
        numeric = sum(deltas)
        weighted = sum(d * w for d, w in zip(deltas, channel_weights)) + visual * 2.0
        return weighted, numeric, visual

    def learn(self, learning_state, next_learning_state, context_state, next_context_state, dead=False,
              raw_position=None, next_raw_position=None):
        """
        Run one learning step from an observed state transition.

        In order, this method:
          1. zero-pads the two learning states to a common length,
          2. spawns the innate entities on first use,
          3. updates exploration tracking with the current and previous context,
          4. computes the weighted/numeric/visual errors and records them,
          5. spawns a novelty entity when the weighted error exceeds the adaptive
             threshold outside of menus and battles,
          6. updates visit counters and novelty debts for the map and location,
             and dampens the error on heavily visited maps/locations,
          7. updates every perceptron with the (scaled) error,
          8. decays Start/Select weights, applies repetition/pattern penalties
             and re-applies utility floors,
          9. boosts the utility of the last action when the position changed,
         10. periodically saves exploration memory and logs the last action.

        Args:
            learning_state: state vector before the action (needs ``.shape``).
            next_learning_state: state vector after the action.
            context_state: indexable context; index 2 is read as the map id,
                index 3 as the battle flag and index 4 as the menu flag.
            next_context_state: not referenced in this method body.
            dead: not referenced in this method body.
            raw_position: optional position; when falsy, the first two context
                values scaled by 255 are used instead.
            next_raw_position: not referenced in this method body.
        """
        # Pad the shorter vector with zeros so both states have the same length.
        if learning_state.shape != next_learning_state.shape:
            max_dim = max(len(learning_state), len(next_learning_state))
            learning_state = np.pad(learning_state, (0, max(0, max_dim - len(learning_state))))
            next_learning_state = np.pad(next_learning_state, (0, max(0, max_dim - len(next_learning_state))))
        
        if not self.innate_entities_spawned:
            self.spawn_innate_entities(learning_state)
        
        # Previous context/position come from the logged history and the last call.
        prev_context = self.prev_context_states[-1] if self.prev_context_states else None
        prev_raw = getattr(self, '_last_raw_position', None)
        self.update_exploration_tracking(context_state, prev_context, raw_position, prev_raw)
        self._last_raw_position = raw_position
        
        weighted_error, numeric_error, visual_error = self.compute_multi_modal_error(learning_state, next_learning_state)
        self.error_history.append(weighted_error)
        self.numeric_error_history.append(numeric_error)
        self.visual_error_history.append(visual_error)
        
        # === ENTITY SPAWNING: spawn freely when novelty exceeds threshold ===
        # Only spawn from spatial novelty - not menu/battle context changes
        spawn_threshold = self.get_spawn_threshold_adaptive('combined', percentile=75)
        if weighted_error > spawn_threshold and len(self.error_history) >= 100:
            menu_active = context_state[4] > 0.5
            battle_active = context_state[3] > 0.5
            if not menu_active and not battle_active:
                self.spawn_entity_from_novelty(learning_state, context_state, raw_position)
        
        current_map = int(context_state[2])
        loc = self.get_location_key(*(raw_position if raw_position else (context_state[0]*255, context_state[1]*255)), current_map)
        
        # Visit counters for the current map and location.
        self.visited_maps[current_map] = self.visited_maps.get(current_map, 0) + 1
        self.location_memory[loc] = self.location_memory.get(loc, 0) + 1
        
        # Novelty debt grows with every visit past the free allowance
        # (10 per map, 15 per location) and is capped at the configured maximum.
        if self.visited_maps[current_map] > 10:
            self.map_novelty_debt[current_map] = min(self.MAX_MAP_DEBT, 
                self.map_novelty_debt.get(current_map, 0.0) + 0.05 * (self.visited_maps[current_map] - 10))
        if self.location_memory[loc] > 15:
            self.location_novelty[loc] = min(self.MAX_LOCATION_DEBT,
                self.location_novelty.get(loc, 0.0) + 0.1 * (self.location_memory[loc] - 15))
        
        # Dampen the learning signal in over-visited places. The error histories
        # above already recorded the undamped value.
        if self.visited_maps[current_map] > 30:
            weighted_error *= 0.5
        if self.location_memory[loc] > 25:
            weighted_error *= 0.7
        
        stagnation = self.stagnation_level()
        learning_mult = self.get_learning_multiplier(self.last_action) if self.last_action else 1.0
        if self.detected_pattern and self.last_action in self.detected_pattern:
            learning_mult *= 0.5
        
        # Only the perceptron of the last action uses learning_mult; every action
        # that is part of the detected pattern is halved (again, for the last action).
        for p in self.perceptrons:
            mult = learning_mult if (p.kind == "action" and p.action == self.last_action) else 1.0
            if p.kind == "action" and self.detected_pattern and p.action in self.detected_pattern:
                mult *= 0.5
            p.update(learning_state, weighted_error * mult, stagnation=stagnation)
        
        # Slowly decay the Start/Select weights on every step.
        for a in self.actions():
            if a.action in ['Start', 'Select'] and a.weights is not None:
                a.weights *= 0.999
        
        self.apply_repetition_penalty()
        self.apply_pattern_penalty()
        self.enforce_utility_floors()
        
        # Movement boost - ONLY if not stuck in repetition and not in navigation
        # ONLY in overworld - menu/battle state changes don't count as movement progress
        if prev_context is not None and np.linalg.norm(context_state[:2] - prev_context[:2]) > 0.001:
            if self.last_action and self.consecutive_action_count < self.PENALTY_THRESHOLD:
                menu_active = context_state[4] > 0.5
                battle_active = context_state[3] > 0.5
                if not menu_active and not battle_active:
                    if not self.nav_active:
                        for a in self.actions():
                            if a.action == self.last_action:
                                # Larger boost when is_near_map_edge() reports an edge position.
                                boost = 1.15 if raw_position and self.is_near_map_edge(*raw_position) else 1.08
                                a.utility = min(a.utility * boost, 2.0)
                                break
                    else:
                        # During navigation the boost is scaled by NAV_LEARNING_DAMPENING.
                        for a in self.actions():
                            if a.action == self.last_action:
                                boost = 1.0 + (0.08 * self.NAV_LEARNING_DAMPENING)
                                a.utility = min(a.utility * boost, 2.0)
                                break
        
        if self.timestep % self.SAVE_INTERVAL == 0:
            self.save_exploration_memory()
        
        self.action_history.append(self.last_action)

    def log_state(self, learning_state, context_state):
        """Append the given learning and context states to their history containers."""
        pairs = (
            (self.prev_learning_states, learning_state),
            (self.prev_context_states, context_state),
        )
        for history, snapshot in pairs:
            history.append(snapshot)

    def update_position(self, x, y):
        """Record ``(x, y)``, each converted with ``int()``, in ``self.last_positions``."""
        point = (int(x), int(y))
        self.last_positions.append(point)
    
    def get_tile_interaction_stats(self, map_id):
        """
        Summarise the tile-interaction records stored for ``map_id``.

        Returns a dict with the number of recorded tiles (``probed``), how many
        are flagged ``exhausted``, and how many have a positive success count
        for at least one of the directions 0-3 (``with_success``).
        """
        records = self.get_current_map_memory(map_id).get('tile_interactions', {})

        exhausted_count = 0
        success_count = 0
        for record in records.values():
            if record.get('exhausted', False):
                exhausted_count += 1
            per_direction = record.get('direction_successes', {})
            if any(per_direction.get(d, 0) > 0 for d in range(4)):
                success_count += 1

        return {
            'probed': len(records),
            'exhausted': exhausted_count,
            'with_success': success_count,
        }

    def load_taught_model(self, filepath):
        """
        Restore perceptron parameters and novelty-debt tracking from a JSON checkpoint.

        For every saved action/entity, the first existing perceptron with the
        same ``action`` / ``entity_type`` receives the saved utility,
        familiarity, (for actions) learning rate, and sparse weights. Saved
        perceptrons with no existing counterpart are ignored. After the actions
        are loaded, the Start and Select perceptrons that have weights get them
        zeroed and their utility set to 0.05.

        Location keys are stored as the ``str()`` of the key and are parsed back
        with ``ast.literal_eval``; entries that are not plain Python literals
        are skipped with a message instead of being evaluated as code.

        Args:
            filepath: path of the JSON checkpoint file.

        Returns:
            The ``timestep`` stored in the file (0 if absent). Returns 0 when the
            file has no ``"perceptrons"`` section or any error occurs while
            loading; a message is printed in both cases.
        """
        # Function-scope import: only this method needs it.
        import ast

        def restore_weights(perceptron, saved):
            """Rebuild a dense weight vector from the saved [index, value] pairs."""
            if saved.get("weights_nonzero"):
                dim = saved.get("weights_shape", 1376)
                perceptron.weights = np.zeros(dim)
                for idx, val in saved["weights_nonzero"]:
                    if idx < dim:
                        perceptron.weights[idx] = val

        try:
            with open(filepath, 'r') as f:
                model = json.load(f)

            if "perceptrons" not in model:
                print("  ‚ö†Ô∏è Model file empty or invalid, starting fresh")
                return 0

            current_actions = self.actions()
            for saved_action in model["perceptrons"]["actions"]:
                for a in current_actions:
                    if a.action == saved_action["action"]:
                        a.utility = saved_action["utility"]
                        a.learning_rate = saved_action.get("learning_rate", 0.01)
                        a.familiarity = saved_action.get("familiarity", 0.0)
                        restore_weights(a, saved_action)
                        break

            # Neutralise Start/Select once, after everything is loaded. This used
            # to sit inside the matching loop behind the `break`, so whether a
            # button was reset depended on the order of the perceptron list.
            for a in current_actions:
                if a.action in ['Start', 'Select'] and a.weights is not None:
                    a.weights = np.zeros(len(a.weights))
                    a.utility = 0.05

            current_entities = self.entities()
            for saved_entity in model["perceptrons"].get("entities", []):
                for e in current_entities:
                    if e.entity_type == saved_entity["entity_type"]:
                        e.utility = saved_entity.get("utility", 1.0)
                        e.familiarity = saved_entity.get("familiarity", 0.0)
                        restore_weights(e, saved_entity)
                        break

            if "debt_tracking" in model:
                debt = model["debt_tracking"]
                self.map_novelty_debt = {int(k): v for k, v in debt.get("map_novelty_debt", {}).items()}
                self.visited_maps = {int(k): v for k, v in debt.get("visited_maps", {}).items()}
                for k, v in debt.get("location_novelty", {}).items():
                    # SECURITY: keys come from a file on disk, so they are parsed
                    # as literals only. eval() would run arbitrary expressions.
                    try:
                        self.location_novelty[ast.literal_eval(k)] = v
                    except (ValueError, SyntaxError, TypeError):
                        print(f"  Skipping unparseable location key: {k!r}")

            loaded_timestep = model.get("timestep", 0)
            self.timestep = loaded_timestep
            return loaded_timestep

        except Exception as e:
            # Best-effort load: any failure means "start from scratch".
            print(f"  ‚ö†Ô∏è Error loading model: {e}, starting fresh")
            return 0

    def merge_taught_exploration(self, taught_filepath):
        """
        Fold taught exploration data from a JSON file into the per-map memory.

        The file maps ``"map_<id>"`` keys to dicts that may hold ``transitions``
        and ``interactable_objects`` lists. A transition is added unless one
        with the same position and direction is already stored; an interactable
        is added unless an equal entry is already stored. Prints a notice and
        returns when the file does not exist, otherwise prints how many items
        were merged.
        """
        if not Path(taught_filepath).exists():
            print(f"  No taught exploration memory found at {taught_filepath}")
            return

        with open(taught_filepath, 'r') as handle:
            taught_data = json.load(handle)

        new_transitions = 0
        new_interactables = 0

        for map_key, taught_map in taught_data.items():
            ai_map = self.get_current_map_memory(int(map_key.replace('map_', '')))

            for candidate in taught_map.get('transitions', []):
                cand_pos = tuple(candidate['position'])
                cand_dir = candidate['direction']

                duplicate = False
                for known in ai_map['transitions']:
                    if tuple(known['position']) == cand_pos and known['direction'] == cand_dir:
                        duplicate = True
                        break
                if duplicate:
                    continue

                ai_map['transitions'].append(candidate)
                new_transitions += 1

            for obj in taught_map.get('interactable_objects', []):
                if obj in ai_map['interactable_objects']:
                    continue
                ai_map['interactable_objects'].append(obj)
                new_interactables += 1

        print(f"  Merged: {new_transitions} transitions, {new_interactables} interactables")
    
    def save_model_checkpoint(self, filepath):
        """
        Write the model state to ``filepath`` as indented JSON.

        The checkpoint holds the timestep, every action and entity perceptron
        (weights stored sparsely as ``[index, value]`` pairs for entries whose
        magnitude exceeds 1e-10), the novelty-debt dictionaries with their keys
        converted via ``str()``, the control mode, and the Markov, blend and
        battle counters.
        """
        def text_keys(mapping):
            # JSON object keys must be strings.
            return {str(key): value for key, value in mapping.items()}

        def sparse(weights):
            # Returns (length, [[index, value], ...]); a missing vector is (0, []).
            if weights is None:
                return 0, []
            pairs = [[i, float(v)] for i, v in enumerate(weights) if abs(v) > 1e-10]
            return len(weights), pairs

        action_records = []
        entity_records = []
        model = {
            "timestep": self.timestep,
            "perceptrons": {"actions": action_records, "entities": entity_records},
            "debt_tracking": {
                "map_novelty_debt": text_keys(self.map_novelty_debt),
                "location_novelty": text_keys(self.location_novelty),
                "visited_maps": text_keys(self.visited_maps),
            },
            "control_mode": self.control_mode,
            "markov_stats": {
                "markov_action_count": self.markov_action_count,
                "curiosity_action_count": self.curiosity_action_count,
            },
            "blend_stats": {
                "blend_count": self.blend_count,
                "last_blend_tier": self.blend_tier,
            },
            "battle_stats": {
                "battle_action_count": self.battle_action_count,
                "battle_markov_action_count": self.battle_markov_action_count,
                "current_battle_id": self.current_battle_id,
            },
        }

        for act in self.actions():
            shape, nonzero = sparse(act.weights)
            action_records.append({
                "action": act.action,
                "group": act.group,
                "utility": float(act.utility),
                "weights_shape": shape,
                "weights_nonzero": nonzero,
                "learning_rate": float(act.learning_rate),
                "familiarity": float(act.familiarity),
            })

        for ent in self.entities():
            shape, nonzero = sparse(ent.weights)
            entity_records.append({
                "entity_type": ent.entity_type,
                "utility": float(ent.utility),
                "weights_shape": shape,
                "weights_nonzero": nonzero,
                "familiarity": float(ent.familiarity),
            })

        with open(filepath, 'w') as handle:
            json.dump(model, handle, indent=2)

In [None]:
# ============================================================================
# CELL 4: Action Selection - Battle + Cross-Map Nav + Markov + Curiosity
# ============================================================================
# CHANGES:
# 1. battle_action() - separate battle handler (Markov + A fallback)
# 2. Battle intercept at top of anticipatory_action()
# 3. Navigation section handles paused state - lets curiosity run when
#    nav is paused waiting for transition discovery
# ============================================================================

import random

# Button names, in a fixed order.
# NOTE(review): these are Title-case while the lookup tables below use
# upper-case names; confirm which casing the action perceptrons carry.
GBA_ACTIONS = [
    "Up", "Down", "Left", "Right",
    "A", "B",
    "Start", "Select",
]

# (dx, dy) tile offset per movement name; UP decreases y.
ACTION_DELTAS = dict(UP=(0, -1), DOWN=(0, 1), LEFT=(-1, 0), RIGHT=(1, 0))

# Numeric direction code -> movement name, and the inverse lookup.
DIRECTION_TO_ACTION = dict(enumerate(("DOWN", "UP", "LEFT", "RIGHT")))
ACTION_TO_DIRECTION = {name: code for code, name in DIRECTION_TO_ACTION.items()}

def manhattan_distance(pos1, pos2):
    """Return the L1 (taxicab) distance between the first two coordinates of two positions."""
    dx = pos1[0] - pos2[0]
    dy = pos1[1] - pos2[1]
    return abs(dx) + abs(dy)


# ============================================================================
# BATTLE ACTION - Completely isolated from overworld logic
# ============================================================================

def battle_action(brain, context_state, palette_state=None):
    """
    Pick the action to take during a battle.

    None of the overworld machinery (navigation, curiosity, forced random) is
    used here. The choice is deterministic:

    1. Ask the brain for a battle Markov match against the taught frames.
    2. On a match, use the matched action; START/SELECT are replaced by A.
    3. Without a usable match, press A.
    4. If there is no A action either, return the first available action
       without recording it.

    Every call bumps the battle frame and action counters; a chosen Markov or
    A action is also recorded in the execution, consecutive-action and battle
    history trackers.
    """
    candidates = brain.actions()

    brain.battle_frame_count += 1
    brain.battle_action_count += 1

    def commit(perceptron):
        # Shared bookkeeping for whichever action ends up being returned.
        brain.record_action_execution(perceptron.action)
        brain.track_consecutive_action(perceptron.action)
        brain.battle_action_history.append(perceptron.action)
        return perceptron

    matched, action_name, _score = brain.get_battle_markov_action(
        context_state, palette_state
    )

    if matched and action_name:
        if action_name in ("START", "SELECT"):
            action_name = "A"

        brain.battle_markov_action_count += 1
        brain.last_battle_markov_action = action_name

        for candidate in candidates:
            if candidate.action == action_name:
                return commit(candidate)

    # No match, or the matched name is not an available action: fall back to A.
    for candidate in candidates:
        if candidate.action == "A":
            return commit(candidate)

    return candidates[0]


# ============================================================================
# CURIOSITY OVERRIDE CHECK (overworld only)
# ============================================================================

def _check_curiosity_override(brain, learning_state, context_state, raw_position, map_density):
    """
    Check if curiosity detects something novel enough to override navigation.
    Returns True if curiosity should take over.
    """
    raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
    raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
    current_map = int(context_state[2])
    
    memory = brain.get_current_map_memory(current_map)
    
    if (raw_x, raw_y) not in memory['visited_tiles']:
        return True
    
    if brain.should_interact_at_tile(raw_x, raw_y, current_map):
        untried = brain.get_untried_directions(raw_x, raw_y, current_map)
        if untried:
            return True
    
    if context_state[3] <= 0.5 and context_state[4] <= 0.5 and brain.entities():
        entity_signal = np.mean([abs(e.predict(learning_state)) * e.utility for e in brain.entities()])
        density = map_density or {'tier': 'medium'}
        novelty_threshold = {
            'sparse': 0.15, 'thin': 0.25, 'medium': 0.35, 'dense': 0.45
        }.get(density['tier'], 0.35)
        
        if entity_signal > novelty_threshold:
            return True
    
    return False


# ============================================================================
# MAIN ACTION SELECTION - Battle intercept at top
# ============================================================================

def anticipatory_action(brain, learning_state, context_state, 
                       exploration_weight=1.3, min_interact_prob=0.15,
                       raw_position=None,
                       forced_explore_prob=0.18,
                       override_threshold=1.5,
                       taught_frames=None,
                       map_density=None,
                       palette_state=None):
    """
    Select the next action perceptron for the current step.

    ACTION SELECTION HIERARCHY:
    0. BATTLE THREAD (context_state[3] > 0.5 - delegated to battle_action,
       no overworld logic runs)
    1. Forced random action (when the brain reports it is badly stuck)
    2. Navigation mode (follow the brain's current nav path)
       - Paused nav: curiosity/exploration runs while waiting for transitions
       - Curiosity can abort single-map navigation if something novel is detected
    3. Markov imitation (taught situation scored above the density threshold)
    4. Curiosity exploration (default): random exploration with some
       probability, otherwise score every action and pick the best

    Args:
        brain: object providing the perceptrons and all tracking/navigation helpers.
        learning_state: state vector passed to the entity/action predictors.
        context_state: indexable context; indices 0/1 are scaled by 255 as a
            position fallback, 2 is the map id, 3 the battle flag, 4 the menu
            flag and 5 the facing direction, as read below.
        exploration_weight: multiplier for move scores, used only when the
            density tier is not one of the four known tiers.
        min_interact_prob: lower bound for the score of interact actions.
        raw_position: optional (x, y); falls back to the context values.
        forced_explore_prob: random-exploration probability, used only when the
            density tier is not one of the four known tiers.
        override_threshold: factor by which the best out-of-mode score must beat
            the best in-mode score to be chosen.
        taught_frames: taught frames for the Markov check; skipped when falsy.
        map_density: optional dict with a 'tier' key; defaults to 'sparse' here.
        palette_state: forwarded to battle_action.

    Returns:
        One of ``brain.actions()``, or a fresh ``Perceptron("action",
        action="UP", group="move")`` when the brain has no actions.
    """
    actions_list = brain.actions()
    if not actions_list:
        return Perceptron("action", action="UP", group="move")

    # =================================================================
    # === 0. BATTLE INTERCEPT - before ALL overworld logic ===
    # =================================================================
    in_battle = context_state[3] > 0.5
    
    if in_battle:
        return battle_action(brain, context_state, palette_state)

    # =================================================================
    # === OVERWORLD LOGIC BELOW - only runs when NOT in battle ===
    # =================================================================

    # === ADAPT THRESHOLDS TO DATA DENSITY ===
    # Each table maps the density tier to a tuned value; the function argument
    # (or module constant) is only the fallback for an unrecognised tier.
    density = map_density or {'taught_frames': 0, 'tier': 'sparse', 'coverage': 0.0, 'visited': 0}
    tier = density['tier']
    
    markov_threshold = {
        'sparse': 0.72, 'thin': 0.65, 'medium': 0.58, 'dense': 0.50
    }.get(tier, MARKOV_FAMILIARITY_THRESHOLD)
    
    adapted_explore_prob = {
        'sparse': 0.30, 'thin': 0.24, 'medium': 0.18, 'dense': 0.12
    }.get(tier, forced_explore_prob)
    
    adapted_exploration_weight = {
        'sparse': 1.8, 'thin': 1.5, 'medium': 1.3, 'dense': 1.1
    }.get(tier, exploration_weight)
    
    transition_weight_mult = {
        'sparse': 0.3, 'thin': 0.6, 'medium': 1.0, 'dense': 1.4
    }.get(tier, 1.0)

    raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
    raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
    current_map = int(context_state[2])
    current_dir = int(context_state[5])
    current_pos = (raw_x, raw_y)

    # === 1. FORCED RANDOMIZATION + BLEND (highest priority when stuck) ===
    brain.check_state_stagnation(context_state)
    
    if brain.should_force_random():
        if brain.is_nav_active():
            brain.abort_navigation("forced random triggered")
        
        forced_name = brain.get_forced_random_action_name()
        # If no action carries the forced name, this falls through to the
        # sections below without returning.
        for a in actions_list:
            if a.action == forced_name:
                brain.curiosity_action_count += 1
                brain.record_action_execution(a.action)
                brain.track_consecutive_action(a.action)
                print(f"  üé≤ FORCED RANDOM: {forced_name} (pos_stag={brain.get_position_stagnation()}, "
                      f"repeat={brain.consecutive_action_count}, pattern={brain.pattern_repeat_count})")
                return a

    # === 2. NAVIGATION MODE ===
    
    # The known-area counter is only updated outside battle and menu.
    if context_state[3] <= 0.5 and context_state[4] <= 0.5:
        brain.update_known_area_counter(raw_x, raw_y, current_map)
    
    if not brain.is_nav_active() and brain.should_start_navigation():
        if context_state[3] <= 0.5 and context_state[4] <= 0.5:
            started = brain.start_navigation(current_pos, current_map)
            if started:
                brain.known_area_counter = 0
    
    if brain.is_nav_active():
        # === PAUSED NAV: waiting for transition discovery ===
        # Let curiosity/exploration run - don't follow a path, don't abort
        # The pause check happens inside update_nav_state
        if brain.is_nav_paused():
            # Still call update_nav_state so it checks for resume
            brain.update_nav_state(current_pos, current_map)
            # Fall through to Markov/Curiosity below - explore freely
        else:
            # === ACTIVE NAV: following a path ===
            if _check_curiosity_override(brain, learning_state, context_state, raw_position, map_density):
                # Cross-map navigation is never aborted for curiosity. Nothing is
                # done in that case, so this step falls through to the
                # Markov/curiosity sections below without advancing the nav path.
                if brain.nav_map_chain:
                    pass  # Keep the cross-map navigation alive
                else:
                    brain.abort_navigation("curiosity override")
                    print(f"  üß≠‚Üíüîç NAV interrupted: curiosity detected novelty at ({raw_x}, {raw_y})")
            else:
                nav_continue = brain.update_nav_state(current_pos, current_map)
                
                if nav_continue:
                    nav_action_name = brain.get_nav_action(current_pos)
                    
                    if nav_action_name:
                        for a in actions_list:
                            if a.action == nav_action_name:
                                # Nav steps are counted under the curiosity counter.
                                brain.curiosity_action_count += 1
                                brain.record_action_execution(a.action)
                                brain.track_consecutive_action(a.action)
                                return a
                    else:
                        # No next step available: give up unless nav got paused meanwhile.
                        if not brain.nav_paused:
                            brain.abort_navigation("path invalid")
    
    # Handle nav curiosity window (arrived at target, letting curiosity check)
    if brain.is_in_nav_curiosity_window():
        window_expired = brain.tick_nav_curiosity_window()
        
        if window_expired:
            memory = brain.get_current_map_memory(current_map)
            tile_needs_probing = brain.should_interact_at_tile(raw_x, raw_y, current_map)
            found_novelty = tile_needs_probing or (current_pos not in memory['visited_tiles'])
            
            brain.complete_nav_target(found_novelty)

    # === 3. MARKOV CHECK (with adapted threshold) ===
    use_markov = False
    markov_action = None
    # markov_confidence is assigned below but not read anywhere in this function.
    markov_confidence = 0.0
    
    if brain.markov_enabled and taught_frames:
        score, action, idx = brain.compute_markov_similarity(
            context_state, raw_position, taught_frames=taught_frames
        )
        brain.last_markov_score = score
        
        if score >= markov_threshold and action:
            use_markov = True
            markov_action = action
            markov_confidence = score
            brain.last_markov_action = action
    
    if use_markov and markov_action:
        # If no perceptron carries the Markov action name, curiosity takes over below.
        for a in actions_list:
            if a.action == markov_action:
                brain.markov_action_count += 1
                brain.record_action_execution(a.action)
                brain.track_consecutive_action(a.action)
                
                if a.action == 'A':
                    if brain.should_interact_at_tile(raw_x, raw_y, current_map):
                        brain.start_interaction_verification(
                            raw_x, raw_y, current_map, int(context_state[5])
                        )
                
                return a

    # === 4. CURIOSITY-DRIVEN SELECTION (default) ===
    brain.curiosity_action_count += 1
    
    mode = brain.determine_control_mode(context_state, raw_position=raw_position)
    
    memory = brain.get_current_map_memory(current_map)
    visited_tiles = memory['visited_tiles']
    obstructions = memory['obstructions']
    
    tile_needs_probing = brain.should_interact_at_tile(raw_x, raw_y, current_map)
    probe_action, probe_dir = brain.get_best_probe_action(raw_x, raw_y, current_map, current_dir)
    
    transition_attraction, best_transition = brain.get_transition_attraction(current_map)
    coverage = brain.get_exploration_coverage(current_map)

    # Forced random exploration (adapted to density)
    if random.random() < adapted_explore_prob:
        valid = [a for a in actions_list if a.action not in ['Start', 'Select']]
        chosen = random.choice(valid)
        brain.record_action_execution(chosen.action)
        brain.track_consecutive_action(chosen.action)
        if chosen.action == 'A' and tile_needs_probing:
            brain.start_interaction_verification(raw_x, raw_y, current_map, current_dir)
        return chosen

    # Score ALL actions
    action_scores = {}
    
    for a in actions_list:
        # Start/Select get a zero score, which the "s > 0" filters below exclude.
        # NOTE(review): battle_action compares against upper-case "START"/"SELECT";
        # confirm which casing the perceptrons actually use.
        if a.action in ['Start', 'Select']:
            action_scores[a.action] = (a, 0.0)
            continue
            
        predicted = brain.predict_future_error(learning_state, a, context_state, raw_position=raw_position)
        
        if a.group == "move":
            predicted *= adapted_exploration_weight
            
            # Names missing from the lookup tables get a (0, 0) offset / direction -1.
            dx, dy = ACTION_DELTAS.get(a.action, (0, 0))
            target_tile = (raw_x + dx, raw_y + dy)
            action_direction = ACTION_TO_DIRECTION.get(a.action, -1)
            
            if target_tile not in visited_tiles:
                predicted *= brain.UNVISITED_TILE_BONUS
            
            if target_tile in obstructions:
                predicted *= brain.OBSTRUCTION_PENALTY
            
            if brain.is_position_banned(current_map, raw_x, raw_y, action_direction):
                predicted *= 0.05
            
            # Pull toward the best known transition once the map is more than half covered
            # and the move brings the agent closer to it.
            if transition_attraction > 0.3 and best_transition and coverage > 0.5:
                trans_pos = tuple(best_transition['position']) if isinstance(best_transition['position'], list) else best_transition['position']
                if manhattan_distance(target_tile, trans_pos) < manhattan_distance(current_pos, trans_pos):
                    predicted *= (1.0 + transition_attraction * transition_weight_mult)
            
            if probe_action == a.action and probe_dir is not None:
                predicted *= 2.0
            
            # Random factor in [0.9, 1.1) to break ties between moves.
            predicted *= (0.9 + random.random() * 0.2)
        
        elif a.group == "interact":
            predicted = max(predicted, min_interact_prob)
            
            if a.action == 'B':
                predicted *= brain.menu_trap_b_boost
            
            # A is boosted only when probing is needed and A is the suggested probe.
            if a.action == 'A':
                if tile_needs_probing and probe_action == 'A':
                    predicted *= 3.0
                elif tile_needs_probing:
                    predicted *= 0.5
                else:
                    predicted *= 0.3
        
        action_scores[a.action] = (a, predicted)

    # Find best in-mode and best out-of-mode
    if mode == "battle":
        preferred_group = "interact"
    elif mode == "interact":
        preferred_group = "interact"
    else:
        preferred_group = "move"
    
    in_mode = [(a, s) for name, (a, s) in action_scores.items() if a.group == preferred_group and s > 0]
    out_mode = [(a, s) for name, (a, s) in action_scores.items() if a.group != preferred_group and s > 0 and a.action not in ['Start', 'Select']]
    
    best_in_mode = max(in_mode, key=lambda x: x[1]) if in_mode else None
    best_out_mode = max(out_mode, key=lambda x: x[1]) if out_mode else None
    
    chosen = None
    
    # An out-of-mode action wins only if it beats the in-mode best by override_threshold.
    if best_in_mode and best_out_mode:
        if best_out_mode[1] > best_in_mode[1] * override_threshold:
            chosen = best_out_mode[0]
        else:
            chosen = best_in_mode[0]
    elif best_in_mode:
        chosen = best_in_mode[0]
    elif best_out_mode:
        chosen = best_out_mode[0]
    else:
        # Nothing scored above zero: fall back to the highest-utility action.
        chosen = max(actions_list, key=lambda a: a.utility)
    
    brain.record_action_execution(chosen.action)
    brain.track_consecutive_action(chosen.action)
    
    if chosen.action == 'A' and tile_needs_probing:
        brain.start_interaction_verification(raw_x, raw_y, current_map, current_dir)
    
    return chosen

In [None]:
# ============================================================================
# CELL 5: Cache System - MapCache, CacheManager, IOThread
# ============================================================================
# NEW CELL - Layer on top of existing code, no rewrites
#
# MapCache: Per-map data container (exploration + taught transitions + live state)
# CacheManager: Indexes all maps at startup, handles switching
# IOThread: Background file I/O decoupled from Brain
# ============================================================================

import threading
import gc

class MapCache:
    """Lock-guarded holder for everything cached about a single map.

    Every accessor except ``get_taught_frames`` takes ``self.lock``, so the
    live-state fields can be written by one thread and read by another.
    """

    def __init__(self, map_id):
        self.map_id = map_id
        self.lock = threading.Lock()

        # Per-map reference data, attached by CacheManager after construction:
        # exploration_memory[map_id] and the taught transitions for this map.
        self.exploration_data = None
        self.taught_frames = []

        # Live state (IOThread writes, Brain reads); zero-filled until the
        # first update_state() call.
        self.current_state = np.zeros(EXPECTED_STATE_DIM)
        self.palette = np.zeros(PALETTE_DIM)
        self.tiles = np.zeros(TILE_DIM)
        self.raw_position = (0, 0)
        self.dead = False
        self.state_fresh = False  # set by update_state, cleared by mark_consumed
        self.state_version = 0    # bumped once per update_state call

        # Outgoing action (Brain writes, IOThread reads and clears).
        self.pending_action_out = None

    def get_state(self):
        """Return ``(state, palette, tiles, dead, raw_position)``.

        The three arrays are copies, so the caller may mutate them freely.
        """
        with self.lock:
            state_copy = self.current_state.copy()
            palette_copy = self.palette.copy()
            tiles_copy = self.tiles.copy()
            return state_copy, palette_copy, tiles_copy, self.dead, self.raw_position

    def update_state(self, context_state, palette, tiles, dead, raw_position):
        """Store a new observation, flag it fresh and bump the version."""
        with self.lock:
            self.current_state, self.palette, self.tiles = context_state, palette, tiles
            self.dead, self.raw_position = dead, raw_position
            self.state_version += 1
            self.state_fresh = True

    def is_fresh(self):
        """Return True if state was written since the last mark_consumed()."""
        with self.lock:
            fresh = self.state_fresh
        return fresh

    def mark_consumed(self):
        """Clear the freshness flag; the version counter is left untouched."""
        with self.lock:
            self.state_fresh = False

    def get_version(self):
        """Return the number of update_state() calls seen so far."""
        with self.lock:
            version = self.state_version
        return version

    def set_pending_action(self, action_name):
        """Queue *action_name* for output, replacing any queued action."""
        with self.lock:
            self.pending_action_out = action_name

    def get_pending_action(self):
        """Pop the queued action: return it (or None) and clear the slot."""
        with self.lock:
            queued, self.pending_action_out = self.pending_action_out, None
        return queued

    def get_taught_frames(self):
        """Return this map's taught transitions.

        The list is handed out by reference and without taking the lock; it
        is treated as read-only after initialisation.
        """
        return self.taught_frames


class CacheManager:
    """Manages all MapCaches. Pre-indexes at startup, handles map switching.

    One MapCache is kept per map id in ``self.caches``.  At most one of them
    is *active* at a time (none before the first switch) and is what
    ``get_active()`` returns.  Exploration data is shared by reference with
    ``brain.exploration_memory`` rather than copied.
    """

    def __init__(self, brain):
        self.brain = brain
        self.caches = {}        # map_id -> MapCache
        self.active_cache = None
        self.active_map_id = None
        self.lock = threading.Lock()

    def load_all(self, exploration_path=None, taught_path=None):
        """
        Startup: create a MapCache for every map the Brain knows about and
        index the Brain's taught transitions by map.

        Args:
            exploration_path: Unused. Kept so existing callers keep working;
                exploration memory is taken from ``brain.exploration_memory``.
            taught_path: Unused. Kept so existing callers keep working;
                taught frames are taken from ``brain.taught_transitions``.

        Frames without a dict-valued ``'state'`` entry, or whose state has no
        ``'map_id'``, cannot be attributed to a map and are skipped.
        """
        # 1. Exploration memory is already loaded in Brain
        self.sync_all_from_brain()

        # 2. Index taught transitions by map_id
        taught_by_map = {}
        for frame in self.brain.taught_transitions:
            frame_state = frame.get('state') if isinstance(frame, dict) else None
            if not isinstance(frame_state, dict):
                # e.g. "state": null in the JSON - skip instead of crashing
                # the whole startup on one malformed frame.
                continue
            t_map = frame_state.get('map_id')
            if t_map is not None:
                taught_by_map.setdefault(t_map, []).append(frame)

        for map_id, frames in taught_by_map.items():
            self._get_or_create(map_id).taught_frames = frames

        total_maps = len(self.caches)
        total_taught = sum(len(c.taught_frames) for c in self.caches.values())
        print(f"  üì¶ CacheManager: {total_maps} maps cached, {total_taught} taught frames indexed")

    def _get_or_create(self, map_id):
        """Return the cache for *map_id*, creating an empty one if needed."""
        if map_id not in self.caches:
            self.caches[map_id] = MapCache(map_id)
        return self.caches[map_id]

    def get_active(self):
        """Return the active MapCache, or None if no map has been selected."""
        return self.active_cache

    def detect_and_set_initial_map(self):
        """Read game_state.json once to determine starting map.

        Returns the detected map id (index 2 of the context state, as int).
        """
        ctx, pal, til, dead, raw_pos = read_game_state()
        map_id = int(ctx[2])
        self._switch_to(map_id)
        # Seed the active cache with the initial state
        self.active_cache.update_state(ctx, pal, til, dead, raw_pos)
        print(f"  üì¶ Initial map: {map_id}")
        return map_id

    def switch_map(self, new_map_id):
        """Called by main thread when map changes. No-op if already active."""
        if new_map_id == self.active_map_id:
            return
        self._sync_from_brain()  # Save Brain's exploration data back to current cache
        self._switch_to(new_map_id)
        self._sync_to_brain()    # Load new cache's data into Brain

    def _switch_to(self, map_id):
        """Make *map_id*'s cache the active one (created on demand)."""
        with self.lock:
            cache = self._get_or_create(map_id)
            self.active_cache = cache
            self.active_map_id = map_id

    def _sync_to_brain(self):
        """Push active cache's exploration data into Brain."""
        cache = self.active_cache
        if cache and cache.exploration_data is not None:
            self.brain.exploration_memory[cache.map_id] = cache.exploration_data

    def _sync_from_brain(self):
        """Pull Brain's exploration data into active cache."""
        cache = self.active_cache
        if cache and cache.map_id in self.brain.exploration_memory:
            cache.exploration_data = self.brain.exploration_memory[cache.map_id]

    def sync_all_from_brain(self):
        """Sync ALL maps from Brain back to caches (for saving)."""
        for map_id, mem_data in self.brain.exploration_memory.items():
            self._get_or_create(map_id).exploration_data = mem_data

    def save_exploration_memory(self):
        """Save all maps' exploration data to disk."""
        self._sync_from_brain()  # Make sure current map is synced
        self.brain.save_exploration_memory()

    def get_active_taught_frames(self):
        """Return taught frames for current map only (for fast Markov scan)."""
        if self.active_cache:
            return self.active_cache.get_taught_frames()
        return []

    def get_map_density(self):
        """
        Returns a density dict for the active map, used to adapt thresholds.

        Density tiers (by number of taught frames on the active map):
          sparse:  < 50 taught frames
          thin:    50-199
          medium:  200-999
          dense:   1000+

        Also includes exploration coverage and visited tile count.
        With no active cache, an all-zero 'sparse' dict is returned.
        """
        if not self.active_cache:
            return {'taught_frames': 0, 'tier': 'sparse', 'coverage': 0.0, 'visited': 0}

        n_frames = len(self.active_cache.get_taught_frames())
        map_id = self.active_map_id

        # Exploration data from brain
        coverage = self.brain.get_exploration_coverage(map_id) if map_id is not None else 0.0
        memory = self.brain.get_current_map_memory(map_id) if map_id is not None else {}
        visited = len(memory.get('visited_tiles', set()))

        if n_frames < 50:
            tier = 'sparse'
        elif n_frames < 200:
            tier = 'thin'
        elif n_frames < 1000:
            tier = 'medium'
        else:
            tier = 'dense'

        return {
            'taught_frames': n_frames,
            'tier': tier,
            'coverage': coverage,
            'visited': visited
        }


class IOThread(threading.Thread):
    """Background thread: reads game_state.json, writes action.json.

    Each iteration takes the CacheManager's active cache, stores a freshly
    read game state in it, writes out the cache's pending action (if any),
    and then waits ``interval`` seconds.  While there is no active cache the
    thread just idles.

    Args:
        cache_manager: object exposing ``get_active()``.
        interval: seconds to wait between iterations.
        gc_interval: run ``gc.collect()`` every N successful iterations;
            a falsy value (0 / None) disables the periodic collection.
    """

    def __init__(self, cache_manager, interval=0.02, gc_interval=300):
        super().__init__(daemon=True)
        self.cm = cache_manager
        self.interval = interval
        self.gc_interval = gc_interval  # GC every N iterations
        self.running = False
        self._iteration = 0
        # Sticky stop request.  Unlike the plain `running` flag, it cannot be
        # overwritten when run() starts, so a stop() that lands between
        # start() and the first line of run() is not lost.
        self._stop_requested = threading.Event()

    def run(self):
        """Thread body: poll until stop() is called (or `running` is cleared)."""
        if self._stop_requested.is_set():
            # stop() won the race against start(): never enter the loop.
            return

        self.running = True
        print(f"  üîÑ IOThread started (interval={self.interval*1000:.0f}ms)")

        try:
            # `running` is still honoured so code that clears the flag
            # directly keeps working.
            while self.running and not self._stop_requested.is_set():
                try:
                    cache = self.cm.get_active()
                    if cache is not None:
                        self._poll_once(cache)
                except Exception as e:
                    # Best effort: a bad read/write must not kill the thread.
                    print(f"  [IOThread ERROR] {e}")

                # Interruptible sleep: returns early as soon as stop() is called.
                self._stop_requested.wait(self.interval)
        finally:
            self.running = False

    def _poll_once(self, cache):
        """Do one read-state / write-action exchange against *cache*."""
        # --- READ game_state.json ---
        ctx, pal, til, dead, raw_pos = read_game_state()
        cache.update_state(ctx, pal, til, dead, raw_pos)

        # --- WRITE action.json ---
        action = cache.get_pending_action()
        if action is not None:
            write_action(action)

        # --- PERIODIC GC ---
        self._iteration += 1
        if self.gc_interval and self._iteration % self.gc_interval == 0:
            gc.collect()

    def stop(self):
        """Ask the thread to exit; does not wait for it (use join() for that)."""
        self._stop_requested.set()
        self.running = False
        print("  üîÑ IOThread stopped")

In [None]:
# ============================================================================
# CELL 6: Main Loop - Battle + Cross-Map Nav + Cache + Blend
# ============================================================================
# CHANGES:
# 1. Load battle transitions at startup
# 2. Battle start/end detection and logging
# 3. Pass palette_state to anticipatory_action for battle Markov
# 4. Cross-map nav: advance_map_chain() on map change
# 5. Cross-map nav status in logging
# 6. Map graph info in startup banner and milestones
# ============================================================================

# --- Startup sequence -------------------------------------------------------
# Builds the Brain, registers the eight button actions, loads every persisted
# data file, then brings up the cache layer and the background I/O thread.
# Order matters: CacheManager.load_all() indexes brain.exploration_memory and
# brain.taught_transitions, so the brain.load_* / merge_* calls must run first.
brain = Brain()

# One "action" Perceptron per button. The `group` tag ("move" / "interact")
# is what action selection compares against its preferred group.
for b in ["UP", "DOWN", "LEFT", "RIGHT"]:
    brain.add(Perceptron("action", action=b, group="move"))
for b in ["A", "B", "Start", "Select"]:
    brain.add(Perceptron("action", action=b, group="interact"))

# === FILE PATHS ===
TAUGHT_MODEL_PATH = BASE_PATH / "taught_model_checkpoint.json"
# NOTE(review): same path as TAUGHT_EXPLORATION_FILE defined in Cell 1.
TAUGHT_EXPLORATION_PATH = BASE_PATH / "taught_exploration_memory.json"

# === LOAD AI'S OWN MODEL (primary) ===
if MODEL_CHECKPOINT_FILE.exists():
    loaded_ts = brain.load_taught_model(MODEL_CHECKPOINT_FILE)
    print(f"ü§ñ AI MODEL: Loaded from timestep {loaded_ts}")
    print(f"   Utilities: {[f'{a.action}:{a.utility:.3f}' for a in brain.actions()]}")
else:
    print("ü§ñ AI MODEL: No existing model ‚Äî starting fresh")

# === LOAD TAUGHT REFERENCE (read-only, for stagnation blending) ===
brain.load_taught_reference(TAUGHT_MODEL_PATH)

# === MERGE TAUGHT EXPLORATION (additive) ===
brain.merge_taught_exploration(TAUGHT_EXPLORATION_PATH)

# === LOAD TAUGHT TRANSITIONS FOR OVERWORLD MARKOV ===
brain.load_taught_transitions(TAUGHT_TRANSITIONS_FILE)

# === LOAD TAUGHT BATTLE TRANSITIONS FOR BATTLE MARKOV ===
brain.load_taught_battle_transitions(TAUGHT_BATTLE_TRANSITIONS_FILE)

# === LOAD TAUGHT NAVIGATION TARGETS ===
brain.load_taught_nav_targets(TAUGHT_NAV_TARGETS_FILE)

# === BUILD INITIAL MAP GRAPH ===
# graph_edges / graph_maps are only used for the startup banner below.
map_graph = brain.build_map_graph()
graph_edges = sum(len(v) for v in map_graph.values())
graph_maps = list(map_graph.keys())

# === INITIALIZE CACHE SYSTEM ===
cache_manager = CacheManager(brain)
cache_manager.load_all()
# Selects the active cache from one synchronous read of the game state; the
# main loop calls get_version() on the active cache, so it must not be None.
cache_manager.detect_and_set_initial_map()

# === START I/O THREAD ===
# 0.02 s polling interval (20 ms); gc.collect() every 300 iterations.
io_thread = IOThread(cache_manager, interval=0.02, gc_interval=300)
io_thread.start()

# Tuning knobs passed to anticipatory_action() on every decision.
exploration_weight = 1.3
forced_explore_prob = 0.18
prev_context_state = None
prev_raw_position = None
# MapCache versions start at 0 and only increase, so -1 never matches and the
# first observed state is always processed.
last_processed_version = -1

# --- Startup banner ---------------------------------------------------------
# Purely informational: prints one section per subsystem using values already
# loaded above. Nothing here mutates state (nav_status / active_taught are
# locals used only for display).
print("="*70)
print("AI CONTROL - v12.0 (Battle + Cross-Map Nav + Markov + Cache + Blend)")
print("="*70)
# Model files and taught reference utilities
print("MODELS:")
print(f"  - AI model: {MODEL_CHECKPOINT_FILE}")
print(f"  - Taught reference: {TAUGHT_MODEL_PATH} ({'loaded' if brain.taught_reference['loaded'] else 'NOT FOUND'})")
if brain.taught_reference['loaded']:
    taught_utils = ', '.join(f"{k}:{v:.3f}" for k, v in brain.taught_reference['utilities'].items())
    print(f"  - Taught utilities: {taught_utils}")
print("="*70)
# Battle Markov data (details only shown when battle transitions were loaded)
print("BATTLE SYSTEM:")
print(f"  - Battle transitions: {'LOADED' if brain.battle_loaded else 'NOT FOUND (A-button fallback)'}")
if brain.battle_loaded:
    print(f"  - Battle frames: {len(brain.battle_transitions)}")
    print(f"  - Battles recorded: {brain.battle_metadata.get('battles_recorded', 0)}")
    print(f"  - Avg battle length: {brain.battle_metadata.get('avg_battle_length', 0)}")
    outcomes = brain.battle_metadata.get('outcomes', {})
    if outcomes:
        print(f"  - Outcomes: win={outcomes.get('win',0)} run={outcomes.get('run',0)} loss={outcomes.get('loss',0)}")
    print(f"  - Markov threshold: {BATTLE_MARKOV_THRESHOLD_LOW:.2f}-{BATTLE_MARKOV_THRESHOLD_HIGH:.2f}")
print(f"  - Fallback action: A (advances text, selects defaults)")
print(f"  - Isolated from: navigation, curiosity, stagnation, forced random")
print("="*70)
# Cache layer: number of per-map caches and the active map's taught frames
print("CACHE SYSTEM:")
print(f"  - Maps cached: {len(cache_manager.caches)}")
print(f"  - Active map: {cache_manager.active_map_id}")
active_taught = len(cache_manager.get_active_taught_frames())
print(f"  - Active map taught frames: {active_taught}")
print(f"  - Total taught frames: {len(brain.taught_transitions)}")
print(f"  - IOThread interval: {io_thread.interval*1000:.0f}ms")
print("="*70)
# Blend tiers: the tier descriptions are fixed text; only the cooldown is read
# from the Brain.
print("BLEND SYSTEM:")
print(f"  - Tier 1 (light 80/20):  pattern 3+ | pos stuck 8+ | repeat 12+")
print(f"  - Tier 2 (medium 60/40): pattern 6+ | pos stuck 15+ | repeat 15+")
print(f"  - Tier 3 (hard 40/60):   pattern 10+ | state stag 2x threshold")
print(f"  - Cooldown: {brain.BLEND_COOLDOWN} steps between blends")
print("="*70)
# Navigation constants, taught-target status and the initial map graph
print("NAVIGATION SYSTEM:")
print(f"  - Known area trigger: {brain.KNOWN_AREA_TRIGGER} steps in visited tiles")
print(f"  - Nav stagnation limit: {brain.NAV_STAGNATION_LIMIT} steps")
print(f"  - Nav max steps: {brain.NAV_MAX_STEPS}")
print(f"  - Curiosity window at target: {brain.NAV_CURIOSITY_WINDOW} steps")
print(f"  - Learning dampening during nav: {brain.NAV_LEARNING_DAMPENING}")
nav_status = brain.get_nav_targets_status()
if nav_status['loaded']:
    print(f"  - Taught targets: {nav_status['total']} across {list(brain.taught_nav_targets.keys())}")
else:
    print(f"  - Taught targets: NOT LOADED (using frontier fallback)")
print(f"  CROSS-MAP:")
print(f"  - Map graph: {len(graph_maps)} maps, {graph_edges} edges")
if graph_maps:
    print(f"  - Connected maps: {graph_maps}")
print(f"  - Pause check interval: {brain.NAV_PAUSE_CHECK_INTERVAL} steps")
print("="*70)
# Overworld Markov: the threshold values on the last line are fixed text, not
# read from configuration.
print("MARKOV SYSTEM (OVERWORLD):")
print(f"  - Taught batches: {len(brain.taught_batches)}")
print(f"  - Taught frames: {len(brain.taught_transitions)}")
print(f"  - Density-adaptive thresholds: sparse=0.72 thin=0.65 medium=0.58 dense=0.50")
print("="*70)
# Curiosity / exploration parameters
print("CURIOSITY SYSTEM:")
print(f"  - Forced random exploration: {forced_explore_prob:.0%}")
print(f"  - Unvisited tile bonus: {brain.UNVISITED_TILE_BONUS}x")
print(f"  - Obstruction penalty: {brain.OBSTRUCTION_PENALTY}x")
print("="*70)

try:
    while True:
        # === WAIT FOR NEW STATE FROM IOTHREAD ===
        active_cache = cache_manager.get_active()
        current_version = active_cache.get_version()
        
        if current_version == last_processed_version:
            time.sleep(0.005)
            continue
        
        context_state, palette_state, tile_state, dead, raw_position = active_cache.get_state()
        last_processed_version = current_version
        
        # Skip if zero state (IOThread hasn't started yet)
        if np.sum(np.abs(context_state)) < 0.001:
            time.sleep(0.01)
            continue
        
        raw_x, raw_y = raw_position
        in_battle = context_state[3]
        current_map = int(context_state[2])
        current_dir = int(context_state[5])
        
        # === BATTLE START/END DETECTION ===
        currently_in_battle = in_battle > 0.5
        
        if currently_in_battle and not brain.in_battle_last_frame:
            brain.current_battle_id += 1
            brain.battle_frame_count = 0
            brain.battle_action_history.clear()
            print(f"\n  ‚öîÔ∏è BATTLE START (#{brain.current_battle_id}) at Map {current_map} ({raw_x}, {raw_y})")
            
            if brain.is_nav_active():
                brain.abort_navigation("battle started")
        
        elif not currently_in_battle and brain.in_battle_last_frame:
            battle_markov_rate = (brain.battle_markov_action_count / max(1, brain.battle_action_count))
            print(f"\n  ‚öîÔ∏è BATTLE END (#{brain.current_battle_id}) ‚Äî {brain.battle_frame_count} frames")
            print(f"     Battle Markov rate: {brain.battle_markov_action_count}/{brain.battle_action_count} "
                  f"({battle_markov_rate:.1%})")
            brain.battle_frame_count = 0
        
        brain.in_battle_last_frame = currently_in_battle
        
        # === MAP CHANGE DETECTION ===
        if not currently_in_battle and current_map != cache_manager.active_map_id:
            old_map = cache_manager.active_map_id
            cache_manager.switch_map(current_map)
            active_cache = cache_manager.get_active()
            print(f"  üì¶ Cache switched to map {current_map} "
                  f"(taught frames: {len(active_cache.get_taught_frames())})")
            
            # === CROSS-MAP NAV: advance chain on map change ===
            if brain.nav_map_chain and brain.is_nav_active() and not brain.is_nav_paused():
                current_pos = (raw_x, raw_y)
                chain_continues = brain.advance_map_chain(current_map, current_pos)
                if not chain_continues:
                    brain.abort_navigation("cross-map chain broken")
        
        brain.update_position(raw_x, raw_y)

        derived = compute_derived_features(context_state, prev_context_state)
        learning_state = build_learning_state(derived, palette_state, tile_state, in_battle)
        
        brain.log_state(learning_state, context_state)
        
        # Action execution confirmation
        brain.confirm_action_executed(context_state, prev_context_state)

        if brain.should_send_new_action():
            taught_frames = cache_manager.get_active_taught_frames()
            map_density = cache_manager.get_map_density()
            
            action = anticipatory_action(
                brain, learning_state, context_state,
                exploration_weight=exploration_weight,
                raw_position=raw_position,
                forced_explore_prob=forced_explore_prob,
                taught_frames=taught_frames,
                map_density=map_density,
                palette_state=palette_state
            )

            if action is not None:
                active_cache.set_pending_action(action.action)
                brain.last_action = action.action
                brain.set_pending_action(action.action)
                if not currently_in_battle:
                    brain.update_menu_trap_tracking(context_state, action.action, raw_position=raw_position)
            else:
                active_cache.set_pending_action("NONE")
        else:
            if brain.pending_action:
                active_cache.set_pending_action(brain.pending_action)

        # === LOGGING ===
        if brain.timestep % 100 == 0:
            memory = brain.get_current_map_memory(current_map)
            visited_count = len(memory['visited_tiles'])
            obs_count = len(memory['obstructions'])
            interactables = len(memory['interactable_objects'])
            coverage = brain.get_exploration_coverage(current_map)
            transitions = memory.get('transitions', [])
            tile_stats = brain.get_tile_interaction_stats(current_map)
            
            tile_needs_probing = brain.should_interact_at_tile(raw_x, raw_y, current_map)
            probe_action, probe_dir = brain.get_best_probe_action(raw_x, raw_y, current_map, current_dir)
            
            dir_name = brain.DIRECTION_NAMES.get(current_dir, '?')
            mode = brain.control_mode
            is_both_mode = brain.should_use_both_mode()
            mode_display = "BOTH ‚ö°" if is_both_mode else mode
            
            total_actions = brain.markov_action_count + brain.curiosity_action_count
            markov_ratio = brain.markov_action_count / max(1, total_actions)
            
            density = cache_manager.get_map_density()
            
            print(f"\n{'='*70}")
            print(f"Step {brain.timestep} | Map {current_map} | Pos ({raw_x}, {raw_y}) facing {dir_name}")
            print(f"  Mode: {mode_display} | Battle: {int(in_battle)} | Stagnation: {brain.state_stagnation_count}")
            
            # === BATTLE STATUS ===
            if currently_in_battle:
                battle_total = brain.battle_action_count
                battle_markov = brain.battle_markov_action_count
                battle_markov_rate = battle_markov / max(1, battle_total)
                print(f"\n  ‚öîÔ∏è IN BATTLE (#{brain.current_battle_id}):")
                print(f"     Frame: {brain.battle_frame_count} | Actions: {battle_total}")
                print(f"     Battle Markov: {battle_markov}/{battle_total} ({battle_markov_rate:.1%})")
                print(f"     Last Markov score: {brain.last_battle_markov_score:.3f} "
                      f"(threshold: {BATTLE_MARKOV_THRESHOLD_LOW:.2f}-{BATTLE_MARKOV_THRESHOLD_HIGH:.2f})")
                if brain.last_battle_markov_action:
                    print(f"     Last Markov suggestion: {brain.last_battle_markov_action}")
                print(f"     Battle data: {'LOADED' if brain.battle_loaded else 'NONE (A fallback)'} "
                      f"({len(brain.battle_transitions)} frames)")
            else:
                # === NAVIGATION STATUS (overworld only) ===
                cross_status = brain.get_cross_map_status()
                
                if brain.is_nav_active():
                    if cross_status['active']:
                        # Cross-map navigation
                        chain_str = ' ‚Üí '.join(str(m) for m in cross_status['chain'])
                        current_step = cross_status['chain_index']
                        total_steps = cross_status['chain_length']
                        
                        if brain.is_nav_paused():
                            print(f"\n  üß≠üåç CROSS-MAP NAV PAUSED:")
                            print(f"     Chain: {chain_str} (step {current_step + 1}/{total_steps})")
                            print(f"     Reason: {cross_status['paused_reason']}")
                            print(f"     Resume check in: {brain.nav_pause_check_countdown} steps")
                            if cross_status['final_target']:
                                ft = cross_status['final_target']
                                print(f"     Final target: ({ft[0]}, {ft[1]}) on map {cross_status['target_map']}")
                        else:
                            nav_progress = f"{brain.nav_path_index}/{len(brain.nav_path)}"
                            print(f"\n  üß≠üåç CROSS-MAP NAV ACTIVE:")
                            print(f"     Chain: {chain_str} (step {current_step + 1}/{total_steps})")
                            print(f"     Current path: {nav_progress} | Steps: {brain.nav_steps_taken}")
                            if brain.nav_target:
                                print(f"     Immediate target: ({brain.nav_target[0]}, {brain.nav_target[1]})")
                            if cross_status['final_target']:
                                ft = cross_status['final_target']
                                print(f"     Final target: ({ft[0]}, {ft[1]}) on map {cross_status['target_map']}")
                    else:
                        # Single-map navigation
                        nav_target = brain.nav_target
                        nav_progress = f"{brain.nav_path_index}/{len(brain.nav_path)}"
                        targets_remaining = len(brain.nav_target_list) - brain.nav_target_index
                        print(f"\n  üß≠ NAVIGATION ACTIVE:")
                        print(f"     Target: ({nav_target[0]}, {nav_target[1]}) | Path: {nav_progress} | Steps: {brain.nav_steps_taken}")
                        print(f"     Targets remaining: {targets_remaining} | Struck: {len(brain.nav_struck_targets)}")
                        if brain.nav_stagnation_count > 0:
                            print(f"     Nav stagnation: {brain.nav_stagnation_count}/{brain.NAV_STAGNATION_LIMIT}")
                
                elif brain.is_in_nav_curiosity_window():
                    print(f"\n  üß≠ NAV CURIOSITY WINDOW: {brain.nav_curiosity_countdown} steps remaining")
                else:
                    nav_status = brain.get_nav_targets_status()
                    if nav_status['loaded']:
                        print(f"\n  üß≠ Navigation: inactive (known area: {brain.known_area_counter}/{brain.KNOWN_AREA_TRIGGER})")
                        print(f"     Taught targets: {nav_status['remaining']} remaining / {nav_status['total']} total ({nav_status['visited']} visited)")
                    else:
                        print(f"\n  üß≠ Navigation: inactive (known area: {brain.known_area_counter}/{brain.KNOWN_AREA_TRIGGER}) [frontier fallback]")
            
            print(f"\n  üß† DECISION MODE:")
            print(f"     Overworld Markov: {brain.markov_action_count} ({markov_ratio:.1%}) | Curiosity: {brain.curiosity_action_count} ({1-markov_ratio:.1%})")
            print(f"     Battle actions: {brain.battle_action_count} | Battle Markov: {brain.battle_markov_action_count}")
            print(f"     Last OW Markov score: {brain.last_markov_score:.3f} (threshold adapts to density)")
            print(f"     Map density: {density['tier']} ({density['taught_frames']} frames, {density['visited']} tiles, {density['coverage']:.0%} coverage)")
            if brain.last_markov_action:
                print(f"     Last OW Markov suggestion: {brain.last_markov_action}")
            
            # Blend status
            if brain.taught_reference['loaded']:
                blend_status = f"Tier {brain.blend_tier}" if brain.blend_tier > 0 else "Inactive"
                print(f"\n  üîÄ BLEND: {blend_status} | Total blends: {brain.blend_count}")
                if brain.blend_tier > 0:
                    ai_w, taught_w = brain.BLEND_RATIOS[brain.blend_tier]
                    print(f"     Current ratio: {ai_w:.0%} AI / {taught_w:.0%} taught")
            
            print(f"\n  üìä EXPLORATION:")
            print(f"     Visited: {visited_count} | Obstructions: {obs_count} | Coverage: {coverage:.0%}")
            print(f"     Interactables found: {interactables}")
            
            # Map graph status
            mg = brain.build_map_graph()
            mg_edges = sum(len(v) for v in mg.values())
            print(f"     Map graph: {len(mg)} maps, {mg_edges} edges")
            
            print(f"\n  üéØ TILE PROBING:")
            print(f"     Tiles probed: {tile_stats['probed']} | Exhausted: {tile_stats['exhausted']} | With success: {tile_stats['with_success']}")
            
            if not currently_in_battle:
                if tile_needs_probing:
                    if probe_action == 'A':
                        print(f"     Current tile: READY TO PROBE (facing untried direction)")
                    elif probe_action:
                        print(f"     Current tile: NEED TO TURN {probe_action} first")
                    else:
                        print(f"     Current tile: NEEDS PROBING (checking directions)")
                else:
                    print(f"     Current tile: EXHAUSTED or fully probed")
                
                tile_state_data = brain.get_tile_interaction_state(raw_x, raw_y, current_map)
                success_info = []
                for d in range(4):
                    attempts = tile_state_data['direction_attempts'].get(d, 0)
                    successes = tile_state_data['direction_successes'].get(d, 0)
                    if attempts > 0:
                        success_info.append(f"{brain.DIRECTION_NAMES.get(d, '?')}:{successes}/{attempts}")
                if success_info:
                    print(f"     Direction results: {', '.join(success_info)}")
            
            if transitions:
                print(f"\n  üö™ TRANSITIONS: {len(transitions)} known")
                for t in transitions[:3]:
                    pos = tuple(t['position']) if isinstance(t['position'], list) else t['position']
                    banned = "üö´" if brain.is_transition_banned(current_map, pos, t['direction']) else ""
                    print(f"     ({pos[0]},{pos[1]}) ‚Üí Map {t['destination_map']} (used {t['use_count']}x) {banned}")
            
            map_debt = brain.map_novelty_debt.get(current_map, 0.0)
            temp_debt = brain.get_temp_debt(current_map)
            if map_debt > 0.1 or temp_debt > 0.1:
                print(f"\n  üí≥ DEBT: map={map_debt:.2f}/{brain.MAX_MAP_DEBT}, temp={temp_debt:.2f}")
            
            if brain.menu_trap_b_boost > 1.0:
                print(f"\n  üîí MENU TRAP: B boost {brain.menu_trap_b_boost:.2f}x ({brain.menu_trap_frames} frames)")
            
            if is_both_mode:
                print(f"\n  ‚ö° BOTH MODE ACTIVE: stagnation={brain.state_stagnation_count}, swaps={brain.unproductive_swap_count}")
            
            if brain.pending_action:
                print(f"\n  ‚è≥ Pending: {brain.pending_action} ({brain.pending_action_frames}/{brain.ACTION_CONFIRM_FRAMES})")
            
            action_utils = sorted([(a.action, a.utility) for a in brain.actions()], key=lambda x: x[1], reverse=True)
            print(f"\n  ‚ö° Utilities: {' '.join([f'{k}:{v:.2f}' for k,v in action_utils])}")
            
            n_actions = len(brain.actions())
            n_entities = len(brain.entities())
            n_total = len(brain.perceptrons)
            print(f"  üß© Perceptrons: {n_total} total ({n_actions} actions, {n_entities} entities)")
            
            if brain.state_stagnation_count > 10:
                print(f"\n  ‚ö†Ô∏è STAGNATION WARNING: {brain.state_stagnation_count}/{brain.STATE_STAGNATION_THRESHOLD}")
            if brain.detected_pattern:
                pattern_str = '-'.join(str(a) for a in brain.detected_pattern)
                print(f"  üîÑ PATTERN DETECTED ({len(brain.detected_pattern)}): {pattern_str} x{brain.pattern_repeat_count}")

        # === MILESTONES ===
        # Every 500 timesteps (never at timestep 0): print a summary banner of
        # exploration / decision / battle / blend / navigation statistics, then
        # persist the model checkpoint and the exploration memory.
        if brain.timestep % 500 == 0 and brain.timestep > 0:
            # Snapshot the per-map records once instead of re-walking the dict
            # for every aggregate below.
            map_records = list(brain.exploration_memory.values())

            # 'visited_tiles', 'obstructions' and 'interactable_objects' are
            # indexed directly (a missing key raises KeyError, as before);
            # 'transitions' and 'tile_interactions' are treated as optional.
            total_visited = sum(len(m['visited_tiles']) for m in map_records)
            total_obs = sum(len(m['obstructions']) for m in map_records)
            total_interactables = sum(len(m['interactable_objects']) for m in map_records)
            total_transitions = sum(len(m.get('transitions', [])) for m in map_records)
            total_probed = sum(len(m.get('tile_interactions', {})) for m in map_records)
            total_exhausted = sum(
                sum(1 for t in m.get('tile_interactions', {}).values() if t.get('exhausted', False))
                for m in map_records
            )

            # Share of overworld decisions made by each policy. Both ratios use
            # the same guarded denominator so that "0 of 0" reports 0.0% for
            # both, rather than showing curiosity at 100% via 1 - markov_ratio.
            total_actions = brain.markov_action_count + brain.curiosity_action_count
            markov_ratio = brain.markov_action_count / max(1, total_actions)
            curiosity_ratio = brain.curiosity_action_count / max(1, total_actions)

            battle_total = brain.battle_action_count
            battle_markov = brain.battle_markov_action_count
            battle_markov_rate = battle_markov / max(1, battle_total)

            # Edge count = total length of all adjacency collections in the graph.
            mg = brain.build_map_graph()
            mg_edges = sum(len(v) for v in mg.values())

            print(f"\n{'#'*70}")
            print(f"# MILESTONE {brain.timestep}")
            print(f"# Maps explored: {len(brain.exploration_memory)}")
            print(f"# Tiles visited: {total_visited} | Obstructions: {total_obs}")
            print(f"# Interactables: {total_interactables} | Transitions: {total_transitions}")
            print(f"# Tiles probed: {total_probed} | Exhausted: {total_exhausted}")
            print("#")
            print("# CACHE SYSTEM:")
            print(f"#   Maps cached: {len(cache_manager.caches)}")
            print(f"#   Active map: {cache_manager.active_map_id} ({len(cache_manager.get_active_taught_frames())} taught frames)")
            print("#")
            print("# MAP GRAPH:")
            print(f"#   Maps: {len(mg)} | Edges: {mg_edges}")
            print(f"#   Connected: {list(mg.keys())}")
            print("#")
            print("# HYBRID DECISION STATS:")
            print(f"#   Overworld Markov (imitation): {brain.markov_action_count} ({markov_ratio:.1%})")
            print(f"#   Curiosity (explore): {brain.curiosity_action_count} ({curiosity_ratio:.1%})")
            print(f"#   Taught transitions: {len(brain.taught_transitions)}")
            print("#")
            print("# BATTLE STATS:")
            print(f"#   Battles fought: {brain.current_battle_id}")
            print(f"#   Battle actions: {battle_total}")
            print(f"#   Battle Markov: {battle_markov}/{battle_total} ({battle_markov_rate:.1%})")
            print(f"#   Battle data: {'LOADED' if brain.battle_loaded else 'NOT LOADED'} ({len(brain.battle_transitions)} frames)")
            print("#")
            print("# BLEND STATS:")
            print(f"#   Total blends: {brain.blend_count}")
            print(f"#   Current tier: {brain.blend_tier}")
            print(f"#   Taught reference: {'loaded' if brain.taught_reference['loaded'] else 'not loaded'}")
            print("#")
            print("# NAVIGATION STATS:")
            print(f"#   Active: {brain.is_nav_active()}")
            cross_status = brain.get_cross_map_status()
            if cross_status['active']:
                chain_str = ' ‚Üí '.join(str(m) for m in cross_status['chain'])
                # chain_index is shown 1-based for readability.
                print(f"#   Cross-map: {chain_str} (step {cross_status['chain_index'] + 1}/{cross_status['chain_length']})")
                if cross_status['paused']:
                    print(f"#   PAUSED: {cross_status['paused_reason']}")
            else:
                print("#   Cross-map: inactive")
            print(f"#   Struck targets (this session): {len(brain.nav_struck_targets)}")
            nav_status = brain.get_nav_targets_status()
            if nav_status['loaded']:
                print(f"#   Taught targets: {nav_status['remaining']} remaining / {nav_status['total']} total")
            else:
                print("#   Taught targets: not loaded (frontier fallback)")
            print(f"{'#'*70}")

            # Persist progress at every milestone. MODEL_CHECKPOINT_FILE is the
            # module-level constant for BASE_PATH / "model_checkpoint.json", so
            # the checkpoint location has a single source of truth.
            brain.save_model_checkpoint(MODEL_CHECKPOINT_FILE)
            cache_manager.save_exploration_memory()
            print("# Model + exploration saved")

        # === WAIT FOR NEXT STATE (new IOThread read) ===
        # Bounded poll: sleep 5 ms, then check the cache version; give up after
        # 10 polls (about 50 ms) and carry on with whatever state is cached.
        polls_remaining = 10
        while polls_remaining > 0:
            polls_remaining -= 1
            time.sleep(0.005)
            fresh_state_ready = active_cache.get_version() > last_processed_version
            if fresh_state_ready:
                break

        # === LEARN ===
        # Read the follow-up state from the active cache, build the matching
        # learning representation, and hand the (state -> next state)
        # transition to brain.learn().
        next_ctx, next_pal, next_til, dead, next_raw_pos = active_cache.get_state()
        # NOTE(review): the version is read *after* the state. If the cache can
        # be updated between these two calls, the recorded version may be newer
        # than the state just read -- confirm get_state()/get_version() ordering.
        last_processed_version = active_cache.get_version()
        
        # Element 3 of the context vector is used as the in-battle flag here.
        next_in_battle = next_ctx[3]
        # The current context_state is passed as the second argument
        # (presumably the "previous" context for derived deltas -- TODO confirm).
        next_derived = compute_derived_features(next_ctx, context_state)
        next_learning_state = build_learning_state(next_derived, next_pal, next_til, next_in_battle)

        brain.learn(learning_state, next_learning_state, context_state, next_ctx, dead=dead, 
                    raw_position=raw_position, next_raw_position=next_raw_pos)

        # Carry this step's context/position into the next iteration; the
        # context is copied rather than aliased.
        prev_context_state = context_state.copy()
        prev_raw_position = raw_position
        # One timestep per completed loop iteration.
        brain.timestep += 1

except KeyboardInterrupt:
    print("\n\nüõë Stopping AI...")
    io_thread.stop()
    io_thread.join(timeout=2)
    cache_manager.save_exploration_memory()
    brain.save_model_checkpoint(BASE_PATH / "model_checkpoint.json")
    print("‚úÖ Saved and stopped.")