In [6]:
# ============================================================================
# CELL 1: State Management & Utilities
# ============================================================================

import json
import os
import time
from collections import deque
from pathlib import Path

import numpy as np

# Root directory holding the JSON files exchanged with the game-side script.
# Defaults to the original development location; set the COGAI_BASE_PATH
# environment variable to run the notebook from another machine or checkout
# without editing this cell.
BASE_PATH = Path(os.environ.get(
    "COGAI_BASE_PATH",
    "C:/Users/natmaw/Documents/Boston Stuff/CS 5100 Foundations of AI/cogai",
))
ACTION_FILE = BASE_PATH / "action.json"            # written by write_action()
STATE_FILE = BASE_PATH / "game_state.json"         # read by read_game_state()
MODEL_FILE = BASE_PATH / "model_checkpoint.json"   # checkpoint location (not used in this cell)

# Fixed vector lengths: read_game_state() zero-pads or truncates its outputs
# to exactly these sizes.
EXPECTED_STATE_DIM = 6   # [x, y, map_id, in_battle, menu_flag, direction]
PALETTE_DIM = 768
TILE_DIM = 600

# State vector from Lua: [x, y, map_id, in_battle, menu_flag, direction]
# x, y: 0-255 (raw tile coordinates)
# map_id: 0-255
# in_battle: 0 or 1
# menu_flag: 0 or 1 (from game_state == 1)
# direction: 0-3 (DOWN=0, UP=1, LEFT=2, RIGHT=3) - already normalized by Lua

def normalize_game_state(raw_state):
    """Normalize the context state vector for learning.

    Expected layout: [x, y, map_id, in_battle, menu_flag, direction]
    - x, y: 0-255 tile coordinates, scaled to the 0-1 range
    - map_id: clamped to 0-255
    - in_battle, menu_flag: forced to exactly 0.0 or 1.0 (any value > 0 -> 1.0)
    - direction: truncated to an integer and wrapped into 0-3

    Args:
        raw_state: Sequence or array of numbers. Entries beyond the sixth are
            carried over unchanged.

    Returns:
        A new float ndarray with the first six fields normalized. Inputs with
        fewer than six entries are returned as-is (same object, untouched).

    Raises:
        ValueError: If the direction field is NaN (it cannot be converted to int).
    """
    if len(raw_state) < 6:
        return raw_state

    # Always work on a float copy. Copying an integer array would keep its
    # integer dtype, and storing x / 255.0 into it silently truncates to 0.
    normalized = np.array(raw_state, dtype=float)

    # Scale tile coordinates into the 0-1 range.
    normalized[0] = normalized[0] / 255.0
    normalized[1] = normalized[1] / 255.0

    # Map ID keeps its magnitude but is clamped to the valid byte range.
    normalized[2] = np.clip(normalized[2], 0, 255)

    # Battle and menu flags are forced to be strictly binary.
    normalized[3] = 1.0 if normalized[3] > 0 else 0.0
    normalized[4] = 1.0 if normalized[4] > 0 else 0.0

    # Direction is wrapped into 0-3 as a safeguard against out-of-range values.
    normalized[5] = int(normalized[5]) % 4

    return normalized

def compute_derived_features(current, prev):
    """Derive the 8-element temporal feature vector from two context states.

    Feature order: [vel_x, vel_y, map_changed, battle_started, battle_ended,
    menu_opened, menu_closed, direction_changed]. The velocities are plain
    differences of the first two fields; every other entry is a 0.0/1.0 flag.

    Args:
        current: Context state for this step (indexable, at least 6 entries).
        prev: Context state for the previous step, or None on the first step.

    Returns:
        ndarray of length 8; all zeros when `prev` is None.
    """
    if prev is None:
        return np.zeros(8)

    def rose(idx):
        # 1.0 when the field increased since the previous step.
        return 1.0 if current[idx] > prev[idx] else 0.0

    def fell(idx):
        # 1.0 when the field decreased since the previous step.
        return 1.0 if current[idx] < prev[idx] else 0.0

    features = [
        current[0] - prev[0],                              # vel_x
        current[1] - prev[1],                              # vel_y
        1.0 if abs(current[2] - prev[2]) > 0.5 else 0.0,   # map_changed
        rose(3),                                           # battle_started
        fell(3),                                           # battle_ended
        rose(4),                                           # menu_opened
        fell(4),                                           # menu_closed
        1.0 if current[5] != prev[5] else 0.0,             # direction_changed
    ]
    return np.array(features)

def build_learning_state(derived, palette, tiles, in_battle, noise_scale=0.0001):
    """Assemble the flat feature vector used for learning.

    HYBRID PERCEPTION:
    - Overworld: derived + tiles (spatial) + palette (context)
    - Battle: derived + palette only (tiles are just UI)

    The two layouts have different lengths, so the size of the returned
    vector depends on `in_battle`.

    Args:
        derived: 1-D array of temporal features.
        palette: 1-D palette array.
        tiles: 1-D tile array; ignored while in battle.
        in_battle: Battle flag; values above 0.5 select the battle layout.
        noise_scale: Standard deviation of the Gaussian jitter added to every
            element. Defaults to the original 0.0001. Pass 0 for a
            deterministic result; no random numbers are drawn in that case.

    Returns:
        A new 1-D float ndarray.
    """
    if in_battle > 0.5:
        parts = [derived, palette]
    else:
        parts = [derived, tiles, palette]
    state = np.concatenate(parts)

    if not noise_scale:
        # Deterministic mode: skip the draw so the global RNG is left untouched.
        return state.astype(float)

    noise = np.random.randn(len(state)) * noise_scale
    return state + noise

def _blank_game_state():
    """Return the all-zero fallback tuple used when no valid state can be read."""
    return (np.zeros(EXPECTED_STATE_DIM), np.zeros(PALETTE_DIM), np.zeros(TILE_DIM),
            False, (0, 0), None)


def _fit_length(vector, dim):
    """Zero-pad or truncate a 1-D array so it has exactly `dim` entries."""
    size = vector.shape[0]
    if size < dim:
        return np.pad(vector, (0, dim - size))
    if size > dim:
        return vector[:dim]
    return vector


def read_game_state(max_retries=3):
    """Read and decode the latest game state from STATE_FILE.

    The file may be rewritten while this function reads it, so parse errors
    and OS-level read errors are retried up to `max_retries` times with a
    1 ms pause between attempts. Any other failure, a missing file, or a
    non-positive `max_retries` yields the all-zero fallback.

    Args:
        max_retries: Maximum number of read attempts.

    Returns:
        context_state: normalized state of length EXPECTED_STATE_DIM. A short
            vector is zero-padded *before* normalization so its coordinates
            are scaled like any other state.
        palette_state: visual palette data of length PALETTE_DIM
        tile_state: visual tile data of length TILE_DIM
        dead: death flag
        raw_position: (raw_x, raw_y) for tile tracking - NOT normalized
        human_action: value of the "human_action" field, or None if absent
    """
    if not STATE_FILE.exists():
        return _blank_game_state()

    parsed = None
    for attempt in range(max_retries):
        try:
            with open(STATE_FILE, "r") as f:
                data = json.loads(f.read())

            raw = data.get("state", [])
            palette_raw = data.get("palette", [])
            tiles_raw = data.get("tiles", [])
            dead = bool(data.get("dead", False))
            human_action = data.get("human_action", None)

            # Keep the raw tile coordinates; normalization below rescales them.
            raw_x = int(raw[0]) if len(raw) > 0 else 0
            raw_y = int(raw[1]) if len(raw) > 1 else 0

            # Fix the length first: normalize_game_state() leaves vectors
            # shorter than six entries untouched, which would otherwise let
            # raw coordinates leak into the padded result.
            context_state = normalize_game_state(
                _fit_length(np.array(raw, dtype=float), EXPECTED_STATE_DIM)
            )
            palette_state = np.array(palette_raw, dtype=float) if palette_raw else np.zeros(PALETTE_DIM)
            tile_state = np.array(tiles_raw, dtype=float) if tiles_raw else np.zeros(TILE_DIM)

            parsed = (
                _fit_length(context_state, EXPECTED_STATE_DIM),
                _fit_length(palette_state, PALETTE_DIM),
                _fit_length(tile_state, TILE_DIM),
                dead,
                (raw_x, raw_y),
                human_action,
            )
            break

        except (json.JSONDecodeError, ValueError, OSError):
            # Half-written JSON or a file briefly locked by the writer: retry.
            if attempt < max_retries - 1:
                time.sleep(0.001)
                continue
            return _blank_game_state()
        except Exception:
            # Deliberate best-effort: structurally unexpected data gives the fallback.
            return _blank_game_state()

    if parsed is None:
        # The loop never ran (max_retries <= 0).
        return _blank_game_state()
    return parsed

def write_action(action_name):
    """Write the chosen action to ACTION_FILE as ``{"action": <NAME>}``.

    A truthy `action_name` is upper-cased before writing; a falsy one (None
    or an empty string) is written unchanged. Write failures are reported on
    stdout and otherwise ignored.
    """
    payload = {"action": action_name.upper() if action_name else action_name}

    try:
        with open(ACTION_FILE, "w") as f:
            f.write(json.dumps(payload))
            f.flush()
    except Exception as e:
        print(f"[ERROR] Failed to write action: {e}")

In [7]:
# ============================================================================
# CELL 2: Perceptron Classes
# ============================================================================

class Perceptron:
    """Linear unit with lazily created weights, two eligibility traces and a utility score.

    `kind` selects the behavior: "entity" units damp their activation by
    familiarity and track prediction errors; "action" units maintain a
    clipped utility value. Other kinds only learn weights.
    """

    def __init__(self, kind, action=None, group=None, entity_type=None):
        # Identity of the unit.
        self.kind, self.action = kind, action
        self.group, self.entity_type = group, entity_type

        # Learned parameters; weights are allocated on first use.
        self.utility = 1.0
        self.weights = None

        # Fast- and slow-decaying eligibility traces.
        self.eligibility_fast = 0.0
        self.eligibility_slow = 0.0

        # Familiarity bookkeeping (used by "entity" units).
        self.familiarity = 0.0
        self.activation_history = deque(maxlen=10)

        # Adaptive learning-rate state.
        self.learning_rate = 0.01
        self.prediction_errors = deque(maxlen=50)

    def ensure_weights(self, dim):
        """Create small random weights of length `dim` if none exist yet."""
        if self.weights is not None:
            return
        self.weights = np.random.randn(dim) * 0.001

    def predict(self, state):
        """Return the activation for `state`.

        Entity units record the absolute raw activation in their history and
        return the activation scaled down by a familiarity-based factor; all
        other units return the raw dot product.
        """
        self.ensure_weights(len(state))
        activation = np.dot(self.weights, state)

        if self.kind != "entity":
            return activation

        damping = 1.0 / (1.0 + np.sqrt(self.familiarity * 0.5))
        self.activation_history.append(abs(activation))
        return activation * damping

    def adapt_learning_rate(self):
        """Nudge the learning rate once 50 prediction errors are available."""
        if len(self.prediction_errors) < 50:
            return

        mean_error = np.mean(self.prediction_errors)
        if mean_error < 0.1:
            # Low error: cool down, but never below 0.001.
            self.learning_rate = max(0.001, self.learning_rate * 0.99)
        elif mean_error > 0.5:
            # High error: heat up, but never above 0.05.
            self.learning_rate = min(0.05, self.learning_rate * 1.01)

    def update(self, state, error, gamma_fast=0.5, gamma_slow=0.95, stagnation=0.0):
        """Apply one learning step.

        Args:
            state: Feature ndarray for this step.
            error: Scalar error signal driving the weight change.
            gamma_fast: Decay factor of the fast eligibility trace.
            gamma_slow: Decay factor of the slow eligibility trace.
            stagnation: Stagnation level; above 0.5 an action unit's utility
                is reduced instead of rewarded.
        """
        self.ensure_weights(len(state))

        # Accumulating traces: decay, then add one for this step.
        self.eligibility_fast = gamma_fast * self.eligibility_fast + 1.0
        self.eligibility_slow = gamma_slow * self.eligibility_slow + 1.0

        self.adapt_learning_rate()

        # 70 % of the step follows the fast trace, 30 % the slow one.
        fast_update = 0.7 * self.learning_rate * error * state * self.eligibility_fast
        slow_update = 0.3 * self.learning_rate * error * state * self.eligibility_slow
        self.weights += fast_update + slow_update

        if self.kind == "action":
            if error > 0.01:
                if stagnation > 0.5:
                    self.utility *= 0.97
                elif error > 0.2:
                    self.utility = min(self.utility * 1.02, 2.0)
                else:
                    self.utility *= 0.995

            # Movement actions keep a higher utility floor than the others.
            floor = 0.1 if self.group == "move" else 0.01
            self.utility = np.clip(self.utility, floor, 2.0)

        if self.kind == "entity":
            # Familiarity grows while recent activations stay noticeable.
            if len(self.activation_history) > 0:
                if np.mean(self.activation_history) > 0.1:
                    self.familiarity += 0.03

            # Note: predict() also appends to activation_history.
            prediction = self.predict(state)
            self.prediction_errors.append(abs(prediction - error))


class ControlSwapPerceptron(Perceptron):
    """Perceptron variant that scores whether the control mode should be swapped."""

    def __init__(self):
        super().__init__(kind="control_swap")
        # Rolling log of (swapped, novelty_gained) outcomes.
        self.swap_history = deque(maxlen=100)
        self.confidence = 0.0

    def should_swap(self, state, movement_stagnation):
        """Return ``(swap?, score magnitude)`` for the given state.

        Without weights the answer is always ``(False, 0.0)``. Otherwise the
        learned score (weight 0.7) is blended with a tanh-squashed stagnation
        term (weight 0.3) and compared against 0.5.
        """
        if self.weights is None:
            return False, 0.0

        self.ensure_weights(len(state))
        learned = np.dot(self.weights, state)
        stagnation_term = np.tanh(movement_stagnation / 5.0)
        blended = learned * 0.7 + stagnation_term * 0.3

        return blended > 0.5, abs(blended)

    def record_swap_outcome(self, state, swapped, novelty_gained):
        """Log a swap outcome and refresh `confidence` once 20 outcomes exist.

        Confidence is the share of the 20 most recent outcomes that were swaps
        with a novelty gain above 0.2. `state` is accepted but not used.
        """
        self.swap_history.append((swapped, novelty_gained))

        if len(self.swap_history) < 20:
            return

        wins = 0
        for did_swap, gain in list(self.swap_history)[-20:]:
            if did_swap and gain > 0.2:
                wins += 1
        self.confidence = wins / 20.0

In [8]:
# ============================================================================
# CELL 3: Brain Class - All Updates Integrated
# ============================================================================
# CHANGES FROM PREVIOUS VERSION:
# 1. Added get_best_probe_action() for turn-then-interact sequencing
# 2. Added "both" mode support (should_use_both_mode, BOTH_MODE thresholds)
# 3. Added debt caps (MAX_MAP_DEBT, MAX_LOCATION_DEBT) and decay_all_debts()
# 4. Added direction change as partial progress tracking
# 5. Increased INTERACTION_VERIFY_FRAMES from 5 to 8
# ============================================================================

class Brain:
    def __init__(self):
        """Set up every tunable, counter and memory structure of the agent.

        Side effect: calls load_exploration_memory() part-way through, which
        reads EXPLORATION_MEMORY_FILE when it exists and prints a status line.
        Attributes in UPPER_CASE are configuration values; lower-case
        attributes are mutable runtime state.
        """
        self.perceptrons = []
        
        # Bounded histories (oldest entries are dropped automatically).
        self.prev_learning_states = deque(maxlen=50)
        self.prev_context_states = deque(maxlen=10)
        self.last_positions = deque(maxlen=30)
        self.action_history = deque(maxlen=100)
        
        self.control_mode = "move"
        self.timestep = 0
        self.last_action = None
        self.last_direction = 0
        
        self.MOVE_UTILITY_FLOOR = 0.05
        self.INTERACT_UTILITY_FLOOR = 0.15
        
        # === PERSISTENT EXPLORATION MEMORY ===
        # exploration_memory maps map_id -> per-map record (see get_current_map_memory).
        self.EXPLORATION_MEMORY_FILE = BASE_PATH / "exploration_memory.json"
        self.exploration_memory = {}
        self.current_map_id = None
        self.SAVE_INTERVAL = 100
        
        # Direction mapping (integer codes: DOWN=0, UP=1, LEFT=2, RIGHT=3)
        self.DIRECTION_NAMES = {0: "DOWN", 1: "UP", 2: "LEFT", 3: "RIGHT"}
        self.DIRECTION_TO_INT = {"DOWN": 0, "UP": 1, "LEFT": 2, "RIGHT": 3}
        self.INT_TO_ACTION = {0: "DOWN", 1: "UP", 2: "LEFT", 3: "RIGHT"}
        
        # (dx, dy) per direction; DOWN increases y, UP decreases it.
        self.DIRECTION_DELTAS_INT = {0: (0, 1), 1: (0, -1), 2: (-1, 0), 3: (1, 0)}
        self.ACTION_DELTAS = {"UP": (0, -1), "DOWN": (0, 1), "LEFT": (-1, 0), "RIGHT": (1, 0)}
        self.DELTA_TO_DIRECTION = {(0, 1): 0, (0, -1): 1, (-1, 0): 2, (1, 0): 3}
        
        # Replaces self.exploration_memory with the persisted content, if any.
        self.load_exploration_memory()
        
        # === ACTION EXECUTION CONFIRMATION ===
        # An action stays "pending" until an effect is observed or
        # ACTION_CONFIRM_FRAMES frames have passed (see confirm_action_executed).
        self.pending_action = None
        self.pending_action_frames = 0
        self.ACTION_CONFIRM_FRAMES = 3
        self.last_confirmed_action = None
        
        # === TILE INTERACTION PROBING ===
        self.INTERACTION_VERIFY_FRAMES = 8
        self.MIN_SUCCESS_RATE_THRESHOLD = 0.1
        self.pending_interaction_verify = None
        self.interaction_verify_countdown = 0
        
        # === MENU ESCAPE B-BOOST ===
        self.menu_trap_frames = 0
        self.menu_trap_b_boost = 1.0
        self.menu_trap_position = None
        self.B_BOOST_INCREMENT = 0.15
        self.B_BOOST_MAX = 3.0
        self.MENU_TRAP_THRESHOLD = 5
        self.original_b_utility = None
        
        # === ADAPTIVE MODE SWAPPING ===
        self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD = 15
        self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD = 25
        self.move_to_interact_threshold = self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD
        self.interact_to_move_threshold = self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD
        self.THRESHOLD_INCREMENT = 15
        self.MAX_THRESHOLD = 150
        self.frames_in_current_mode = 0
        self.swap_chain_count = 0
        self.position_at_mode_swap = None
        self.last_map_id = None
        self.last_battle_state = None
        
        # === UNPRODUCTIVE MODE SWAP TRACKING ===
        self.UNPRODUCTIVE_SWAP_THRESHOLD = 3
        self.unproductive_swap_count = 0
        self.utilities_before_swapping = {}
        self.swap_chain_active = False
        
        # === STATE STAGNATION DETECTION ===
        self.STATE_STAGNATION_THRESHOLD = 20
        self.state_stagnation_count = 0
        self.last_context_state_hash = None
        self.stagnation_initiator_action = None
        self.STAGNATION_INITIATOR_PENALTY = 0.7
        
        # === "BOTH" MODE THRESHOLDS ===
        self.BOTH_MODE_STAGNATION_THRESHOLD = 35
        self.BOTH_MODE_SWAP_THRESHOLD = 5
        
        # === TURN AS PROGRESS TRACKING ===
        self.last_direction_for_progress = None
        self.direction_change_counts_as_progress = True
        
        # === NOVELTY WEIGHTS ===
        self.UNVISITED_TILE_BONUS = 1.5
        self.OBSTRUCTION_PENALTY = 0.25
        
        # === TRANSITION SYSTEM ===
        # TRANSITION_ATTRACTION_WEIGHT scales the score returned by
        # get_transition_attraction().
        self.TRANSITION_ATTRACTION_WEIGHT = 0.6
        self.TEMP_DEBT_ACCUMULATION = 0.5
        self.TEMP_DEBT_DECAY = 0.02
        self.TEMP_DEBT_MAX = 15.0
        
        # === DEBT CAPS AND DECAY ===
        self.MAX_MAP_DEBT = 10.0
        self.MAX_LOCATION_DEBT = 5.0
        self.DEBT_DECAY_RATE = 0.005
        
        # === TRANSITION BAN SYSTEM ===
        # transition_bans maps map_id -> a single ban record (see create_transition_ban).
        self.transition_bans = {}
        self.BAN_VICINITY_RADIUS = 3
        self.BAN_COVERAGE_LIFT_THRESHOLD = 0.6
        self.BAN_TIMEOUT_STEPS = 300
        
        # Multi-scale memory
        self.visited_maps = {}
        self.map_novelty_debt = {}
        self.location_memory = {}
        self.location_novelty = {}
        self.action_execution_count = {}
        
        self.swap_perceptron = ControlSwapPerceptron()
        self.error_history = deque(maxlen=1000)
        self.numeric_error_history = deque(maxlen=1000)
        self.visual_error_history = deque(maxlen=1000)
        self._entity_norms_cache = {}
        self._cache_valid = False
        self.innate_entities_spawned = False
        
        # === REPETITION CORRECTION ===
        self.consecutive_action_count = 0
        self.current_repeated_action = None
        self.LEARNING_SLOWDOWN_START = 3
        self.LEARNING_SLOWDOWN_MAX = 10
        self.PENALTY_THRESHOLD = 12
        self.HARD_RESET_THRESHOLD = 18
        
        # === PATTERN DETECTION ===
        self.PATTERN_CHECK_WINDOW = 50
        self.PATTERN_MIN_REPEATS = 3
        self.PATTERN_MAX_LENGTH = 10
        self.detected_pattern = None
        self.pattern_repeat_count = 0

        # === PROBE ACTION CACHE ===
        # Last result of get_best_probe_action() and the key it was computed for.
        self._cached_probe_action = None
        self._cached_probe_dir = None
        self._probe_cache_position = None
        
        # === TEACHING MODE ===
        self.teaching_mode = True
        self.demonstration_count = 0
        self.context_action_stats = {}

    # =========================================================================
    # ACTION EXECUTION CONFIRMATION
    # =========================================================================
    
    def set_pending_action(self, action_name):
        self.pending_action = action_name
        self.pending_action_frames = 0
    
    def confirm_action_executed(self, context_state, prev_context_state):
        if self.pending_action is None:
            return True
        self.pending_action_frames += 1
        action_executed = False
        if prev_context_state is not None:
            if self.pending_action in ["UP", "DOWN", "LEFT", "RIGHT"]:
                pos_changed = (context_state[0] != prev_context_state[0] or 
                              context_state[1] != prev_context_state[1])
                dir_changed = context_state[5] != prev_context_state[5]
                action_executed = pos_changed or dir_changed
            elif self.pending_action in ["A", "B", "Start", "Select"]:
                menu_changed = abs(context_state[4] - prev_context_state[4]) > 0.1
                battle_changed = context_state[3] != prev_context_state[3]
                map_changed = context_state[2] != prev_context_state[2]
                action_executed = menu_changed or battle_changed or map_changed
        if action_executed or self.pending_action_frames >= self.ACTION_CONFIRM_FRAMES:
            self.last_confirmed_action = self.pending_action
            self.pending_action = None
            self.pending_action_frames = 0
            return True
        return False
    
    def should_send_new_action(self):
        return self.pending_action is None or self.pending_action_frames >= self.ACTION_CONFIRM_FRAMES

    # =========================================================================
    # EXPLORATION MEMORY PERSISTENCE
    # =========================================================================
    
    def load_exploration_memory(self):
        try:
            if self.EXPLORATION_MEMORY_FILE.exists():
                with open(self.EXPLORATION_MEMORY_FILE, 'r') as f:
                    data = json.load(f)
                    self.exploration_memory = {}
                    for map_key, map_data in data.items():
                        map_id = int(map_key.replace('map_', ''))
                        self.exploration_memory[map_id] = self._deserialize_map_memory(map_data)
                print(f"  Loaded exploration memory: {len(self.exploration_memory)} maps")
            else:
                self.exploration_memory = {}
        except Exception as e:
            print(f"  Error loading exploration memory: {e}")
            self.exploration_memory = {}

    def _deserialize_map_memory(self, map_data):
        memory = {
            'visited_tiles': set(tuple(t) for t in map_data.get('visited_tiles', [])),
            'obstructions': set(tuple(t) for t in map_data.get('obstructions', [])),
            'interactable_objects': map_data.get('interactable_objects', []),
            'last_visited_timestep': map_data.get('last_visited_timestep', 0),
            'transitions': map_data.get('transitions', []),
            'temp_debt': map_data.get('temp_debt', 0.0),
            'tile_interactions': {}
        }
        for tile_key, tile_data in map_data.get('tile_interactions', {}).items():
            memory['tile_interactions'][tile_key] = {
                'directions_tried': set(tile_data.get('directions_tried', [])),
                'direction_attempts': {int(k): v for k, v in tile_data.get('direction_attempts', {}).items()},
                'direction_successes': {int(k): v for k, v in tile_data.get('direction_successes', {}).items()},
                'exhausted': tile_data.get('exhausted', False)
            }
        return memory

    def save_exploration_memory(self):
        try:
            data = {f'map_{mid}': self._serialize_map_memory(md) for mid, md in self.exploration_memory.items()}
            with open(self.EXPLORATION_MEMORY_FILE, 'w') as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            print(f"  Error saving exploration memory: {e}")

    def _serialize_map_memory(self, map_data):
        serialized_ti = {}
        for tile_key, td in map_data.get('tile_interactions', {}).items():
            serialized_ti[tile_key] = {
                'directions_tried': list(td.get('directions_tried', set())),
                'direction_attempts': {str(k): v for k, v in td.get('direction_attempts', {}).items()},
                'direction_successes': {str(k): v for k, v in td.get('direction_successes', {}).items()},
                'exhausted': td.get('exhausted', False)
            }
        return {
            'visited_tiles': list(map_data['visited_tiles']),
            'obstructions': list(map_data['obstructions']),
            'interactable_objects': map_data['interactable_objects'],
            'last_visited_timestep': map_data['last_visited_timestep'],
            'transitions': map_data.get('transitions', []),
            'temp_debt': map_data.get('temp_debt', 0.0),
            'tile_interactions': serialized_ti
        }

    def get_current_map_memory(self, map_id):
        if map_id not in self.exploration_memory:
            self.exploration_memory[map_id] = {
                'visited_tiles': set(), 'obstructions': set(), 'interactable_objects': [],
                'last_visited_timestep': self.timestep, 'transitions': [], 'temp_debt': 0.0,
                'tile_interactions': {}
            }
        return self.exploration_memory[map_id]

    def record_visited_tile(self, x, y, map_id):
        memory = self.get_current_map_memory(map_id)
        memory['visited_tiles'].add((int(x), int(y)))
        memory['last_visited_timestep'] = self.timestep

    def record_obstruction(self, x, y, map_id, direction):
        dx, dy = self.DIRECTION_DELTAS_INT.get(direction, (0, 0))
        memory = self.get_current_map_memory(map_id)
        memory['obstructions'].add((int(x + dx), int(y + dy)))

    # =========================================================================
    # TILE-BASED INTERACTION PROBING
    # =========================================================================
    
    def get_tile_interaction_key(self, x, y):
        return f"{int(x)}_{int(y)}"
    
    def get_tile_interaction_state(self, x, y, map_id):
        memory = self.get_current_map_memory(map_id)
        tile_key = self.get_tile_interaction_key(x, y)
        if tile_key not in memory['tile_interactions']:
            memory['tile_interactions'][tile_key] = {
                'directions_tried': set(),
                'direction_attempts': {0: 0, 1: 0, 2: 0, 3: 0},
                'direction_successes': {0: 0, 1: 0, 2: 0, 3: 0},
                'exhausted': False
            }
        return memory['tile_interactions'][tile_key]
    
    def should_interact_at_tile(self, x, y, map_id):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        if tile_state['exhausted']:
            return False
        if len(tile_state['directions_tried']) < 4:
            return True
        for d in range(4):
            attempts = tile_state['direction_attempts'].get(d, 0)
            successes = tile_state['direction_successes'].get(d, 0)
            if attempts > 0 and successes / attempts >= self.MIN_SUCCESS_RATE_THRESHOLD:
                return True
        return False
    
    def get_untried_directions(self, x, y, map_id):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        return [d for d in range(4) if d not in tile_state['directions_tried']]
    
    def get_best_interaction_direction(self, x, y, map_id):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        untried = self.get_untried_directions(x, y, map_id)
        if untried:
            return untried[0]
        best_dir, best_rate = None, 0.0
        for d in range(4):
            attempts = tile_state['direction_attempts'].get(d, 0)
            if attempts > 0:
                rate = tile_state['direction_successes'].get(d, 0) / attempts
                if rate > best_rate:
                    best_rate, best_dir = rate, d
        return best_dir
    
    def get_best_probe_action(self, raw_x, raw_y, current_map, current_dir):
        """Cached version - returns (action, target_direction) for tile probing."""
        cache_key = (raw_x, raw_y, current_map, current_dir)
        
        if self._probe_cache_position == cache_key:
            return self._cached_probe_action, self._cached_probe_dir
        
        if not self.should_interact_at_tile(raw_x, raw_y, current_map):
            result = (None, None)
        else:
            untried = self.get_untried_directions(raw_x, raw_y, current_map)
            if not untried:
                best_dir = self.get_best_interaction_direction(raw_x, raw_y, current_map)
                if best_dir is not None:
                    result = ('A', current_dir) if current_dir == best_dir else (self.INT_TO_ACTION[best_dir], best_dir)
                else:
                    result = (None, None)
            elif current_dir in untried:
                result = ('A', current_dir)
            else:
                target_dir = untried[0]
                result = (self.INT_TO_ACTION[target_dir], target_dir)
        
        self._probe_cache_position = cache_key
        self._cached_probe_action, self._cached_probe_dir = result
        return result
    
    def record_tile_interaction_attempt(self, x, y, map_id, direction, success):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        tile_state['directions_tried'].add(direction)
        tile_state['direction_attempts'][direction] = tile_state['direction_attempts'].get(direction, 0) + 1
        if success:
            tile_state['direction_successes'][direction] = tile_state['direction_successes'].get(direction, 0) + 1
            memory = self.get_current_map_memory(map_id)
            dir_name = self.DIRECTION_NAMES.get(direction, str(direction))
            interactable = [int(x), int(y), dir_name]
            if interactable not in memory['interactable_objects']:
                memory['interactable_objects'].append(interactable)
                print(f"  üéØ INTERACTABLE FOUND: ({x}, {y}) facing {dir_name}")
        self._check_tile_exhaustion(x, y, map_id)
    
    def _check_tile_exhaustion(self, x, y, map_id):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        if len(tile_state['directions_tried']) < 4:
            return
        if not any(tile_state['direction_successes'].get(d, 0) > 0 for d in range(4)):
            tile_state['exhausted'] = True
            print(f"  ‚úì Tile ({x}, {y}) exhausted - no interactions found")
    
    def get_direction_success_rate(self, x, y, map_id, direction):
        tile_state = self.get_tile_interaction_state(x, y, map_id)
        attempts = tile_state['direction_attempts'].get(direction, 0)
        if attempts == 0:
            return None
        return tile_state['direction_successes'].get(direction, 0) / attempts
    
    def start_interaction_verification(self, x, y, map_id, direction):
        self.pending_interaction_verify = {'x': x, 'y': y, 'map_id': map_id, 'direction': direction}
        self.interaction_verify_countdown = self.INTERACTION_VERIFY_FRAMES
    
    def check_interaction_verification(self, context_state, prev_context_state):
        if self.pending_interaction_verify is None:
            return
        self.interaction_verify_countdown -= 1
        success = False
        if prev_context_state is not None:
            menu_changed = abs(context_state[4] - prev_context_state[4]) > 0.1
            battle_started = context_state[3] > 0.5 and prev_context_state[3] <= 0.5
            map_changed = int(context_state[2]) != int(prev_context_state[2])
            success = menu_changed or battle_started or map_changed
        if success or self.interaction_verify_countdown <= 0:
            info = self.pending_interaction_verify
            self.record_tile_interaction_attempt(info['x'], info['y'], info['map_id'], info['direction'], success)
            self.pending_interaction_verify = None

    # =========================================================================
    # TRANSITION SYSTEM
    # =========================================================================
    
    def record_transition(self, from_pos, from_map, to_map, direction, action_type):
        memory = self.get_current_map_memory(from_map)
        for t in memory['transitions']:
            if t['position'] == from_pos and t['direction'] == direction:
                t['use_count'] += 1
                t['last_used'] = self.timestep
                return
        memory['transitions'].append({
            'position': from_pos, 'direction': direction, 'action': action_type,
            'destination_map': to_map, 'use_count': 1, 'last_used': self.timestep
        })
        print(f"  üö™ TRANSITION FOUND: Map {from_map} ({from_pos}) ‚Üí Map {to_map}")

    def get_transition_attraction(self, current_map):
        memory = self.get_current_map_memory(current_map)
        transitions = memory.get('transitions', [])
        if not transitions:
            return 0.0, None
        current_debt = self.map_novelty_debt.get(current_map, 0.0)
        current_temp_debt = self.get_temp_debt(current_map)
        current_coverage = self.get_exploration_coverage(current_map)
        best_attraction, best_transition = 0.0, None
        for t in transitions:
            if self.is_transition_banned(current_map, t['position'], t['direction']):
                continue
            dest_map = t['destination_map']
            dest_debt = self.map_novelty_debt.get(dest_map, 0.0)
            dest_temp_debt = self.get_temp_debt(dest_map)
            dest_coverage = self.get_exploration_coverage(dest_map)
            debt_diff = (current_debt + current_temp_debt * 2.0) - (dest_debt + dest_temp_debt * 2.0)
            coverage_diff = current_coverage - dest_coverage
            attraction = debt_diff * 0.5 + coverage_diff * 0.5
            if t['use_count'] < 3:
                attraction *= 1.5
            if attraction > best_attraction:
                best_attraction, best_transition = attraction, t
        return best_attraction * self.TRANSITION_ATTRACTION_WEIGHT, best_transition

    # =========================================================================
    # TRANSITION BAN SYSTEM
    # =========================================================================
    
    def create_transition_ban(self, map_id, tile_pos, direction_back):
        self.transition_bans[map_id] = {
            'banned_tile': tile_pos, 'banned_direction': direction_back,
            'vicinity_radius': self.BAN_VICINITY_RADIUS, 'vicinity_active': False,
            'created_at': self.timestep
        }
        print(f"  üö´ TRANSITION BAN: Map {map_id} at {tile_pos} facing {self.DIRECTION_NAMES.get(direction_back, '?')}")
    
    def is_transition_banned(self, map_id, position, direction):
        """Tell whether moving in ``direction`` from ``position`` is banned on ``map_id``.

        A move is banned when it starts on the banned tile facing the banned
        direction, or - once the ban's vicinity rule is active - when it faces
        the banned direction from any tile within the ban's Manhattan radius.
        List-typed tiles/positions are converted to tuples before comparing.
        """
        ban = self.transition_bans.get(map_id)
        if ban is None:
            return False

        target = ban['banned_tile']
        if isinstance(target, list):
            target = tuple(target)
        if isinstance(position, list):
            position = tuple(position)

        if position == target and direction == ban['banned_direction']:
            return True
        if not ban['vicinity_active']:
            return False

        manhattan = abs(position[0] - target[0]) + abs(position[1] - target[1])
        return bool(manhattan <= ban['vicinity_radius'] and direction == ban['banned_direction'])
    
    def is_position_banned(self, map_id, x, y, direction):
        """Coordinate-pair convenience wrapper around ``is_transition_banned``."""
        tile = (x, y)
        return self.is_transition_banned(map_id, tile, direction)
    
    def update_transition_ban(self, map_id, current_pos):
        """Switch on the vicinity rule of ``map_id``'s ban once the agent has moved away.

        Nothing happens if the map has no ban or the vicinity rule is already
        active. Otherwise the rule is activated (and announced) as soon as the
        Manhattan distance from the banned tile reaches 3.
        """
        ban = self.transition_bans.get(map_id)
        if ban is None or ban['vicinity_active']:
            return

        origin = ban['banned_tile']
        if isinstance(origin, list):
            origin = tuple(origin)

        manhattan = abs(current_pos[0] - origin[0]) + abs(current_pos[1] - origin[1])
        if manhattan >= 3:
            ban['vicinity_active'] = True
            print(f"  üö´ VICINITY BAN ACTIVE: Map {map_id}")
    
    def check_ban_lift_conditions(self, map_id):
        """Remove ``map_id``'s transition ban when one of three conditions holds.

        Checked in order: a recorded transition on this map that is not itself
        banned, exploration coverage at or above ``BAN_COVERAGE_LIFT_THRESHOLD``,
        or the ban being at least ``BAN_TIMEOUT_STEPS`` old. The first matching
        condition supplies the reason printed with the lift notice.
        """
        if map_id not in self.transition_bans:
            return
        ban = self.transition_bans[map_id]

        known_transitions = self.get_current_map_memory(map_id).get('transitions', [])
        usable = []
        for transition in known_transitions:
            if not self.is_transition_banned(map_id, transition['position'], transition['direction']):
                usable.append(transition)

        if usable:
            reason = "alternative transition found"
        elif self.get_exploration_coverage(map_id) >= self.BAN_COVERAGE_LIFT_THRESHOLD:
            reason = "coverage reached"
        elif self.timestep - ban['created_at'] >= self.BAN_TIMEOUT_STEPS:
            reason = "timeout"
        else:
            return

        del self.transition_bans[map_id]
        print(f"  ‚úÖ BAN LIFTED: Map {map_id} - {reason}")

    # =========================================================================
    # DEBT SYSTEMS
    # =========================================================================
    
    def get_temp_debt(self, map_id):
        """Return the temporary debt of ``map_id``.

        For the map the agent is currently on the stored value is returned
        unchanged. For any other map the value is reduced by
        ``TEMP_DEBT_DECAY`` per timestep elapsed since the map's
        ``last_visited_timestep`` (missing -> 0), never dropping below 0.0.
        """
        memory = self.get_current_map_memory(map_id)
        stored = memory.get('temp_debt', 0.0)
        if map_id == self.current_map_id:
            return stored
        elapsed = self.timestep - memory.get('last_visited_timestep', 0)
        decayed = stored - elapsed * self.TEMP_DEBT_DECAY
        return max(0.0, decayed)

    def accumulate_temp_debt(self, map_id):
        """Add one ``TEMP_DEBT_ACCUMULATION`` step to the map's temp debt, capped at ``TEMP_DEBT_MAX``."""
        memory = self.get_current_map_memory(map_id)
        raised = memory.get('temp_debt', 0.0) + self.TEMP_DEBT_ACCUMULATION
        memory['temp_debt'] = min(self.TEMP_DEBT_MAX, raised)

    def decay_all_debts(self):
        """Decay debts for non-current locations to prevent runaway accumulation.

        Every map debt except the current map's, and every location-novelty
        entry except the one for the most recent position, is multiplied by
        ``1 - DEBT_DECAY_RATE``; entries that fall below 0.1 are removed.
        """
        def _shrink(table, protected_key):
            # Iterate over a snapshot of the keys because entries may be deleted.
            for key in list(table.keys()):
                if key == protected_key:
                    continue
                table[key] *= (1.0 - self.DEBT_DECAY_RATE)
                if table[key] < 0.1:
                    del table[key]

        _shrink(self.map_novelty_debt, self.current_map_id)

        # The location under the agent is exempt; None exempts nothing.
        protected_loc = None
        if self.current_map_id is not None and len(self.last_positions) > 0:
            latest = self.last_positions[-1]
            protected_loc = self.get_location_key(latest[0], latest[1], self.current_map_id)

        _shrink(self.location_novelty, protected_loc)

    def get_exploration_coverage(self, map_id):
        """Return visited / (visited + obstructions) for ``map_id``.

        Yields 0.0 while nothing has been visited or fewer than 10 tiles
        (visited plus obstructions) are known.
        """
        memory = self.get_current_map_memory(map_id)
        n_visited = len(memory['visited_tiles'])
        n_blocked = len(memory['obstructions'])
        n_known = n_visited + n_blocked
        if n_visited == 0 or n_known < 10:
            return 0.0
        return n_visited / n_known

    def detect_obstruction(self, prev_context, context_state, raw_position, prev_raw_position):
        """Record an obstruction when a movement action left the position unchanged.

        Returns True (after calling ``record_obstruction`` with the tile, map id
        and facing taken from ``context_state``) only if previous data exists,
        the last action was a directional one, and the raw position equals the
        previous raw position. Returns False otherwise.
        """
        if prev_context is None or prev_raw_position is None:
            return False

        tried_to_move = self.last_action in ('UP', 'DOWN', 'LEFT', 'RIGHT')
        if not tried_to_move:
            return False

        if raw_position == prev_raw_position:
            tile_x, tile_y = raw_position[0], raw_position[1]
            map_id = int(context_state[2])
            facing = int(context_state[5])
            self.record_obstruction(tile_x, tile_y, map_id, facing)
            return True
        return False

    # =========================================================================
    # MENU TRAP B-BOOST
    # =========================================================================
    
    def update_menu_trap_tracking(self, context_state, action_taken, raw_position=None):
        """Track frames spent pressing buttons without any state change ("menu trap").

        Moving off the remembered trap position resets the boost. While the
        context hash equals the last recorded one and a button action
        (A/B/Start/Select) was taken, the frame counter grows; past
        ``MENU_TRAP_THRESHOLD`` the B action's utility is remembered (once) and
        the boost multiplier is raised by ``B_BOOST_INCREMENT`` up to
        ``B_BOOST_MAX``.
        """
        if raw_position:
            here = raw_position
        else:
            here = (round(context_state[0] * 255), round(context_state[1] * 255))

        if self.menu_trap_position is not None and here != self.menu_trap_position:
            self.reset_menu_trap_boost()
            return

        state_unchanged = self.get_context_state_hash(context_state) == self.last_context_state_hash
        if not state_unchanged:
            if here != self.menu_trap_position:
                self.reset_menu_trap_boost()
            return

        if action_taken not in ("A", "B", "Start", "Select"):
            return

        self.menu_trap_frames += 1
        self.menu_trap_position = here
        if not (self.menu_trap_frames > self.MENU_TRAP_THRESHOLD):
            return

        if self.original_b_utility is None:
            # Remember B's utility before boosting so it can be restored later.
            b_action = next((a for a in self.actions() if a.action == 'B'), None)
            if b_action is not None:
                self.original_b_utility = b_action.utility
        self.menu_trap_b_boost = min(self.B_BOOST_MAX, self.menu_trap_b_boost + self.B_BOOST_INCREMENT)

    def reset_menu_trap_boost(self):
        """Clear all menu-trap state, restoring B's remembered utility if a boost was active."""
        remembered = self.original_b_utility
        if self.menu_trap_b_boost > 1.0 and remembered is not None:
            for candidate in self.actions():
                if candidate.action != 'B':
                    continue
                candidate.utility = remembered
                break

        self.menu_trap_frames, self.menu_trap_b_boost = 0, 1.0
        self.menu_trap_position = None
        self.original_b_utility = None

    # =========================================================================
    # STANDARD METHODS
    # =========================================================================
    
    def add(self, p):
        """Register perceptron ``p`` and mark the cache as stale."""
        registry = self.perceptrons
        registry.append(p)
        self._cache_valid = False

    def actions(self):
        """Return a new list of the perceptrons whose ``kind`` is "action", in registration order."""
        return list(filter(lambda unit: unit.kind == "action", self.perceptrons))

    def entities(self):
        """Return a new list of the perceptrons whose ``kind`` is "entity", in registration order."""
        return list(filter(lambda unit: unit.kind == "entity", self.perceptrons))

    def get_location_key(self, x, y, map_id, bin_size=5):
        """Build the ``(map_id, x_bin, y_bin)`` key, snapping x and y down to multiples of ``bin_size``."""
        map_part = int(map_id)
        x_bin = int(x // bin_size) * bin_size
        y_bin = int(y // bin_size) * bin_size
        return (map_part, x_bin, y_bin)

    def is_near_map_edge(self, x, y):
        """True when either coordinate is below 10 or above 245."""
        low_margin, high_margin = 10, 245
        return x < low_margin or x > high_margin or y < low_margin or y > high_margin

    def record_action_execution(self, action_name):
        """Increment the execution tally for ``action_name``; falsy names are ignored."""
        if not action_name:
            return
        tally = self.action_execution_count
        tally[action_name] = tally.get(action_name, 0) + 1

    def get_position_stagnation(self):
        """Count how many earlier entries of ``last_positions`` equal the most recent one.

        Every earlier match is counted, not only the trailing consecutive run.
        Returns 0 when fewer than two positions are recorded.
        """
        if len(self.last_positions) < 2:
            return 0
        history = list(self.last_positions)
        latest = history[-1]
        repeats = 0
        for earlier in history[:-1]:
            if earlier == latest:
                repeats += 1
        return repeats

    def get_group_weight(self, group):
        """Sum the utilities of all action perceptrons belonging to ``group``."""
        total = 0
        for act in self.actions():
            if act.group == group:
                total += act.utility
        return total

    # =========================================================================
    # MODE SWAP & STAGNATION
    # =========================================================================
    
    def get_context_state_hash(self, context_state):
        """Reduce the first six context values to a hashable tuple.

        Indices 0, 1 and 4 are rounded to two decimals; indices 2, 3 and 5 are
        truncated to int.
        """
        x_part = round(context_state[0], 2)
        y_part = round(context_state[1], 2)
        map_part = int(context_state[2])
        battle_part = int(context_state[3])
        menu_part = round(context_state[4], 2)
        facing_part = int(context_state[5])
        return (x_part, y_part, map_part, battle_part, menu_part, facing_part)

    def check_state_stagnation(self, context_state):
        """Update the unchanged-state counter and report whether it reached the threshold.

        When the context hash repeats, the counter is incremented and, on the
        first repeat, the last action (if any) is remembered as the stagnation
        initiator. Any change of hash clears both. The hash is stored for the
        next call.
        """
        fingerprint = self.get_context_state_hash(context_state)
        if fingerprint != self.last_context_state_hash:
            self.state_stagnation_count = 0
            self.stagnation_initiator_action = None
        else:
            self.state_stagnation_count += 1
            first_repeat = self.state_stagnation_count == 1
            if first_repeat and self.last_action:
                self.stagnation_initiator_action = self.last_action
        self.last_context_state_hash = fingerprint
        return self.state_stagnation_count >= self.STATE_STAGNATION_THRESHOLD

    def check_direction_change_progress(self, context_state):
        """Check if direction changed - counts as partial progress.

        The facing taken from index 5 is stored on every call; the first call
        (no stored facing yet) always reports False.
        """
        facing_now = int(context_state[5])
        facing_before = self.last_direction_for_progress
        self.last_direction_for_progress = facing_now
        return facing_before is not None and facing_now != facing_before

    def apply_stagnation_initiator_penalty(self):
        """Scale down the utility of the action that started the current stagnation.

        The first action perceptron matching the remembered initiator has its
        utility multiplied by ``STAGNATION_INITIATOR_PENALTY`` and clamped to
        its group's floor; the change is printed. The remembered initiator is
        cleared afterwards whether or not a matching action was found.
        """
        culprit = self.stagnation_initiator_action
        if culprit is None:
            return

        target = next((a for a in self.actions() if a.action == culprit), None)
        if target is not None:
            before = target.utility
            scaled = before * self.STAGNATION_INITIATOR_PENALTY
            if target.group == "interact":
                floor = self.INTERACT_UTILITY_FLOOR
            else:
                floor = self.MOVE_UTILITY_FLOOR
            target.utility = max(scaled, floor)
            print(f"  üìç STAGNATION PENALTY: {culprit} {before:.3f} ‚Üí {target.utility:.3f}")

        self.stagnation_initiator_action = None

    def check_productive_change(self, context_state):
        """Decide whether the latest state counts as productive progress.

        Progress is a map change, a flip of the battle flag, or having moved
        more than 0.03 (in normalized coordinates) from the position recorded
        at the last mode swap; when several apply, the last one checked names
        the reason. Independently, if direction changes count as progress and
        the facing changed, the stagnation counter is reduced by 5 (not below
        0). The last map id and battle flag are updated.

        Returns:
            (productive, reason) - a bool and a short description ("" if none).
        """
        map_now = int(context_state[2])
        battle_now = context_state[3] > 0.5
        x_now, y_now = context_state[0], context_state[1]

        findings = []
        if self.last_map_id is not None and map_now != self.last_map_id:
            findings.append("map change")
        if self.last_battle_state is not None and battle_now != self.last_battle_state:
            findings.append("battle change")
        if self.position_at_mode_swap is not None:
            dx = x_now - self.position_at_mode_swap[0]
            dy = y_now - self.position_at_mode_swap[1]
            dist = np.sqrt(dx**2 + dy**2)
            if dist > 0.03:
                findings.append(f"moved {dist*255:.1f} tiles")

        if self.direction_change_counts_as_progress and self.check_direction_change_progress(context_state):
            self.state_stagnation_count = max(0, self.state_stagnation_count - 5)

        self.last_map_id = map_now
        self.last_battle_state = battle_now

        if findings:
            return True, findings[-1]
        return False, ""

    def on_productive_change(self, reason):
        """Restore default mode-swap thresholds and clear stagnation/swap counters.

        ``reason`` is accepted but not used here.
        """
        self.move_to_interact_threshold, self.interact_to_move_threshold = (
            self.DEFAULT_MOVE_TO_INTERACT_THRESHOLD,
            self.DEFAULT_INTERACT_TO_MOVE_THRESHOLD,
        )
        for counter in ("swap_chain_count", "state_stagnation_count"):
            setattr(self, counter, 0)
        self.stagnation_initiator_action = None
        self.unproductive_swap_count = 0

    def on_mode_swap(self, from_mode, to_mode):
        """Book-keeping for a stagnation-driven control-mode swap.

        Bumps the swap counters, restarts the frames-in-mode counter, resets
        the top utility of the new mode's group after
        ``UNPRODUCTIVE_SWAP_THRESHOLD`` unproductive swaps, and raises the
        threshold for leaving ``to_mode`` by ``THRESHOLD_INCREMENT`` (capped at
        ``MAX_THRESHOLD``). ``from_mode`` is not used here.
        """
        self.swap_chain_count += 1
        self.frames_in_current_mode = 0

        self.unproductive_swap_count += 1
        if self.unproductive_swap_count >= self.UNPRODUCTIVE_SWAP_THRESHOLD:
            self._reset_highest_to_third(to_mode)
            self.unproductive_swap_count = 0

        if to_mode == "interact":
            threshold_attr = "interact_to_move_threshold"
        else:
            threshold_attr = "move_to_interact_threshold"
        raised = getattr(self, threshold_attr) + self.THRESHOLD_INCREMENT
        setattr(self, threshold_attr, min(self.MAX_THRESHOLD, raised))

    def _reset_highest_to_third(self, mode):
        if mode in ["battle", "both"]:
            return
        group = "move" if mode == "move" else "interact"
        group_actions = [a for a in self.actions() if a.group == group]
        if len(group_actions) < 3:
            return
        sorted_actions = sorted(group_actions, key=lambda a: a.utility, reverse=True)
        floor = self.INTERACT_UTILITY_FLOOR if group == "interact" else self.MOVE_UTILITY_FLOOR
        sorted_actions[0].utility = max(sorted_actions[2].utility * 0.9, floor)

    def should_use_both_mode(self):
        """Check if we should allow all actions (both mode).

        True when either the state-stagnation counter or the unproductive-swap
        counter exceeds its "both mode" threshold.
        """
        stuck_in_state = self.state_stagnation_count > self.BOTH_MODE_STAGNATION_THRESHOLD
        if stuck_in_state:
            return stuck_in_state
        return self.unproductive_swap_count > self.BOTH_MODE_SWAP_THRESHOLD

    def determine_control_mode(self, context_state, raw_position=None):
        """Choose the control mode ("battle", "both", "move" or "interact") for this step.

        Decision order:
          1. Battle flag set (index 3 > 0.5) -> "battle" (no other state touched).
          2. Productive change detected -> thresholds/counters are reset.
          3. ``should_use_both_mode()`` -> "both".
          4. State stagnation -> penalize the initiating action and flip between
             "move" and "interact".
          5. Tile-probing and position-stagnation rules may switch move -> interact;
             exhausted probing or the interact timeout switch interact -> move.

        Args:
            context_state: normalized context vector (x, y, map, battle, menu, direction).
            raw_position: optional (x, y) tile coordinates; when falsy they are
                derived from the normalized coordinates times 255.

        Returns:
            The selected mode string; ``self.control_mode`` is updated for every
            result except "battle" and "both".
        """
        if context_state[3] > 0.5:
            return "battle"

        self.frames_in_current_mode += 1
        # Read before any swap bookkeeping so it reflects the state on entry.
        position_stagnation = self.get_position_stagnation()

        productive, reason = self.check_productive_change(context_state)
        if productive:
            self.on_productive_change(reason)

        if self.should_use_both_mode():
            return "both"

        if self.check_state_stagnation(context_state):
            self.apply_stagnation_initiator_penalty()
            # Capture the mode being left BEFORE overwriting it, so the swap
            # hook receives the real (from, to) pair instead of (to, to).
            previous_mode = self.control_mode
            new_mode = "interact" if previous_mode == "move" else "move"
            self.control_mode = new_mode
            self.position_at_mode_swap = (context_state[0], context_state[1])
            self.on_mode_swap(previous_mode, new_mode)
            self.state_stagnation_count = 0
            return self.control_mode

        raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
        raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
        current_map = int(context_state[2])

        tile_needs_probing = self.should_interact_at_tile(raw_x, raw_y, current_map)
        untried_directions = self.get_untried_directions(raw_x, raw_y, current_map)

        # Probing-driven switch: deliberately bypasses on_mode_swap, so it does
        # not count as an unproductive swap or raise any threshold.
        if tile_needs_probing and untried_directions and self.control_mode == "move" and self.frames_in_current_mode >= 3:
            self.control_mode = "interact"
            self.position_at_mode_swap = (context_state[0], context_state[1])
            self.frames_in_current_mode = 0
            return self.control_mode

        if self.control_mode == "move" and position_stagnation >= self.move_to_interact_threshold:
            self.control_mode = "interact"
            self.position_at_mode_swap = (context_state[0], context_state[1])
            self.on_mode_swap("move", "interact")
        elif self.control_mode == "interact":
            if (not tile_needs_probing or not untried_directions) and self.frames_in_current_mode >= 5:
                # Nothing left to probe here: return to movement without swap bookkeeping.
                self.control_mode = "move"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.frames_in_current_mode = 0
            elif self.frames_in_current_mode >= self.interact_to_move_threshold:
                self.control_mode = "move"
                self.position_at_mode_swap = (context_state[0], context_state[1])
                self.on_mode_swap("interact", "move")

        return self.control_mode

    # =========================================================================
    # EXPLORATION TRACKING
    # =========================================================================
    
    def update_exploration_tracking(self, context_state, prev_context_state, raw_position=None, prev_raw_position=None):
        """Update per-map exploration memory for the current step.

        On a map change this records the transition taken from the previous
        map, bans walking straight back through the entry tile of the new map,
        and runs the map-change hook. On every step it records the visited
        tile, accumulates temp debt, updates/lifts transition bans, checks for
        an obstruction and for interaction verification, and stores the facing.
        Every 300 timesteps all debts are decayed.

        Args:
            context_state: normalized context vector (x, y, map, battle, menu, direction).
            prev_context_state: the previous context vector, or None.
            raw_position: optional (x, y) tile coordinates; when falsy they are
                derived from the normalized coordinates times 255.
            prev_raw_position: previous raw (x, y), or None.
        """
        current_map = int(context_state[2])
        raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
        raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
        current_pos = (raw_x, raw_y)

        if self.current_map_id is not None and current_map != self.current_map_id:
            prev_map = self.current_map_id
            if prev_context_state is not None and prev_raw_position is not None:
                self.record_transition(prev_raw_position, prev_map, current_map,
                    int(prev_context_state[5]), 'interact' if self.last_action == 'A' else 'walk')
            if prev_raw_position is not None:
                entry_dir = int(context_state[5]) if prev_context_state is not None else 0
                # Directions are encoded DOWN=0, UP=1, LEFT=2, RIGHT=3 (see the
                # state-vector notes in Cell 1), so opposites differ only in the
                # lowest bit: DOWN<->UP, LEFT<->RIGHT. `(d + 2) % 4` would yield
                # a perpendicular direction under this encoding.
                direction_back = entry_dir ^ 1
                self.create_transition_ban(current_map, current_pos, direction_back)
            self.on_map_change(current_map)

        self.current_map_id = current_map
        self.record_visited_tile(raw_x, raw_y, current_map)
        self.accumulate_temp_debt(current_map)
        self.update_transition_ban(current_map, current_pos)
        self.check_ban_lift_conditions(current_map)

        if prev_context_state is not None and prev_raw_position is not None:
            self.detect_obstruction(prev_context_state, context_state, raw_position, prev_raw_position)

        self.check_interaction_verification(context_state, prev_context_state)
        self.last_direction = int(context_state[5])

        if self.timestep % 300 == 0:
            self.decay_all_debts()

    def on_map_change(self, new_map):
        """Persist exploration memory, restart in "move" mode and print a summary of ``new_map``."""
        self.save_exploration_memory()
        self.control_mode, self.frames_in_current_mode = "move", 0

        memory = self.get_current_map_memory(new_map)
        tile_interactions = memory.get('tile_interactions', {})
        n_visited = len(memory['visited_tiles'])
        n_obstructions = len(memory['obstructions'])
        print(f"  üó∫Ô∏è MAP CHANGE ‚Üí {new_map}: {n_visited} visited, {n_obstructions} obs")

        n_exhausted = 0
        for record in tile_interactions.values():
            if record.get('exhausted', False):
                n_exhausted += 1
        print(f"     Tiles probed: {len(tile_interactions)}, exhausted: {n_exhausted}")

    # =========================================================================
    # REPETITION & PATTERN HANDLING
    # =========================================================================
    
    def track_consecutive_action(self, action_name):
        """Maintain the length of the current run of identical actions."""
        if action_name != self.current_repeated_action:
            # A different action starts a new run of length one.
            self.current_repeated_action = action_name
            self.consecutive_action_count = 1
            return
        self.consecutive_action_count += 1

    def get_learning_multiplier(self, action_name):
        """Return the learning-rate multiplier for ``action_name``.

        1.0 unless ``action_name`` is the currently repeated action and its run
        length has reached ``LEARNING_SLOWDOWN_START``; from there it falls
        linearly to 0.05 at ``LEARNING_SLOWDOWN_MAX``.
        """
        streak = self.consecutive_action_count
        slowdown_start = self.LEARNING_SLOWDOWN_START
        if action_name != self.current_repeated_action or streak < slowdown_start:
            return 1.0
        slowdown_span = self.LEARNING_SLOWDOWN_MAX - slowdown_start
        progress = min(1.0, (streak - slowdown_start) / slowdown_span)
        return max(0.05, 1.0 - 0.95 * progress)

    def get_nth_highest_utility(self, group, n=3):
        """Return the ``n``-th largest utility in ``group``, or the group's floor if it has fewer than ``n`` actions."""
        ranked = [act.utility for act in self.actions() if act.group == group]
        ranked.sort(reverse=True)
        if n > len(ranked):
            if group == "interact":
                return self.INTERACT_UTILITY_FLOOR
            return self.MOVE_UTILITY_FLOOR
        return ranked[n - 1]

    def detect_pattern(self):
        """Look for a short action sequence repeating at the end of the action history.

        Considers the last ``PATTERN_CHECK_WINDOW`` actions and, for pattern
        lengths 1..``PATTERN_MAX_LENGTH`` (shortest first), counts how many
        times the trailing sequence repeats back-to-back.

        Returns:
            (pattern_tuple, repeat_count) for the first length reaching
            ``PATTERN_MIN_REPEATS``, otherwise (None, 0). Always (None, 0) when
            fewer than 6 actions are recorded.
        """
        if len(self.action_history) < 6:
            return None, 0

        window = list(self.action_history)[-self.PATTERN_CHECK_WINDOW:]
        window_len = len(window)

        for size in range(1, self.PATTERN_MAX_LENGTH + 1):
            if window_len < size * self.PATTERN_MIN_REPEATS:
                continue
            tail = tuple(window[-size:])
            repeats = 0
            # Walk backwards in strides of `size` while each chunk matches the tail.
            for start in range(window_len - size, -1, -size):
                if tuple(window[start:start + size]) != tail:
                    break
                repeats += 1
            if repeats >= self.PATTERN_MIN_REPEATS:
                return tail, repeats
        return None, 0

    def apply_pattern_penalty(self):
        """Store the detected action pattern and reset the utilities of the actions in it.

        Each distinct action in the pattern is set to 90% of the third-highest
        utility of its group (button actions -> "interact", everything else ->
        "move"), clamped to the floor of the perceptron's own group. With no
        pattern the stored pattern state is cleared.
        """
        pattern, repeat_count = self.detect_pattern()
        if pattern is None:
            self.detected_pattern = None
            self.pattern_repeat_count = 0
            return

        self.detected_pattern = pattern
        self.pattern_repeat_count = repeat_count

        button_actions = ("A", "B", "Start", "Select")
        for action_name in set(pattern):
            group = "interact" if action_name in button_actions else "move"
            third_util = self.get_nth_highest_utility(group, n=3)
            target = next((a for a in self.actions() if a.action == action_name), None)
            if target is None:
                continue
            if target.group == "interact":
                floor = self.INTERACT_UTILITY_FLOOR
            else:
                floor = self.MOVE_UTILITY_FLOOR
            target.utility = max(third_util * 0.9, floor)

    def apply_repetition_penalty(self):
        """Penalize the action that is currently being repeated.

        At ``HARD_RESET_THRESHOLD`` consecutive uses the utility is reset to
        90% of the group's third-highest utility and the run counter is
        zeroed; at ``PENALTY_THRESHOLD`` it is multiplied by 0.7. Both results
        are clamped to the group's floor.
        """
        repeated = self.current_repeated_action
        if repeated is None:
            return

        offender = next((a for a in self.actions() if a.action == repeated), None)
        if offender is None:
            return

        if offender.group == "interact":
            floor = self.INTERACT_UTILITY_FLOOR
        else:
            floor = self.MOVE_UTILITY_FLOOR

        if self.consecutive_action_count >= self.HARD_RESET_THRESHOLD:
            reset_value = self.get_nth_highest_utility(offender.group, n=3) * 0.9
            offender.utility = max(reset_value, floor)
            self.consecutive_action_count = 0
        elif self.consecutive_action_count >= self.PENALTY_THRESHOLD:
            offender.utility = max(offender.utility * 0.7, floor)

    # =========================================================================
    # ENTITY & LEARNING
    # =========================================================================
    
    def spawn_innate_entities(self, learning_state):
        """Create the four built-in entity perceptrons once.

        Each entity gets a zero weight vector as long as ``learning_state``
        with fixed weights on a few state indices: 0.5 each when it watches
        two indices, 1.0 when it watches one. Later calls are no-ops.
        """
        if self.innate_entities_spawned:
            return

        state_dim = len(learning_state)
        innate_layout = (
            ("sense_menu", [5, 6]),
            ("sense_battle", [3, 4]),
            ("sense_movement", [0, 1]),
            ("sense_map_transition", [2]),
        )
        for etype, indices in innate_layout:
            entity = Perceptron("entity", entity_type=etype)
            entity.ensure_weights(state_dim)
            entity.weights = np.zeros(state_dim)
            share = 0.5 if len(indices) > 1 else 1.0
            for idx in indices:
                entity.weights[idx] = share
            self.add(entity)

        self.innate_entities_spawned = True

    def enforce_utility_floors(self):
        """Raise every action's utility to at least its group's floor ("move" vs. everything else)."""
        for act in self.actions():
            if act.group == "move":
                floor = self.MOVE_UTILITY_FLOOR
            else:
                floor = self.INTERACT_UTILITY_FLOOR
            act.utility = max(act.utility, floor)

    def get_spawn_threshold_adaptive(self, error_type='combined', percentile=50):
        """Return a spawn threshold derived from recent error history.

        Uses the numeric or visual history for those ``error_type`` values and
        the combined history for anything else. With fewer than 100 samples
        the fixed value 0.0005 is returned; otherwise the requested percentile,
        but never less than 0.001.
        """
        by_type = {
            'numeric': self.numeric_error_history,
            'visual': self.visual_error_history,
        }
        history = by_type.get(error_type, self.error_history)
        if len(history) < 100:
            return 0.0005
        return max(0.001, np.percentile(history, percentile))

    def stagnation_level(self, window=10):
        """Return ``1 - tanh(2 * mean step size)`` over the last ``window`` learning states.

        The step size is the Euclidean norm of the difference between
        consecutive states. Returns 0.0 until ``window`` states are stored.
        """
        if len(self.prev_learning_states) < window:
            return 0.0
        recent = list(self.prev_learning_states)[-window:]
        step_sizes = [np.linalg.norm(later - earlier) for earlier, later in zip(recent, recent[1:])]
        mean_step = np.mean(step_sizes)
        return 1.0 - np.tanh(mean_step * 2.0)

    def predict_future_error(self, state, action, context_state, raw_position=None):
        """Score ``action`` by an estimate of the prediction error it may lead to.

        Blends mean entity novelty (0.7; 0.5 when no entities exist) with the
        action's utility (0.3), then damps the score by accumulated
        map/temp/location debt, by repetition of the same action beyond
        ``LEARNING_SLOWDOWN_START`` and by membership in the detected pattern.
        Gaussian noise with standard deviation 0.05 is added to the result.
        """
        entity_pool = self.entities()
        if entity_pool:
            entity_novelty = np.mean([e.predict(state) * e.utility for e in entity_pool])
        else:
            entity_novelty = 0.5
        score = entity_novelty * 0.7 + action.utility * 0.3

        current_map = int(context_state[2])
        if raw_position:
            position = raw_position
        else:
            position = (context_state[0]*255, context_state[1]*255)
        loc = self.get_location_key(*position, current_map)

        map_debt = min(self.map_novelty_debt.get(current_map, 0.0), self.MAX_MAP_DEBT)
        loc_debt = min(self.location_novelty.get(loc, 0.0), self.MAX_LOCATION_DEBT)
        total_debt = map_debt + self.get_temp_debt(current_map) + loc_debt * 0.5
        score *= 1.0 / (1.0 + total_debt * 5.0)

        repeating = action.action == self.current_repeated_action
        if repeating and self.consecutive_action_count > self.LEARNING_SLOWDOWN_START:
            excess = self.consecutive_action_count - self.LEARNING_SLOWDOWN_START
            score *= 1.0 / (1.0 + excess * 0.15)

        if self.detected_pattern and action.action in self.detected_pattern:
            score *= 1.0 / (1.0 + self.pattern_repeat_count * 0.2)

        return score + np.random.randn() * 0.05

    def compute_multi_modal_error(self, state, next_state):
        """Measure how much the learning state changed between two steps.

        Returns:
            (weighted, numeric, visual) where ``numeric`` is the summed
            absolute change of the first (up to) eight components,
            ``visual`` is the Euclidean norm of the change in the remaining
            components, and ``weighted`` combines per-component weights on the
            numeric part with twice the visual part.
        """
        n_numeric = min(8, len(state), len(next_state))
        diffs = [abs(next_state[i] - state[i]) for i in range(n_numeric)]
        weights = [0.5, 0.5, 10.0, 5.0, 3.0, 2.0, 1.5, 0.3]

        visual = np.linalg.norm(next_state[8:] - state[8:])
        numeric = sum(diffs)
        weighted = sum(d * w for d, w in zip(diffs, weights)) + visual * 2.0
        return weighted, numeric, visual

    def learn(self, learning_state, next_learning_state, context_state, next_context_state, dead=False,
            raw_position=None, next_raw_position=None):
        """Run one learning step from the observed state transition.

        In order: align the two learning-state vectors, spawn innate entities
        on first use, update exploration tracking, compute and record the
        transition errors, grow map/location novelty debt, update every
        perceptron with the (possibly damped) weighted error, apply
        repetition/pattern penalties and utility floors, reward the last action
        if the position changed, periodically save exploration memory, and
        append the last action to the action history.

        Args:
            learning_state: state vector before the action (needs ``.shape``).
            next_learning_state: state vector after the action.
            context_state: context vector for the current step; indices 0-1 are
                compared against the previous context to detect movement and
                index 2 is used as the map id.
            next_context_state: not used in this method body.
            dead: not used in this method body.
            raw_position: optional raw (x, y); when falsy the location key is
                derived from the first two context components times 255.
            next_raw_position: not used in this method body.
        """
        # When shapes differ, zero-pad the shorter vector on the right so both have equal length.
        if learning_state.shape != next_learning_state.shape:
            max_dim = max(len(learning_state), len(next_learning_state))
            learning_state = np.pad(learning_state, (0, max(0, max_dim - len(learning_state))))
            next_learning_state = np.pad(next_learning_state, (0, max(0, max_dim - len(next_learning_state))))
        
        if not self.innate_entities_spawned:
            self.spawn_innate_entities(learning_state)
        
        # Previous context comes from the logged history; previous raw position is
        # cached on the instance by the prior call to learn().
        prev_context = self.prev_context_states[-1] if self.prev_context_states else None
        prev_raw = getattr(self, '_last_raw_position', None)
        self.update_exploration_tracking(context_state, prev_context, raw_position, prev_raw)
        self._last_raw_position = raw_position
        
        weighted_error, numeric_error, visual_error = self.compute_multi_modal_error(learning_state, next_learning_state)
        self.error_history.append(weighted_error)
        self.numeric_error_history.append(numeric_error)
        self.visual_error_history.append(visual_error)
        
        current_map = int(context_state[2])
        loc = self.get_location_key(*(raw_position if raw_position else (context_state[0]*255, context_state[1]*255)), current_map)
        
        # Visit counters per map and per binned location.
        self.visited_maps[current_map] = self.visited_maps.get(current_map, 0) + 1
        self.location_memory[loc] = self.location_memory.get(loc, 0) + 1
        
        # Debt starts accruing after 10 map visits / 15 location visits and grows
        # with the excess count, capped at the respective maximum.
        if self.visited_maps[current_map] > 10:
            self.map_novelty_debt[current_map] = min(self.MAX_MAP_DEBT, 
                self.map_novelty_debt.get(current_map, 0.0) + 0.05 * (self.visited_maps[current_map] - 10))
        if self.location_memory[loc] > 15:
            self.location_novelty[loc] = min(self.MAX_LOCATION_DEBT,
                self.location_novelty.get(loc, 0.0) + 0.1 * (self.location_memory[loc] - 15))
        
        # Damp the learning signal in heavily visited maps/locations. Note the
        # undamped value has already been appended to error_history above.
        if self.visited_maps[current_map] > 30:
            weighted_error *= 0.5
        if self.location_memory[loc] > 25:
            weighted_error *= 0.7
        
        stagnation = self.stagnation_level()
        learning_mult = self.get_learning_multiplier(self.last_action) if self.last_action else 1.0
        if self.detected_pattern and self.last_action in self.detected_pattern:
            learning_mult *= 0.5
        
        for p in self.perceptrons:
            # Only the perceptron of the last action uses the slowed-down multiplier.
            mult = learning_mult if (p.kind == "action" and p.action == self.last_action) else 1.0
            # NOTE(review): when the last action is part of the detected pattern this
            # halves its multiplier a second time (0.25x overall) - confirm intended.
            if p.kind == "action" and self.detected_pattern and p.action in self.detected_pattern:
                mult *= 0.5
            p.update(learning_state, weighted_error * mult, stagnation=stagnation)
        
        self.apply_repetition_penalty()
        self.apply_pattern_penalty()
        self.enforce_utility_floors()
        
        # Reward the last action when the first two context components changed:
        # x1.15 near a map edge (raw position required), otherwise x1.08, capped at 2.0.
        if prev_context is not None and np.linalg.norm(context_state[:2] - prev_context[:2]) > 0.001:
            if self.last_action:
                for a in self.actions():
                    if a.action == self.last_action:
                        boost = 1.15 if raw_position and self.is_near_map_edge(*raw_position) else 1.08
                        a.utility = min(a.utility * boost, 2.0)
                        break
        
        if self.timestep % self.SAVE_INTERVAL == 0:
            self.save_exploration_memory()
        
        # Appended last, so the pattern detection above ran on the history
        # without the current action.
        self.action_history.append(self.last_action)

    def log_state(self, learning_state, context_state):
        """Append the learning and context states to their respective histories."""
        for history, entry in ((self.prev_learning_states, learning_state),
                               (self.prev_context_states, context_state)):
            history.append(entry)

    def update_position(self, x, y):
        """Append the integer-truncated ``(x, y)`` to the position history."""
        tile = (int(x), int(y))
        self.last_positions.append(tile)

    def get_tile_interaction_stats(self, map_id):
        """Summarize tile-interaction records for ``map_id``.

        Returns a dict with the number of probed tiles, the number flagged
        ``exhausted``, and the number with at least one positive
        ``direction_successes`` entry for directions 0-3.
        """
        memory = self.get_current_map_memory(map_id)
        tile_interactions = memory.get('tile_interactions', {})

        exhausted_count = 0
        success_count = 0
        for record in tile_interactions.values():
            if record.get('exhausted', False):
                exhausted_count += 1
            successes = record.get('direction_successes', {})
            if any(successes.get(d, 0) > 0 for d in range(4)):
                success_count += 1

        return {
            'probed': len(tile_interactions),
            'exhausted': exhausted_count,
            'with_success': success_count,
        }

    # =========================================================================
    # TEACHING MODE - LEARNING FROM HUMAN (NEW)
    # =========================================================================
        
    def learn_from_human_action(self, learning_state, human_action, context_state):
        """Learn from human's action choice.

        Counts the demonstration per "<context>_<map id>" key, raises the
        chosen action's utility by 5% (capped at 2.0, logged every 50th
        demonstration), then updates every action perceptron: the chosen one
        with a teaching error of 0.1, all others with -0.02. ``None`` and
        "NONE" are ignored.
        """
        if human_action is None or human_action == "NONE":
            return

        self.demonstration_count += 1

        # Per-context tally of demonstrated actions.
        context = self._detect_context(context_state)
        context_key = f"{context}_{int(context_state[2])}"
        per_context = self.context_action_stats.setdefault(context_key, {})
        per_context[human_action] = per_context.get(human_action, 0) + 1

        # Utility boost for the first perceptron matching the demonstrated action.
        chosen = next((a for a in self.actions() if a.action == human_action), None)
        if chosen is not None:
            old_utility = chosen.utility
            chosen.utility = min(chosen.utility * 1.05, 2.0)
            if self.demonstration_count % 50 == 0:
                print(f"  üìö Learning: {human_action} utility {old_utility:.3f} ‚Üí {chosen.utility:.3f}")

        # Weight update: positive signal for the demonstrated action, small negative for the rest.
        state_dim = len(learning_state)
        for a in self.actions():
            teaching_error = 0.1 if a.action == human_action else -0.02
            a.ensure_weights(state_dim)
            a.update(learning_state, teaching_error, stagnation=0.0)

    def _detect_context(self, context_state):
        """Determine game context from state."""
        if context_state[3] > 0.5:
            return "battle"
        elif context_state[4] > 0.5:
            return "menu"
        else:
            return "overworld"

    def print_teaching_stats(self):
        """Print what AI has learned from human.

        For every recorded context the five most frequently demonstrated
        actions are listed with their counts and share of that context's
        demonstrations. Prints nothing when no statistics exist.
        """
        if not self.context_action_stats:
            return

        rule = '=' * 70
        print(f"\n{rule}")
        print(f"üìö TEACHING STATISTICS (Total demonstrations: {self.demonstration_count})")
        print(f"{rule}")

        for context_key, actions in self.context_action_stats.items():
            total = sum(actions.values())
            print(f"\n  Context: {context_key}")
            ranked = sorted(actions.items(), key=lambda pair: pair[1], reverse=True)
            for action, count in ranked[:5]:
                pct = (count / total) * 100
                print(f"    {action}: {count} times ({pct:.1f}%)")

    # =========================================================================
    # MODEL SAVING/LOADING
    # =========================================================================

    def save_model(self, filepath=None):
        """Save model with compressed weights to a JSON checkpoint.

        Only weight entries whose magnitude exceeds 1e-6 are stored, as
        ``(index, value)`` pairs next to the vector length. Perceptrons whose
        ``weights`` is ``None`` are not written at all.

        The checkpoint is serialized in memory first, written to a sibling
        ``<name>.tmp`` file and then moved over the target, so a failed or
        interrupted save never leaves a truncated checkpoint behind.

        Args:
            filepath: Destination (``str`` or ``Path``). Defaults to
                ``MODEL_FILE``.

        Returns:
            None. Errors are reported with a printed message, not raised.
        """
        if filepath is None:
            filepath = MODEL_FILE

        def sparse_pairs(weights):
            # (index, value) for every weight with magnitude above 1e-6.
            nonzero_indices = np.where(np.abs(weights) > 1e-6)[0]
            return [(int(idx), float(weights[idx])) for idx in nonzero_indices]

        actions_data = []
        for a in self.actions():
            if a.weights is not None:
                actions_data.append({
                    "action": a.action,
                    "group": a.group,
                    "utility": float(a.utility),
                    "weights_shape": int(len(a.weights)),
                    "weights_nonzero": sparse_pairs(a.weights),
                    "learning_rate": float(a.learning_rate),
                    "familiarity": float(a.familiarity)
                })

        entities_data = []
        for e in self.entities():
            if e.weights is not None:
                entities_data.append({
                    "entity_type": e.entity_type,
                    "utility": float(e.utility),
                    "weights_shape": int(len(e.weights)),
                    "weights_nonzero": sparse_pairs(e.weights),
                    "familiarity": float(e.familiarity)
                })

        debt_data = {
            "map_novelty_debt": {int(k): float(v) for k, v in self.map_novelty_debt.items()},
            # Keys are stringified because JSON object keys must be strings.
            "location_novelty": {str(k): float(v) for k, v in self.location_novelty.items()},
            "visited_maps": {int(k): int(v) for k, v in self.visited_maps.items()}
        }

        teaching_data = {
            "demonstration_count": int(self.demonstration_count),
            "context_action_stats": {k: {ak: int(av) for ak, av in v.items()}
                                        for k, v in self.context_action_stats.items()}
        }

        model_data = {
            "timestep": int(self.timestep),
            "perceptrons": {
                "actions": actions_data,
                "entities": entities_data
            },
            "debt_tracking": debt_data,
            "teaching_stats": teaching_data,
            "control_mode": self.control_mode
        }

        try:
            # Serialize before touching the disk: a non-serializable value must
            # not cost us the previous checkpoint.
            payload = json.dumps(model_data, indent=2)

            target = Path(filepath)
            tmp_path = target.with_name(target.name + ".tmp")
            try:
                with open(tmp_path, 'w') as f:
                    f.write(payload)
                # Atomic swap: readers see either the old or the new file.
                tmp_path.replace(target)
            except BaseException:
                # Also covers KeyboardInterrupt; drop the partial temp file.
                if tmp_path.exists():
                    tmp_path.unlink()
                raise

            print(f"\nüíæ MODEL SAVED: {filepath}")
            print(f"   Timestep: {self.timestep}")
            print(f"   Actions: {len(actions_data)} perceptrons")
            print(f"   Entities: {len(entities_data)} perceptrons")
            print(f"   Demonstrations: {self.demonstration_count}")
        except Exception as e:
            print(f"‚ùå Error saving model: {e}")

    def load_model(self, filepath=None):
        """Load model with compressed weights from a JSON checkpoint.

        Restores the timestep, control mode, per-perceptron utility and
        weights, the novelty/visit tables and the teaching statistics. Saved
        perceptrons that have no counterpart in this brain are ignored.

        State is applied field by field, so a failure part-way through can
        leave earlier fields already updated.

        Args:
            filepath: Checkpoint location (``str`` or ``Path``). Defaults to
                ``MODEL_FILE``.

        Returns:
            True when the checkpoint was read and applied; False when the file
            does not exist or loading raised (the error and traceback are
            printed).
        """
        import ast
        import re

        if filepath is None:
            filepath = MODEL_FILE
        # Accept plain strings as well as Path objects.
        filepath = Path(filepath)

        if not filepath.exists():
            print(f"‚ÑπÔ∏è  No saved model found at {filepath}")
            return False

        def dense_weights(size, pairs):
            # Rebuild the dense vector from (index, value) pairs. Indices
            # outside [0, size) are dropped; a negative index would otherwise
            # wrap around and overwrite an unrelated weight.
            dense = np.zeros(size)
            for idx, val in pairs:
                if 0 <= idx < size:
                    dense[idx] = val
            return dense

        def parse_location_key(raw_key):
            # Keys were written with str(); only tuple-looking keys are parsed.
            if not raw_key.startswith('('):
                return raw_key
            # NumPy >= 2 renders scalars as e.g. "np.int64(3)"; unwrap them so
            # the text is a plain literal.
            plain = re.sub(r'\bnp\.\w+\(([^()]*)\)', r'\1', raw_key)
            try:
                # literal_eval only accepts literals, so text from the file
                # can never execute code (the previous eval() could).
                parsed = ast.literal_eval(plain)
                hash(parsed)  # must be usable as a dict key
            except (ValueError, TypeError, SyntaxError):
                # Not a literal tuple: keep the key exactly as stored.
                return raw_key
            return parsed

        try:
            with open(filepath, 'r') as f:
                model_data = json.load(f)

            self.timestep = model_data.get("timestep", 0)
            self.control_mode = model_data.get("control_mode", "move")

            perceptrons = model_data.get("perceptrons", {})

            actions_data = perceptrons.get("actions", [])
            for a_data in actions_data:
                # First perceptron with the same action name, if any.
                a = next((p for p in self.actions() if p.action == a_data["action"]), None)
                if a is None:
                    continue
                a.utility = a_data["utility"]
                a.learning_rate = a_data.get("learning_rate", 0.01)
                a.familiarity = a_data.get("familiarity", 0.0)
                a.weights = dense_weights(a_data["weights_shape"], a_data["weights_nonzero"])

            entities_data = perceptrons.get("entities", [])
            for e_data in entities_data:
                e = next((p for p in self.entities() if p.entity_type == e_data["entity_type"]), None)
                if e is None:
                    continue
                e.utility = e_data["utility"]
                e.familiarity = e_data.get("familiarity", 0.0)
                e.weights = dense_weights(e_data["weights_shape"], e_data["weights_nonzero"])

            debt_data = model_data.get("debt_tracking", {})
            # JSON turned the integer keys into strings; convert them back.
            self.map_novelty_debt = {int(k): float(v) for k, v in debt_data.get("map_novelty_debt", {}).items()}
            self.location_novelty = {parse_location_key(k): float(v)
                                        for k, v in debt_data.get("location_novelty", {}).items()}
            self.visited_maps = {int(k): int(v) for k, v in debt_data.get("visited_maps", {}).items()}

            teaching_data = model_data.get("teaching_stats", {})
            self.demonstration_count = teaching_data.get("demonstration_count", 0)
            self.context_action_stats = teaching_data.get("context_action_stats", {})

            print(f"\n‚úÖ MODEL LOADED: {filepath}")
            print(f"   Timestep: {self.timestep}")
            print(f"   Actions: {len(actions_data)} perceptrons")
            print(f"   Entities: {len(entities_data)} perceptrons")
            print(f"   Demonstrations: {self.demonstration_count}")

            return True

        except Exception as e:
            print(f"‚ùå Error loading model: {e}")
            import traceback
            traceback.print_exc()
            return False

In [9]:
# # ============================================================================
# # CELL 4: Action Selection - Updated with All Fixes
# # ============================================================================
# # CHANGES:
# # 1. Added FORCED_EXPLORE_PROB (18%) for random exploration
# # 2. Added "both" mode handling - allows all actions when stuck
# # 3. Added turn-for-probing override - allows turns even in interact mode
# # ============================================================================

# import random  # Add to imports if not present

# GBA_ACTIONS = ["Up", "Down", "Left", "Right", "A", "B", "Start", "Select"]
# ACTION_DELTAS = {"UP": (0, -1), "DOWN": (0, 1), "LEFT": (-1, 0), "RIGHT": (1, 0)}
# DIRECTION_TO_ACTION = {0: "DOWN", 1: "UP", 2: "LEFT", 3: "RIGHT"}
# ACTION_TO_DIRECTION = {"DOWN": 0, "UP": 1, "LEFT": 2, "RIGHT": 3}

# def manhattan_distance(pos1, pos2):
#     return abs(pos1[0] - pos2[0]) + abs(pos1[1] - pos2[1])


# def anticipatory_action(brain, learning_state, context_state, 
#                        exploration_weight=1.3, min_interact_prob=0.15,
#                        raw_position=None,
#                        forced_explore_prob=0.18):  # NEW: 18% forced random
#     """
#     Action selection with all fixes:
#     1. Forced random exploration (18%)
#     2. "Both" mode when extremely stuck
#     3. Turn-for-probing override
#     4. Tile-based interaction probing
#     5. Novelty-driven movement
#     """
#     actions_list = brain.actions()
#     if not actions_list:
#         return Perceptron("action", action="UP", group="move")

#     mode = brain.determine_control_mode(context_state, raw_position=raw_position)
#     current_map = int(context_state[2])
#     current_dir = int(context_state[5])
    
#     raw_x = raw_position[0] if raw_position else int(context_state[0] * 255)
#     raw_y = raw_position[1] if raw_position else int(context_state[1] * 255)
#     current_pos = (raw_x, raw_y)
    
#     # Get exploration memory
#     memory = brain.get_current_map_memory(current_map)
#     visited_tiles = memory['visited_tiles']
#     obstructions = memory['obstructions']
    
#     # Get tile interaction state
#     tile_needs_probing = brain.should_interact_at_tile(raw_x, raw_y, current_map)
    
#     # NEW: Get best probe action (handles turn-then-interact)
#     probe_action, probe_dir = brain.get_best_probe_action(raw_x, raw_y, current_map, current_dir)
    
#     # Get transition info
#     transition_attraction, best_transition = brain.get_transition_attraction(current_map)
#     coverage = brain.get_exploration_coverage(current_map)
    
#     # === BUILD ALLOWED ACTIONS LIST ===
#     if mode == "battle":
#         # In battle, use group weights to decide
#         move_weight = brain.get_group_weight("move")
#         interact_weight = brain.get_group_weight("interact")
#         total = move_weight + interact_weight + 1e-9
#         if random.random() < move_weight / total:
#             allowed = [a for a in actions_list if a.group == "move"]
#         else:
#             allowed = [a for a in actions_list if a.group == "interact"]
#         all_actions = actions_list  # Fallback
        
#     elif mode == "both":
#         # NEW: "Both" mode - allow everything
#         allowed = actions_list
#         all_actions = actions_list
        
#     elif mode == "interact":
#         allowed = [a for a in actions_list if a.group == "interact"]
#         all_actions = None
        
#         # NEW: Turn-for-probing override
#         # If we need to turn to probe, allow that movement action
#         if probe_action and probe_action in ['UP', 'DOWN', 'LEFT', 'RIGHT']:
#             turn_actions = [a for a in actions_list if a.action == probe_action]
#             if turn_actions:
#                 # Add the turn action to allowed list
#                 allowed = allowed + turn_actions
        
#     else:  # move
#         allowed = [a for a in actions_list if a.group == "move"]
#         all_actions = None

#     if not allowed:
#         allowed = actions_list

#     # === NEW: FORCED RANDOM EXPLORATION (18%) ===
#     if random.random() < forced_explore_prob:
#         chosen = random.choice(allowed)
#         brain.record_action_execution(chosen.action)
#         brain.track_consecutive_action(chosen.action)
        
#         # Still start interaction verification if it's an A press on a probeable tile
#         if chosen.action == 'A' and tile_needs_probing:
#             brain.start_interaction_verification(raw_x, raw_y, current_map, current_dir)
        
#         return chosen

#     # === SCORE ACTIONS ===
#     action_scores = []
    
#     for a in allowed:
#         predicted = brain.predict_future_error(learning_state, a, context_state, raw_position=raw_position)
        
#         # --- MOVE ACTIONS ---
#         if a.group == "move":
#             if mode in ["move", "both"]:
#                 predicted *= exploration_weight
            
#             dx, dy = ACTION_DELTAS.get(a.action, (0, 0))
#             target_tile = (raw_x + dx, raw_y + dy)
#             action_direction = ACTION_TO_DIRECTION.get(a.action, -1)
            
#             # BONUS: Unvisited tile
#             if target_tile not in visited_tiles:
#                 predicted *= brain.UNVISITED_TILE_BONUS
            
#             # PENALTY: Known obstruction
#             if target_tile in obstructions:
#                 predicted *= brain.OBSTRUCTION_PENALTY
            
#             # PENALTY: Transition ban
#             if brain.is_position_banned(current_map, raw_x, raw_y, action_direction):
#                 predicted *= 0.05
            
#             # BONUS: Toward transition when well-explored
#             if transition_attraction > 0.3 and best_transition and coverage > 0.5:
#                 trans_pos = tuple(best_transition['position']) if isinstance(best_transition['position'], list) else best_transition['position']
#                 if manhattan_distance(target_tile, trans_pos) < manhattan_distance(current_pos, trans_pos):
#                     predicted *= (1.0 + transition_attraction)
            
#             # NEW: If this is a turn needed for probing, boost it
#             if probe_action == a.action and probe_dir is not None:
#                 predicted *= 2.0  # Strong boost for needed turn
            
#             # Random factor for variety
#             predicted *= (0.9 + random.random() * 0.2)
        
#         # --- INTERACT ACTIONS ---
#         elif a.group == "interact":
#             predicted = max(predicted, min_interact_prob)
            
#             # Menu trap B-boost
#             if a.action == 'B':
#                 predicted *= brain.menu_trap_b_boost
            
#             # A-press logic
#             if a.action == 'A':
#                 if tile_needs_probing and probe_action == 'A':
#                     # We're facing an untried direction - strong boost!
#                     predicted *= 3.0
#                 elif tile_needs_probing:
#                     # Tile needs probing but we need to turn first
#                     predicted *= 0.5  # Mild penalty - turn should happen instead
#                 else:
#                     # Tile exhausted
#                     predicted *= 0.1
            
#             # Start/Select - always penalize, no boost
#             if a.action in ['Start', 'Select']:
#                 predicted *= 0.3
        
#         action_scores.append((a, predicted))

#     # === SELECT BEST ===
#     if action_scores:
#         best_action = max(action_scores, key=lambda x: x[1])[0]
#         best_score = max(s for _, s in action_scores)
        
#         if best_score > 0.01:
#             brain.record_action_execution(best_action.action)
#             brain.track_consecutive_action(best_action.action)
            
#             # Start interaction verification for A-press on probeable tile
#             if best_action.action == 'A' and tile_needs_probing:
#                 brain.start_interaction_verification(raw_x, raw_y, current_map, current_dir)
            
#             return best_action
    
#     # === FALLBACKS ===
    
#     # Battle fallback
#     if mode == "battle" and all_actions:
#         all_scores = [(a, brain.predict_future_error(learning_state, a, context_state, raw_position=raw_position)) 
#                       for a in all_actions]
#         if all_scores:
#             best_action = max(all_scores, key=lambda x: x[1])[0]
#             brain.record_action_execution(best_action.action)
#             brain.track_consecutive_action(best_action.action)
#             return best_action
    
#     # Move fallback: prefer unvisited
#     if mode in ["move", "both"]:
#         for a in allowed:
#             if a.group == "move":
#                 dx, dy = ACTION_DELTAS.get(a.action, (0, 0))
#                 target = (raw_x + dx, raw_y + dy)
#                 if target not in visited_tiles and target not in obstructions:
#                     brain.record_action_execution(a.action)
#                     brain.track_consecutive_action(a.action)
#                     return a
    
#     # Generic fallback
#     if allowed:
#         best = max(allowed, key=lambda a: a.utility)
#         brain.record_action_execution(best.action)
#         brain.track_consecutive_action(best.action)
#         return best
    
#     best = max(actions_list, key=lambda a: a.utility)
#     brain.record_action_execution(best.action)
#     brain.track_consecutive_action(best.action)
#     return best

In [10]:
# ============================================================================
# CELL 6: Main Loop - Teaching Mode
# ============================================================================
# Builds a Brain with the eight action perceptrons, tries to restore a saved
# model, then loops forever:
#   1. read the game state (including the human's action),
#   2. learn from the human's action when one was pressed,
#   3. log status every 100 steps and auto-save every 500 steps,
#   4. read the state again after a short sleep and learn from the transition.
# Stopping the cell (KeyboardInterrupt) saves the model before the interrupt is
# re-raised, so progress since the last auto-save is not lost.
# ============================================================================

brain = Brain()

# Action perceptrons
for b in ["UP", "DOWN", "LEFT", "RIGHT"]:
    brain.add(Perceptron("action", action=b, group="move"))
for b in ["A", "B", "Start", "Select"]:
    brain.add(Perceptron("action", action=b, group="interact"))

# Try to load existing model
brain.load_model()

# NOTE: exploration_weight and prev_raw_position are only assigned in this
# cell, never read; kept because other cells may rely on these globals.
exploration_weight = 1.3
prev_context_state = None
prev_raw_position = None

print("="*70)
print("üéì TEACHING MODE - AI Learning from Human")
print("="*70)
print("INSTRUCTIONS:")
print("  - You control the game with your controller/keyboard")
print("  - AI observes your actions and learns")
print("  - Exploration memory is being recorded")
print("  - Model auto-saves every 500 steps")
print("="*70)
print(f"PERSISTENT MEMORY: {brain.EXPLORATION_MEMORY_FILE}")
print(f"MODEL CHECKPOINT: {MODEL_FILE}")
print("="*70)

try:
    while True:
        # Read state (now includes human_action)
        context_state, palette_state, tile_state, dead, raw_position, human_action = read_game_state()

        raw_x, raw_y = raw_position
        in_battle = context_state[3]
        current_map = int(context_state[2])
        current_dir = int(context_state[5])

        brain.update_position(raw_x, raw_y)

        derived = compute_derived_features(context_state, prev_context_state)
        learning_state = build_learning_state(derived, palette_state, tile_state, in_battle)

        brain.log_state(learning_state, context_state)

        # TEACHING MODE: Learn from human's action
        if human_action and human_action != "NONE":
            brain.learn_from_human_action(learning_state, human_action, context_state)
            brain.last_action = human_action
            brain.record_action_execution(human_action)

        # === LOGGING ===
        if brain.timestep % 100 == 0:
            memory = brain.get_current_map_memory(current_map)
            visited_count = len(memory['visited_tiles'])
            obs_count = len(memory['obstructions'])
            interactables = len(memory['interactable_objects'])
            coverage = brain.get_exploration_coverage(current_map)
            transitions = memory.get('transitions', [])

            dir_name = brain.DIRECTION_NAMES.get(current_dir, '?')

            print(f"\n{'='*70}")
            print(f"Step {brain.timestep} | Map {current_map} | Pos ({raw_x}, {raw_y}) facing {dir_name}")
            print(f"  Human Action: {human_action if human_action else 'NONE'}")
            print(f"  Demonstrations: {brain.demonstration_count}")

            # Exploration status
            print(f"\n  üìä EXPLORATION:")
            print(f"     Visited: {visited_count} | Obstructions: {obs_count} | Coverage: {coverage:.0%}")
            print(f"     Interactables found: {interactables}")

            # Transitions (only the first three are listed)
            if transitions:
                print(f"\n  üö™ TRANSITIONS: {len(transitions)} known")
                for t in transitions[:3]:
                    pos = tuple(t['position']) if isinstance(t['position'], list) else t['position']
                    print(f"     ({pos[0]},{pos[1]}) ‚Üí Map {t['destination_map']} (used {t['use_count']}x)")

            # Utilities, highest first
            action_utils = sorted([(a.action, a.utility) for a in brain.actions()], key=lambda x: x[1], reverse=True)
            print(f"\n  ‚ö° Utilities: {' '.join([f'{k}:{v:.2f}' for k,v in action_utils])}")

        # === MILESTONES & STATS ===
        if brain.timestep % 500 == 0 and brain.timestep > 0:
            total_visited = sum(len(m['visited_tiles']) for m in brain.exploration_memory.values())
            total_obs = sum(len(m['obstructions']) for m in brain.exploration_memory.values())
            total_interactables = sum(len(m['interactable_objects']) for m in brain.exploration_memory.values())
            total_transitions = sum(len(m.get('transitions', [])) for m in brain.exploration_memory.values())

            print(f"\n{'#'*70}")
            print(f"# MILESTONE {brain.timestep}")
            print(f"# Maps explored: {len(brain.exploration_memory)}")
            print(f"# Tiles visited: {total_visited} | Obstructions: {total_obs}")
            print(f"# Interactables: {total_interactables} | Transitions: {total_transitions}")
            print(f"# Demonstrations: {brain.demonstration_count}")
            print(f"{'#'*70}")

            # Print teaching statistics
            brain.print_teaching_stats()

            # Auto-save model
            brain.save_model()

        time.sleep(0.02)

        # Learn from state transitions: read the state again after the sleep.
        # read_game_state() returns six values; the human action of this second
        # read is discarded.
        next_context, next_palette, next_tiles, dead, next_raw_position, _ = read_game_state()
        next_in_battle = next_context[3]
        next_derived = compute_derived_features(next_context, context_state)
        next_learning_state = build_learning_state(next_derived, next_palette, next_tiles, next_in_battle)

        brain.learn(learning_state, next_learning_state, context_state, next_context, dead=dead,
                    raw_position=raw_position, next_raw_position=next_raw_position)

        prev_context_state = context_state.copy()
        prev_raw_position = raw_position
        brain.timestep += 1
except KeyboardInterrupt:
    # Interrupting the kernel is the only way out of the loop; checkpoint what
    # was learned since the last auto-save, then let the interrupt propagate.
    print("\nInterrupted - saving model before exit...")
    brain.save_model()
    raise

  Loaded exploration memory: 4 maps
‚ùå Error loading model: Expecting value: line 1 column 1 (char 0)
üéì TEACHING MODE - AI Learning from Human
INSTRUCTIONS:
  - You control the game with your controller/keyboard
  - AI observes your actions and learns
  - Exploration memory is being recorded
  - Model auto-saves every 500 steps
PERSISTENT MEMORY: C:\Users\natmaw\Documents\Boston Stuff\CS 5100 Foundations of AI\cogai\exploration_memory.json
MODEL CHECKPOINT: C:\Users\natmaw\Documents\Boston Stuff\CS 5100 Foundations of AI\cogai\model_checkpoint.json

Step 0 | Map 17 | Pos (16, 9) facing RIGHT
  Human Action: NONE
  Demonstrations: 0

  üìä EXPLORATION:
     Visited: 62 | Obstructions: 72 | Coverage: 46%
     Interactables found: 0

  üö™ TRANSITIONS: 60 known
     (17,9) ‚Üí Map 0 (used 96x)
     (16,9) ‚Üí Map 0 (used 11x)
     (18,9) ‚Üí Map 0 (used 9x)

  ‚ö° Utilities: UP:1.00 DOWN:1.00 LEFT:1.00 RIGHT:1.00 A:1.00 B:1.00 Start:1.00 Select:1.00


Traceback (most recent call last):
  File "C:\Users\natmaw\AppData\Local\Temp\ipykernel_14988\4196294198.py", line 1183, in load_model
    model_data = json.load(f)
                 ^^^^^^^^^^^^
  File "c:\Users\natmaw\anaconda3\envs\mlp\Lib\json\__init__.py", line 293, in load
    return loads(fp.read(),
           ^^^^^^^^^^^^^^^^
  File "c:\Users\natmaw\anaconda3\envs\mlp\Lib\json\__init__.py", line 346, in loads
    return _default_decoder.decode(s)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\natmaw\anaconda3\envs\mlp\Lib\json\decoder.py", line 338, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\natmaw\anaconda3\envs\mlp\Lib\json\decoder.py", line 356, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)



Step 100 | Map 17 | Pos (16, 9) facing RIGHT
  Human Action: NONE
  Demonstrations: 0

  üìä EXPLORATION:
     Visited: 62 | Obstructions: 72 | Coverage: 46%
     Interactables found: 0

  üö™ TRANSITIONS: 60 known
     (17,9) ‚Üí Map 0 (used 96x)
     (16,9) ‚Üí Map 0 (used 11x)
     (18,9) ‚Üí Map 0 (used 9x)

  ‚ö° Utilities: UP:0.59 DOWN:0.59 LEFT:0.59 RIGHT:0.59 A:0.59 B:0.59 Start:0.59 Select:0.59

Step 200 | Map 17 | Pos (16, 9) facing RIGHT
  Human Action: NONE
  Demonstrations: 0

  üìä EXPLORATION:
     Visited: 62 | Obstructions: 72 | Coverage: 46%
     Interactables found: 0

  üö™ TRANSITIONS: 60 known
     (17,9) ‚Üí Map 0 (used 96x)
     (16,9) ‚Üí Map 0 (used 11x)
     (18,9) ‚Üí Map 0 (used 9x)

  ‚ö° Utilities: UP:0.59 DOWN:0.59 LEFT:0.59 RIGHT:0.59 A:0.59 B:0.59 Start:0.59 Select:0.59

Step 300 | Map 17 | Pos (16, 9) facing RIGHT
  Human Action: NONE
  Demonstrations: 0

  üìä EXPLORATION:
     Visited: 62 | Obstructions: 72 | Coverage: 46%
     Interactables f

KeyboardInterrupt: 