In [1]:
from pathlib import Path
import json
import numpy as np
import time
import random

# --- Files exchanged with the emulator-side Lua script ---
# Both JSON files live in one directory (same folder as the notebook & Lua script).
BASE_PATH = Path("C:/Users/natmaw/Documents/Boston Stuff/CS 5100 Foundations of AI/cogai/")
STATE_FILE = BASE_PATH.joinpath("game_state.json")  # read by read_game_state()
ACTION_FILE = BASE_PATH.joinpath("action.json")     # written by write_action()

# --- Length of the raw FireRed state vector ---
# NOTE(review): the previous comment listed five Lua fields
# ([x, y, map, hp_cur, hp_max]) while the expected length is 9 -- confirm the
# actual layout against the Lua script.
EXPECTED_STATE_DIM = 9


In [2]:
def read_game_state(state_file=None, expected_dim=None):
    """Read the game-state JSON file and return a fixed-length state vector.

    The file is expected to hold a JSON object with a ``"state"`` entry (a list
    of numbers) and an optional ``"dead"`` flag.

    Args:
        state_file: Path of the JSON file to read. Defaults to the module-level
            ``STATE_FILE``.
        expected_dim: Length of the returned vector. Defaults to the
            module-level ``EXPECTED_STATE_DIM``.

    Returns:
        ``(state, dead)`` where ``state`` is a 1-D float ``numpy`` array of
        exactly ``expected_dim`` entries (zero-padded or truncated) and ``dead``
        is a ``bool``. If the file is missing, unreadable or malformed the
        result is ``(zeros, False)``.
    """
    path = STATE_FILE if state_file is None else Path(state_file)
    dim = EXPECTED_STATE_DIM if expected_dim is None else int(expected_dim)

    try:
        # No separate exists() check: a missing file raises FileNotFoundError
        # (an OSError), which takes the same fallback path without a race.
        with open(path, "r") as f:
            data = json.load(f)

        raw = data.get("state", [])
        dead = bool(data.get("dead", False))
        if raw is None:
            raw = []

        # ravel() turns a scalar or nested "state" value into a 1-D vector, so
        # the pad/trim step below can never fail on an unexpected rank.
        raw_state = np.asarray(raw, dtype=float).ravel()

    except (OSError, ValueError, TypeError, AttributeError):
        # OSError: file missing/locked; ValueError: bad JSON or non-numeric
        # entries; TypeError/AttributeError: payload of an unexpected type.
        return np.zeros(dim, dtype=float), False

    # --- HARD PAD / TRIM ---
    state = np.zeros(dim, dtype=float)
    n = min(raw_state.shape[0], dim)
    state[:n] = raw_state[:n]

    return state, dead


In [3]:
def write_action(action_name):
    """Overwrite ACTION_FILE with the JSON object ``{"action": action_name}``."""
    command = {"action": action_name}
    with open(ACTION_FILE, "w") as handle:
        handle.write(json.dumps(command))


In [4]:
class Perceptron:
    """Linear unit with lazily created weights, an eligibility trace and a utility score.

    Attributes:
        kind: Role label; the code in this class only special-cases "action".
        action: Button name carried by action units (None otherwise).
        group: "move", "interact" or None.
        utility: Score kept within [0.01, 2.0] by ``update``.
        weights: None until first use, then a 1-D array sized to the first state seen.
        eligibility: Running trace; ``gamma * previous + 1`` on every update.
    """

    def __init__(self, kind, action=None, group=None):
        self.kind = kind
        self.action = action
        self.group = group  # "move" or "interact" or None
        self.utility = 1.0
        self.weights = None
        self.eligibility = 0.0

    def ensure_weights(self, dim):
        """Create small random weights of length ``dim`` unless weights already exist."""
        if self.weights is not None:
            return
        self.weights = 0.01 * np.random.randn(dim)

    def predict(self, state):
        """Return the dot product of the weights with ``state``."""
        self.ensure_weights(len(state))
        return self.weights.dot(state)

    def update(self, state, error, gamma=0.9, stagnation=0.0):
        """Apply one trace-scaled weight update and refresh the utility.

        Args:
            state: Input vector the error refers to.
            error: Signed error signal scaling the weight change.
            gamma: Decay applied to the eligibility trace before adding 1.
            stagnation: Stagnation level; above 0.5 an action unit's utility
                decays by 0.97 instead of 0.995.
        """
        self.ensure_weights(len(state))
        self.eligibility = 1.0 + gamma * self.eligibility
        step_size = 0.03
        self.weights += step_size * error * state * self.eligibility

        # Only action units lose utility; both branches are decays.
        if self.kind == "action":
            decay = 0.97 if stagnation > 0.5 else 0.995
            self.utility *= decay
        self.utility = np.clip(self.utility, 0.01, 2.0)

In [5]:
class Brain:
    """Container for perceptrons plus the bookkeeping that drives the control mode.

    Attributes:
        perceptrons: All registered units (kinds seen in this file: "action",
            "entity", "objective").
        prev_states: Up to 50 most recent state vectors passed to ``log_state``.
        last_positions: Recent ``(x, y)`` positions passed to ``update_position``.
        movement_stagnation: Consecutive ``update_position`` calls in which the
            position history held a single distinct position.
        control_mode: "move", "interact" or "both".
        timestep: Step counter; incremented by the caller.
        max_entities: Upper bound on the number of "entity" perceptrons kept
            (None disables the bound).
    """

    def __init__(self, max_entities=256):
        self.perceptrons = []
        self.prev_states = []
        self.last_positions = []
        self.movement_stagnation = 0
        self.control_mode = "move"
        self.timestep = 0
        # Entity units never lose utility, so without a bound the list (and the
        # per-step cost of predict_future_error) grows for as long as the loop runs.
        self.max_entities = max_entities

    def add(self, p):
        """Register perceptron ``p``."""
        self.perceptrons.append(p)

    def actions(self):
        """Return the perceptrons whose kind is "action"."""
        return [p for p in self.perceptrons if p.kind == "action"]

    def entities(self):
        """Return the perceptrons whose kind is "entity"."""
        return [p for p in self.perceptrons if p.kind == "entity"]

    def stagnation_level(self, window=10):
        """Return how little the most recent ``window`` logged states changed.

        Returns 0.0 while fewer than ``window`` states are logged; otherwise
        ``1 - tanh(total change)``, i.e. 1.0 when the states are identical and
        close to 0.0 when they change a lot.
        """
        if len(self.prev_states) < window:
            return 0.0
        # Use the newest entries: prev_states keeps up to 50 states with the
        # oldest first, so indexing from 0 would measure stale history.
        recent = self.prev_states[-window:] if window > 0 else []
        diffs = [
            np.linalg.norm(later - earlier)
            for earlier, later in zip(recent, recent[1:])
        ]
        return 1.0 - np.tanh(sum(diffs))

    def predict_future_error(self, state, action):
        """Return the summed entity predictions for ``state`` plus Gaussian noise.

        ``action`` is accepted but not used by the computation.
        """
        novelty = sum(e.predict(state) for e in self.entities())
        noise = np.random.randn() * 0.05
        return novelty + noise

    def learn(self, state, next_state, dead=False):
        """Update every perceptron from the observed state change.

        The error is the Euclidean distance between ``state`` and
        ``next_state`` (halved when ``dead``); each perceptron is updated with
        the negated error. A change larger than 0.3 adds a new entity unit.
        Calls with mismatched shapes are ignored.
        """
        if state.shape != next_state.shape:
            return
        error = np.linalg.norm(next_state - state)
        if dead:
            error *= 0.5
        stagnation = self.stagnation_level()
        for p in self.perceptrons:
            p.update(state, -error, stagnation=stagnation)
        if error > 0.3:
            self.add(Perceptron("entity"))
        # Action units are never pruned: their utility only ever decays, so
        # pruning them would leave the agent without actions after a few
        # hundred steps.
        self.perceptrons = [
            p for p in self.perceptrons
            if p.kind == "action" or p.utility > 0.05
        ]
        self._trim_entities()

    def _trim_entities(self):
        """Drop the oldest entity perceptrons beyond ``max_entities``."""
        if self.max_entities is None:
            return
        surplus = len(self.entities()) - self.max_entities
        if surplus <= 0:
            return
        kept = []
        for p in self.perceptrons:
            if surplus > 0 and p.kind == "entity":
                surplus -= 1  # list order is insertion order: oldest go first
                continue
            kept.append(p)
        self.perceptrons = kept

    def log_state(self, state):
        """Append ``state`` to the history, keeping at most the 50 newest entries."""
        self.prev_states.append(state)
        if len(self.prev_states) > 50:
            self.prev_states.pop(0)

    def update_position(self, x, y, interaction_threshold=5, menu_threshold=15):
        """Record a position and derive the control mode from movement stagnation.

        ``menu_threshold`` is both the length of the position history and the
        stagnation count at which the mode becomes "both";
        ``interaction_threshold`` is the count at which it becomes "interact".
        Below both thresholds the mode is "move".
        """
        self.last_positions.append((x, y))
        if len(self.last_positions) > menu_threshold:
            self.last_positions.pop(0)

        unique_positions = len(set(self.last_positions))
        if unique_positions <= 1:
            self.movement_stagnation += 1
        else:
            self.movement_stagnation = 0

        if self.movement_stagnation >= menu_threshold:
            self.control_mode = "both"
        elif self.movement_stagnation >= interaction_threshold:
            self.control_mode = "interact"
        else:
            self.control_mode = "move"

In [6]:
# Button names: the four directions first, then the remaining buttons.
# NOTE(review): directions are spelled "Up"/"Down"/... here but "UP"/"DOWN"/...
# where the action perceptrons are created -- confirm which spelling the Lua
# side expects.
GBA_ACTIONS = ["Up", "Down", "Left", "Right"] + ["A", "B", "Start", "Select"]


In [7]:
def anticipatory_action(
    brain,
    state,
    exploration_weight=1.5,
    forced_explore_prob=0.4,
    explore_bonus=0.3,
    min_interact_prob=0.2  # ensures interactions never vanish
):
    """Pick the action perceptron to execute for ``state``.

    Selection steps:
      1. If the brain has no action perceptrons, return a freshly built random
         one (not registered with the brain).
      2. Filter actions by ``brain.control_mode``: "move" keeps the "move"
         group, "interact" keeps the "interact" group, any other mode keeps
         everything. An empty filter result falls back to all actions.
      3. With probability ``forced_explore_prob`` return a uniformly random
         allowed action.
      4. Otherwise score every allowed action with
         ``brain.predict_future_error`` and return the highest score.

    Args:
        brain: Object exposing ``actions()``, ``control_mode`` and
            ``predict_future_error(state, action)``.
        state: State vector forwarded to ``predict_future_error``.
        exploration_weight: Multiplier applied to the score of "move" actions.
        forced_explore_prob: Probability of skipping scoring entirely.
        explore_bonus: Constant added to the score of "move" actions.
        min_interact_prob: Lower bound on the score of "interact" actions.

    Returns:
        The chosen action perceptron.
    """
    actions_list = brain.actions()
    if not actions_list:
        # Build a single perceptron and label it with its real group
        # (A/B/Start/Select are interactions, not movement).
        # NOTE(review): directions are upper-case here while GBA_ACTIONS uses
        # "Up"/"Down"/... -- confirm the spelling the Lua side expects.
        move_names = ("UP", "DOWN", "LEFT", "RIGHT")
        interact_names = ("A", "B", "Start", "Select")
        name = random.choice(move_names + interact_names)
        group = "move" if name in move_names else "interact"
        return Perceptron("action", action=name, group=group)

    # Control filtering
    mode = brain.control_mode
    if mode in ("move", "interact"):
        allowed = [a for a in actions_list if a.group == mode]
    else:
        allowed = list(actions_list)
    if not allowed:
        # fallback: allow all
        allowed = actions_list

    # Forced exploration
    if random.random() < forced_explore_prob:
        return random.choice(allowed)

    best_action, best_score = None, -np.inf
    for a in allowed:
        predicted = brain.predict_future_error(state, a)

        # separate bias for movement vs interaction
        if a.group == "move":
            predicted = predicted * exploration_weight + explore_bonus
        elif a.group == "interact":
            predicted = max(predicted, min_interact_prob)  # never too low

        if predicted > best_score:
            best_score = predicted
            best_action = a

    # No score compared greater than -inf (e.g. every score was NaN).
    if best_action is None:
        best_action = random.choice(allowed)

    return best_action


In [8]:
def temporal_state(prev_states, current_state, window, alpha):
    """Stack the last ``window - 1`` past states and the current one into one vector.

    Each frame is scaled by ``alpha ** age`` (the current frame has age 0, the
    oldest frame age ``window - 1``) and the frames are concatenated oldest
    first. Missing history is filled with zero frames shaped like
    ``current_state``. ``prev_states`` is not modified.

    Args:
        prev_states: List of past state arrays, oldest first.
        current_state: The newest state array.
        window: Total number of frames in the result; must be >= 1.
        alpha: Per-step decay applied to older frames.

    Returns:
        1-D array of ``window`` concatenated, weighted frames.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be >= 1")

    n_past = window - 1
    # Guard n_past == 0 explicitly: prev_states[-0:] would return the whole list.
    history = list(prev_states[-n_past:]) if (n_past > 0 and len(prev_states)) else []

    missing = n_past - len(history)
    if missing > 0:
        blank = np.zeros(np.shape(current_state), dtype=float)
        # Sharing one blank frame is safe: the scaling below builds new arrays.
        history = [blank] * missing + history

    history.append(current_state)

    weighted = [
        s * (alpha ** (window - 1 - i))
        for i, s in enumerate(history)
    ]

    return np.concatenate(weighted)


In [None]:
# --- Brain & Perceptrons setup ---
brain = Brain()

# interaction & move groups
# NOTE(review): directions are upper-case here while GBA_ACTIONS uses
# "Up"/"Down"/... -- confirm the spelling the Lua side expects.
interact_buttons = ["A", "B", "Start", "Select"]
move_buttons = ["UP", "DOWN", "LEFT", "RIGHT"]

for b in move_buttons:
    brain.add(Perceptron("action", action=b, group="move"))
for b in interact_buttons:
    brain.add(Perceptron("action", action=b, group="interact"))

# exploration objectives
brain.add(Perceptron("objective"))
brain.add(Perceptron("objective"))

# --- Hyperparameters ---
temporal_window = 3
alpha_decay = 0.7
exploration_weight = 1.5
forced_explore_prob = 0.4

# Raw states of earlier steps only (oldest first); the current frame is added
# after its temporal state has been built.
prev_states = []

print("AI CONTROL STARTED — switch to BizHawk window")

try:
    while True:
        raw_state, dead = read_game_state()

        # --- Update position & control mode ---
        x, y = raw_state[0], raw_state[1]
        brain.update_position(x, y)

        # Build the temporal state BEFORE recording the current frame:
        # temporal_state() appends the current frame itself, so appending it
        # to prev_states first would duplicate it and push the oldest frame
        # out of the window.
        state = temporal_state(prev_states, raw_state, temporal_window, alpha_decay)
        brain.log_state(state)

        prev_states.append(raw_state)
        if len(prev_states) > temporal_window:
            prev_states.pop(0)

        action = anticipatory_action(
            brain,
            state,
            exploration_weight=exploration_weight,
            forced_explore_prob=forced_explore_prob
        )

        if action is not None:
            write_action(action.action)
        else:
            write_action("NONE")

        time.sleep(0.03)  # ~30 FPS

        # prev_states now ends with the frame just acted on, so next_state is
        # the same window shifted forward by one frame.
        next_raw, dead = read_game_state()
        next_state = temporal_state(prev_states, next_raw, temporal_window, alpha_decay)

        brain.learn(state, next_state, dead=dead)

        brain.timestep += 1
except KeyboardInterrupt:
    # Interrupting the cell is the only way out of the loop; clear the last
    # action so it is not left in the action file after control stops.
    write_action("NONE")
    print("AI CONTROL STOPPED")


AI CONTROL STARTED — switch to BizHawk window
