In [1]:
import serial
import json
import numpy as np
import time

ser = serial.Serial('/dev/ttyUSB1', 115200, timeout=1)

ACTIONS = [0, 1, 2]   # forward, left, right

def get_state():
    line = ser.readline().decode().strip()
    if not line:
        return None
    try:
        data = json.loads(line)
        return np.array([data["L"], data["C"], data["R"]])
    except:
        return None


def send_action(a):
    ser.write((str(a) + "\n").encode())



def reward_function(state):
    L, C, R = state

    # premio por avanzar sin obstaculos
    if C == 0:
        return 1.0

    # penalizar si hay obst치culo de frente
    if C == 1:
        return -5.0

    return -0.1


# -------------------------
#  LOOP RL B츼SICO
# -------------------------
from random import choice

Q = np.zeros((8, 3))  # 8 estados, 3 acciones
alpha = 0.3
gamma = 0.9
epsilon = 0.2

def state_to_index(s):
    return s[0]*4 + s[1]*2 + s[2]*1

while True:
    state = get_state()
    if state is None:
        continue

    idx = state_to_index(state)

    # pol칤tica epsilon-greedy
    if np.random.rand() < epsilon:
        action = choice(ACTIONS)
    else:
        action = np.argmax(Q[idx])

    send_action(action)

    time.sleep(0.15)

    new_state = get_state()
    if new_state is None:
        continue

    reward = reward_function(new_state)

    new_idx = state_to_index(new_state)

    # Q-learning
    Q[idx, action] = Q[idx, action] + alpha * (
        reward + gamma * np.max(Q[new_idx]) - Q[idx, action]
    )

    print("State:", state, "Action:", action, "Reward:", reward)


KeyboardInterrupt: 

In [None]:
import serial
import json
import numpy as np
import time
import os
from random import choice

# --- cargar Q-table ---
if os.path.exists("qtable.npy"):
    Q = np.load("qtable.npy")
    print("Q-table cargada desde archivo.")
else:
    Q = np.zeros((8, 3))
    print("Q-table nueva creada.")

ser = serial.Serial('/dev/ttyUSB0', 115200, timeout=1)

ACTIONS = [0, 1, 2]   # 0=avanzar, 1=izq, 2=der
alpha = 0.3
gamma = 0.9
epsilon = 0.2


def get_state():
    try:
        line = ser.readline().decode().strip()
        if not line:
            return None
        data = json.loads(line)
        return np.array([data["L"], data["C"], data["R"]])
    except:
        return None


def send_action(a):
    ser.write((str(a) + "\n").encode())


def reward_function(state):
    L, C, R = state
    if C == 0:
        return 1.0
    if C == 1:
        return -5.0
    return -0.1


def state_to_index(s):
    return s[0] * 4 + s[1] * 2 + s[2] * 1


# -------------------------------------------
# 游댠 ENVIAR ACCI칍N INICIAL PARA ARRANCAR
# -------------------------------------------
print("Enviando acci칩n inicial (avanzar = 0)...")
send_action(0)
time.sleep(2)   # esperar para que el ESP32 empiece a enviar JSON
print("Comenzando entrenamiento...")


last_save = time.time()

# -------------------------------------------
# 游대 BUCLE PRINCIPAL RL
# -------------------------------------------
while True:
    state = get_state()
    if state is None:
        continue

    idx = state_to_index(state)

    # pol칤tica epsilon-greedy
    if np.random.rand() < epsilon:
        action = choice(ACTIONS)
    else:
        action = np.argmax(Q[idx])

    send_action(action)
    time.sleep(0.15)

    new_state = get_state()
    if new_state is None:
        continue

    reward = reward_function(new_state)
    new_idx = state_to_index(new_state)

    # actualizaci칩n Q-learning
    Q[idx, action] = Q[idx, action] + alpha * (
        reward + gamma * np.max(Q[new_idx]) - Q[idx, action]
    )

    print("State:", state, "Action:", action, "Reward:", reward)

    # guardado autom치tico
    if time.time() - last_save > 5:
        np.save("qtable.npy", Q)
        print("Q-table guardada.")
        last_save = time.time()


Q-table cargada desde archivo.
Enviando acci칩n inicial (avanzar = 0)...
Comenzando entrenamiento...
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 0 Reward: 1.0
Q-table guardada.
State: [0 0 0] Action: 0 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
Q-table guardada.
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0] Action: 1 Reward: 1.0
State: [0 0 0

In [3]:
import numpy as np

Q = np.load("qtable.npy")
Q

Q_int = (Q * 1000).astype(int)  # opcional: escalar

with open("qtable.h", "w") as f:
    f.write("const int Q[8][3] = {\n")
    for row in Q_int:
        f.write("  {" + ", ".join(map(str, row)) + "},\n")
    f.write("};\n")
