In [None]:
import serial
import json
import numpy as np
import tensorflow as tf

# --- Serial link to the robot ---
# NOTE(review): port name and baud rate are hard-coded; they must match the
# device the robot firmware is attached to.
ser = serial.Serial('/dev/ttyUSB0', 115200)

# Action table. The position of each entry is the index of the matching
# output unit of the policy network, so the two must stay in sync.
ACTIONS = ["forward", "left", "right", "stop"]

# --- Small policy network ---
# Three inputs (the L, C and R readings) -> two hidden ReLU layers -> a
# softmax over the actions. `tf.keras.Input(shape=...)` replaces
# `InputLayer(input_shape=...)`, whose `input_shape` argument is deprecated
# in Keras 3.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(3,)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    # One output unit per entry of ACTIONS.
    tf.keras.layers.Dense(len(ACTIONS), activation='softmax'),
])

optimizer = tf.keras.optimizers.Adam(1e-3)

def choose_action(state):
    """Sample an action index from the policy network's output distribution.

    Args:
        state: Sequence of three numeric sensor readings (anything that
            `np.asarray` can turn into three floats).

    Returns:
        Integer index into ``ACTIONS``, drawn at random with the
        probabilities produced by ``model`` for this state.
    """
    batch = np.asarray(state, dtype=np.float32).reshape(1, 3)
    # Call the model directly instead of `model.predict`: predict() is built
    # for large batched inputs and carries significant per-call overhead,
    # which is wasteful for one sample per control step.
    probs = np.asarray(model(batch, training=False))[0].astype(np.float64)
    # Renormalize in float64 so rounding in the float32 softmax cannot trip
    # np.random.choice's "probabilities must sum to 1" check.
    probs /= probs.sum()
    return np.random.choice(len(ACTIONS), p=probs)

# --- RL training loop ---
# Reads one JSON sensor line per step, samples an action, sends it to the
# robot, and applies a single policy-gradient update weighted by the reward.

# Readings below this value are treated as "too close to an obstacle".
# NOTE(review): units are whatever the firmware reports — confirm.
MIN_SAFE_DIST = 12

try:
    while True:
        raw_line = ser.readline()
        try:
            data = json.loads(raw_line.decode().strip())
            state = np.array([data["L"], data["C"], data["R"]])
        except (ValueError, KeyError, TypeError):
            # Partial or corrupted lines are normal on a serial link (for
            # example right after the port is opened). ValueError also covers
            # UnicodeDecodeError and json.JSONDecodeError. Skip the line
            # instead of aborting the whole training run.
            continue

        action_index = choose_action(state)
        action = ACTIONS[action_index]

        ser.write((action + "\n").encode())

        # --- Reward ---
        # NOTE(review): the reward is computed from the reading taken *before*
        # the action was sent, so it scores the situation the robot was in
        # rather than the outcome of the action — confirm this is intended.
        min_dist = min(state)
        if min_dist < MIN_SAFE_DIST:
            reward = -1.0
        else:
            reward = 0.2 if action == "forward" else 0.0

        with tf.GradientTape() as tape:
            # The last layer is a softmax, so these are probabilities.
            probs = model(state.astype(np.float32).reshape(1, 3), training=True)
            action_prob = probs[0, action_index]
            # REINFORCE-style loss; the epsilon keeps log() finite if the
            # chosen action's probability underflows to zero.
            loss = -tf.math.log(action_prob + 1e-8) * reward

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        print("State:", state, "Action:", action, "Reward:", reward)
except KeyboardInterrupt:
    # Interrupting the kernel is the only way to end the loop; treat it as a
    # normal stop so the trained model remains available to later cells.
    print("Training interrupted by user.")
finally:
    # Always release the serial port so that re-running this cell can
    # reopen it.
    ser.close()


=== Simulación de entrenamiento RL ===
State: [19  5 40]  Action: stop  Reward: -1.0
State: [40 25 25]  Action: forward  Reward: 0.2
State: [40 45 21]  Action: forward  Reward: 0.0
State: [44 26 19]  Action: left  Reward: 0.2
State: [35  6 39]  Action: forward  Reward: -1.0
State: [25 29 48]  Action: right  Reward: 0.0
State: [12 32 48]  Action: left  Reward: 0.2
State: [37 29 29]  Action: forward  Reward: 0.2
State: [46 45 40]  Action: right  Reward: 0.2
State: [21 45 38]  Action: forward  Reward: 0.2
State: [ 5 50 44]  Action: forward  Reward: -1.0
State: [28 20 22]  Action: stop  Reward: 0.2
State: [36 24  5]  Action: forward  Reward: -1.0
State: [50 41  5]  Action: right  Reward: -1.0
State: [ 5  8 30]  Action: right  Reward: -1.0
State: [28 39 13]  Action: stop  Reward: 0.2
State: [21 47 33]  Action: right  Reward: 0.0
State: [42 26 28]  Action: stop  Reward: 0.2
State: [36  9 29]  Action: stop  Reward: -1.0
State: [13 42 50]  Action: forward  Reward: 0.0
State: [12 48 40]  Action

KeyboardInterrupt: 

In [None]:
# Persist the trained Keras model to disk; the ".h5" suffix selects the
# legacy HDF5 file format.
model.save("robot_model.h5")

# Convert the in-memory Keras model to a TensorFlow Lite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# The converted model is written as raw bytes, hence binary mode.
with open("robot.tflite", "wb") as f:
    f.write(tflite_model)


INFO:tensorflow:Assets written to: /tmp/tmpex84r63y/assets


INFO:tensorflow:Assets written to: /tmp/tmpex84r63y/assets


Saved artifact at '/tmp/tmpex84r63y'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 3), dtype=tf.float32, name='keras_tensor_969')
Output Type:
  TensorSpec(shape=(None, 4), dtype=tf.float32, name=None)
Captures:
  124838863490320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  124838863492048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  124838863493008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  124838863492240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  124838863493392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  124838863493200: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1763122021.693826   66928 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1763122021.693852   66928 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-11-14 08:07:01.694219: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpex84r63y
2025-11-14 08:07:01.694673: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-11-14 08:07:01.694680: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpex84r63y
I0000 00:00:1763122021.698172   66928 mlir_graph_optimization_pass.cc:437] MLIR V1 optimization pass is not enabled
2025-11-14 08:07:01.698875: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-11-14 08:07:01.726366: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpex84r63y
2025-11-14 08:07:01.733237: I tensorflow/cc/saved_model/loader.cc:471] SavedModel 