In [None]:
from coderbot_sim.topdown_driving.tk import TopDownDrivingTkFrontend
from coderbot_sim.topdown_driving import TopDownDrivingEnv
import numpy as np

under = TopDownDrivingEnv(num_envs=2)
env = TopDownDrivingTkFrontend(sim_env=under)

env.render()
index = 0

for _ in range(500):
    throttle = np.zeros(2, dtype=np.float32)
    steer = np.zeros(2, dtype=np.float32)
    if "Up" in env.keys:
        throttle[index] = 1.0
    if "Down" in env.keys:
        throttle[index] = -1.0
    if "Left" in env.keys:
        steer[index] = -1.0
    if "Right" in env.keys:
        steer[index] = 1.0

    if "Shift_L" in env.keys:
        index = (index + 1) % env.sim_env.num_envs

    action = {"throttle": throttle, "steer": steer}
    state = await env.step(action, dt=0.02)
    # print(state["reward"])
    
    # Example Neural Network for controlling the car
    # Input: state["rays"], 5 hit rays by distance, [d0, d1, d2, d3, d4]
    # Output layer with 2 neurons [throttle, steer]

In [2]:
import numpy as np

class SmallNN:
    """Tiny fully connected network with one tanh hidden layer and a tanh output.

    Parameters are plain NumPy arrays:
    ``w1`` (in_dim, hidden), ``b1`` (hidden,), ``w2`` (hidden, out_dim) and
    ``b2`` (out_dim,). Weights are drawn from the global ``np.random`` state
    and scaled by 0.5; biases start at zero.
    """

    # Parameter attributes, in the order they are drawn / perturbed.
    _PARAM_NAMES = ("w1", "b1", "w2", "b2")

    def __init__(self, in_dim=5, hidden=8, out_dim=2):
        """Create a randomly initialised network of the given layer sizes."""
        # w1 is drawn before w2; keep that order so seeded runs stay reproducible.
        self.w1 = 0.5 * np.random.randn(in_dim, hidden)
        self.b1 = np.zeros(hidden)
        self.w2 = 0.5 * np.random.randn(hidden, out_dim)
        self.b2 = np.zeros(out_dim)

    def forward(self, x):
        """Return the network output for input ``x``; tanh keeps it within [-1, 1]."""
        hidden_activation = np.tanh(x @ self.w1 + self.b1)
        output_activation = np.tanh(hidden_activation @ self.w2 + self.b2)
        return output_activation

    def clone(self):
        """Return a new network holding independent copies of all parameters."""
        # The freshly drawn default-size parameters are discarded immediately;
        # only the copied arrays remain on the returned object.
        twin = SmallNN()
        for name in self._PARAM_NAMES:
            setattr(twin, name, getattr(self, name).copy())
        return twin

    def mutate(self, scale):
        """Add Gaussian noise with standard deviation ``scale`` to every parameter, in place."""
        for name in self._PARAM_NAMES:
            param = getattr(self, name)
            param += np.random.randn(*param.shape) * scale
            setattr(self, name, param)
        

In [None]:
from coderbot_sim.topdown_driving.tk import TopDownDrivingTkFrontend
from coderbot_sim.topdown_driving import TopDownDrivingEnv
import numpy as np

under = TopDownDrivingEnv(num_envs=25)
nets = [SmallNN() for _ in range(under.num_envs)]

env = TopDownDrivingTkFrontend(sim_env=under)
env.render()

action = {"throttle": 0.0, "steer": 0.0}
state = await env.step(action, dt=0.00)

for _ in range(600):
    throttle = np.zeros(under.num_envs, dtype=np.float32)
    steer = np.zeros(under.num_envs, dtype=np.float32)
    
    for i in range(under.num_envs):
        rays = state["rays"][i]
        output = nets[i].forward(rays)
        throttle[i] = output[0]
        steer[i] = output[1]
    
    action = {"throttle": throttle, "steer": steer}
    state = await env.step(action, dt=0.02)

In [None]:
def evolve(nets, fitness, elite_frac, mutation_scale):
    """Build the next generation from the fittest members of ``nets``.

    The top ``elite_frac`` share of the population (at least one individual,
    at most all of them) is carried over unchanged, one clone per elite in
    descending fitness order. Every remaining slot is filled with a mutated
    clone of an elite chosen uniformly at random via ``np.random.choice``.

    Args:
        nets: Sequence of individuals exposing ``clone()`` and ``mutate(scale)``.
        fitness: One score per individual, aligned with ``nets``; higher is better.
        elite_frac: Fraction of the population treated as elite.
        mutation_scale: Value forwarded to ``mutate`` for non-elite offspring.

    Returns:
        A new list with ``len(nets)`` freshly cloned individuals; the input
        individuals are not modified.

    Raises:
        ValueError: If ``fitness`` and ``nets`` differ in length.
    """
    pop = len(nets)
    if len(fitness) != pop:
        raise ValueError(
            f"fitness has {len(fitness)} entries but the population has {pop}"
        )
    if pop == 0:
        return []

    # At least one elite, never more than the population itself.
    elite_count = min(pop, max(1, int(pop * elite_frac)))

    # Indices of individuals sorted from best to worst.
    ranked = np.argsort(fitness)[::-1]
    elites = ranked[:elite_count]

    # Elitism: each elite survives exactly once and unmutated. Sampling the
    # elite slots at random (as before) could drop the best individual and
    # fill the slots with duplicates.
    new_nets = [nets[j].clone() for j in elites]

    # Fill the rest of the population with mutated offspring of random elites.
    for _ in range(pop - elite_count):
        child = nets[np.random.choice(elites)].clone()
        child.mutate(mutation_scale)
        new_nets.append(child)

    return new_nets

def train_population(
    env,
    nets,
    generations=20,
    episode_steps=500,
    elite_frac=0.15,
    mutation_scale=0.15,
    dt=0.02,
):
    """Evolve ``nets`` by repeatedly running one episode and calling ``evolve``.

    Each generation resets ``env``, drives car ``i`` with ``nets[i]`` for
    ``episode_steps`` steps, sums ``state["reward"]`` into a per-car fitness,
    replaces the population through ``evolve`` and prints the mean and best
    fitness of the episode just run.

    Args:
        env: Simulation stepped synchronously (no ``await``); must provide
            ``num_envs``, ``reset()`` and ``step(action, dt=...)`` returning a
            mapping with ``"rays"`` and ``"reward"`` entries.
        nets: One controller per environment, each exposing ``forward``.
        generations: Number of evaluate-then-evolve rounds.
        episode_steps: Simulation steps per round.
        elite_frac: Forwarded to ``evolve``.
        mutation_scale: Forwarded to ``evolve``.
        dt: Time step passed to ``env.step`` during the episode.

    Returns:
        The population produced by the last ``evolve`` call; it has not been
        evaluated itself.
    """
    num_envs = env.num_envs
    fitness = np.zeros(num_envs, dtype=np.float32)

    for generation in range(generations):
        # Start a fresh episode; the zero-length neutral step only serves to
        # fetch the first observation.
        env.reset()
        state = env.step({"throttle": 0.0, "steer": 0.0}, dt=0.0)
        fitness.fill(0.0)

        for _ in range(episode_steps):
            observations = state["rays"]

            # Rays are clamped to [0, 1] before being fed to each controller.
            commands = [
                nets[i].forward(np.clip(observations[i], 0.0, 1.0))
                for i in range(num_envs)
            ]

            throttle = np.zeros(num_envs, dtype=np.float32)
            steer = np.zeros(num_envs, dtype=np.float32)
            for i, command in enumerate(commands):
                throttle[i] = command[0]
                steer[i] = command[1]

            state = env.step({"throttle": throttle, "steer": steer}, dt=dt)

            # Accumulate this step's reward for every car.
            fitness += state["reward"]

        nets = evolve(
            nets,
            fitness,
            elite_frac=elite_frac,
            mutation_scale=mutation_scale,
        )

        # Statistics describe the episode just evaluated, not the new offspring.
        summary = (
            f"Gen {generation:04d} | "
            f"mean: {fitness.mean():.2f} | "
            f"best: {fitness.max():.2f}"
        )
        print(summary)

    return nets

In [None]:
from coderbot_sim.topdown_driving import TopDownDrivingEnv

# Headless training run: no Tk frontend, the raw simulation is stepped directly.
env = TopDownDrivingEnv(num_envs=200)

# One randomly initialised controller per simulated car.
nets = []
for _ in range(env.num_envs):
    nets.append(SmallNN())

# Rebind `nets` to the evolved population returned by training.
nets = train_population(env, nets, generations=20, episode_steps=600)