# In [11]:
import gymnasium as gym
import numpy as np
import cv2

def preprocess_observation(observation):
    """
    Turn an RGB frame into a normalized edge-magnitude map.

    The frame is reduced to grayscale, differentiated with 5x5 Sobel kernels
    along both image axes, and the two derivatives are merged into a single
    gradient magnitude. The magnitude is then min-max scaled into the range
    0-1 (float32) so that callers can threshold it easily.
    """
    grayscale = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)

    # One Sobel pass per axis: (dx, dy) = (1, 0) is the horizontal derivative,
    # (0, 1) the vertical one. CV_64F keeps negative gradients from clipping.
    grad_x, grad_y = (
        cv2.Sobel(grayscale, cv2.CV_64F, dx, dy, ksize=5)
        for dx, dy in ((1, 0), (0, 1))
    )

    # Euclidean norm of the per-pixel gradient vector.
    magnitude = np.sqrt(np.square(grad_x) + np.square(grad_y))

    return cv2.normalize(
        magnitude,
        None,
        alpha=0,
        beta=1,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_32F,
    )

def simple_policy(edges, threshold=0.06):
    """
    Pick a driving action from an edge map. Placeholder for a more sophisticated policy.

    Only the lower half of the image (rows from the vertical midpoint down)
    is inspected. Its mean edge strength is compared against ``threshold``:

    * above the threshold -> drive straight with full acceleration;
    * otherwise           -> steer randomly with half acceleration.

    Args:
        edges: Array with at least two dimensions holding edge strengths
            (rows first). Values are presumably normalized to 0-1, as the
            default threshold suggests -- confirm against the caller.
        threshold: Mean edge strength above which the "drive straight"
            action is chosen. The default of 0.06 is arbitrary; tune it
            based on your observations.

    Returns:
        A 3-element list ``[steering, acceleration, 0]``. The third entry is
        always 0 (presumably the brake in CarRacing's continuous action
        space -- confirm against the environment docs).
    """
    lower_half = edges[edges.shape[0] // 2:, :]

    # np.mean of an empty slice emits a RuntimeWarning and yields NaN; treat
    # "no pixels" as "no edges" instead, which selects the same branch below.
    edge_strength = float(np.mean(lower_half)) if lower_half.size else 0.0

    if edge_strength > threshold:
        return [0, 1, 0]  # Straight with full acceleration

    # Weak edge response: explore with a random steering angle in [-1, 1)
    # while keeping some forward acceleration.
    return [np.random.uniform(-1, 1), 0.5, 0]

# Initialize environment
# render_mode="human" makes the environment draw its own window on every
# reset()/step(), so no explicit env.render() call is needed below.
env = gym.make("CarRacing-v2", domain_randomize=False, render_mode="human")

episodes = 3  # number of episodes to run
steps = 100   # maximum number of steps per episode

# try/finally guarantees the environment (and its window) is released even
# when the run is interrupted, e.g. by Ctrl-C or a notebook kernel interrupt.
try:
    for _episode in range(episodes):
        # Start a new episode
        observation, info = env.reset(options={"randomize": False})

        for _step in range(steps):
            # Preprocess the observation to get edge information
            edges = preprocess_observation(observation)

            # Decide on an action based on edges
            action = simple_policy(edges)

            # Apply the action; in human mode this also renders the frame
            observation, reward, terminated, truncated, info = env.step(action)

            # Stop the episode early once the environment reports it is over
            if terminated or truncated:
                break
finally:
    env.close()


# KeyboardInterrupt: (notebook output -- the run above was interrupted before finishing)