## Description of Exploration
---------------------------
 In this exploration, I aim to develop a generalized, modular hybrid approach that combines tabular methods 
 and function approximation. The core idea is to decompose the input space into features or feature subsets 
 and approximate values modularly, rather than relying on a single global function or a massive state-action table. 
 This structured representation allows for a "committee of experts" approach, where each expert (module) 
 focuses on specific aspects of the input and contributes to the final decision.

 I will use the CarRacing-v3 environment as a testbed for this approach. The environment's continuous, 
 image-based state space (and its natively continuous action space, used here in its discrete five-action mode) 
 provides an excellent opportunity to evaluate the effectiveness of blending tabular structures 
 with modular function approximation. By systematically mapping features or feature combinations to tabular 
 values or approximators, I will assess how well this hybrid method performs in terms of learning efficiency 
 and policy quality.

In [2]:
import numpy as np

class ExpertModule:
    """Common interface for the per-feature experts in this notebook.

    Both methods are placeholders that do nothing and return ``None``;
    subclasses are expected to supply real implementations.
    """

    def predict(self, feature_dict, action):
        """Return the expert's value estimate for ``action`` given ``feature_dict``.

        The base implementation is a stub and returns ``None``.
        """
        return None

    def update(self, feature_dict, action, target):
        """Move the expert's estimate for ``action`` toward ``target``.

        The base implementation is a stub and returns ``None``.
        """
        return None

# Examples
class TabularCenterOffset:
    """Tabular action-value expert indexed by the discretized ``center_offset`` feature.

    The table has one row per offset bin and one column per action, and starts
    at zero. ``gamma`` is stored on the instance but is not used by any method
    of this class; callers that build a bootstrapped target apply their own
    discount.
    """

    def __init__(self, num_bins=10, num_actions=5, alpha=0.1, gamma=0.99):
        self.num_bins = num_bins
        self.num_actions = num_actions
        self.alpha = alpha  # step size used by ``update``
        self.gamma = gamma
        self.q_table = np.zeros((num_bins, num_actions))

    def _row(self, features):
        """Return the table row selected by ``features["center_offset"]``."""
        return discretize_center_offset(features["center_offset"], self.num_bins)

    def predict(self, features, action):
        """Return the stored value for ``action`` in the bin ``features`` falls into."""
        return self.q_table[self._row(features), action]

    def best_action(self, features):
        """Return the index (as ``int``) of the highest-valued action for ``features``."""
        action_values = self.q_table[self._row(features)]
        return int(action_values.argmax())

    def update(self, features, action, target):
        """Shift the stored value for (bin, ``action``) toward ``target`` by ``alpha``."""
        row = self._row(features)
        error = target - self.q_table[row, action]
        self.q_table[row, action] += self.alpha * error

class LinearCurvature(ExpertModule):
    """Placeholder expert; no behaviour of its own yet beyond what ExpertModule provides."""
class NeuralLaneAngle(ExpertModule):
    """Placeholder expert; no behaviour of its own yet beyond what ExpertModule provides."""


def discretize_center_offset(center_offset, num_bins=10):
    """Map a center offset to one of ``num_bins`` equal-width bins over [-1, 1].

    Values outside [-1, 1] are clipped to the nearest end of the range first.

    Args:
        center_offset: Scalar offset; expected to lie in [-1, 1].
        num_bins: Number of bins; must be at least 1.

    Returns:
        A Python ``int`` in ``[0, num_bins - 1]``.

    Raises:
        ValueError: If ``num_bins`` is less than 1, or if ``center_offset`` is
            NaN (the float-to-int conversion rejects it).
    """
    if num_bins < 1:
        raise ValueError(f"num_bins must be >= 1, got {num_bins}")

    clipped = np.clip(center_offset, -1.0, 1.0)
    fraction = (clipped + 1.0) / 2.0  # position within the range, in [0, 1]

    # Scale by num_bins (not num_bins - 1) so every bin has the same width.
    # Only an offset of exactly +1.0 lands on index num_bins, so clamp it into
    # the last bin. Scaling by num_bins - 1 would leave the last bin reachable
    # only at exactly +1.0.
    return min(int(fraction * num_bins), num_bins - 1)


In [None]:
import gymnasium as gym
import numpy as np
import cv2
from collections import deque
import pygame  # Import pygame
# discrete action space for CarRacing-v3
# 0: do nothing
# 1: steer left
# 2: steer right
# 3: accelerate/gas
# 4: brake

# --- Hyperparameters ---------------------------------------------------------
epsilon = 0.1  # probability of picking a random action (ε-greedy)
gamma = 0.99  # discount applied to the bootstrapped next-state value
CROP_HEIGHT_PERCENTAGE = 0.12  # fraction of frame height cut from the bottom

# --- Environment and learner -------------------------------------------------
env = gym.make("CarRacing-v3", continuous=False, render_mode="rgb_array")
observedPixels, _ = env.reset()
center_offset_module = TabularCenterOffset()

# Rolling window of recent frames, consumed by the temporal features
obs_history = deque(maxlen=5)

# --- Display window ----------------------------------------------------------
pygame.init()
scale_factor = 4  # not referenced by the code visible in this notebook
width = 600
height = 400
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("CarRacing-v3 Observation with Features")

def extract_features(obs, obs_history):
    """Compute the hand-crafted feature dictionary for a single frame.

    The car's speed is read from the module-level ``env``; everything else is
    derived from ``obs``. As a side effect, the normalized grayscale frame is
    appended to ``obs_history`` on every call.

    Args:
        obs: RGB frame indexed as (row, column, channel). Pixel values are
            divided by 255 here, so 8-bit input is presumably expected.
        obs_history: ``deque`` of earlier normalized frames; mutated in place.

    Returns:
        dict with keys ``center_offset``, ``curvature``, ``edge_density``,
        ``avg_lane_angle``, ``num_lane_lines``, ``off_track`` and ``speed``.
    """
    # Speed: magnitude of the hull's linear velocity
    hull_velocity = env.unwrapped.car.hull.linearVelocity
    speed = np.linalg.norm([hull_velocity.x, hull_velocity.y])

    # Grayscale 96x96 working frame, scaled by 1/255
    gray_frame = cv2.resize(cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY), (96, 96))
    frame = gray_frame / 255.0

    # Edge map
    edge_map = cv2.Canny((frame * 255).astype(np.uint8), 50, 150)

    # Center offset: brightness imbalance between the right and left halves
    # (the original author flagged this heuristic as needing a double check)
    left_mass = np.sum(frame[:, :48])
    right_mass = np.sum(frame[:, 48:])
    center_offset = (right_mass - left_mass) / (left_mass + right_mass + 1e-5)

    # Curvature proxy: mean absolute change between consecutive stored frames,
    # only once the history window is full
    obs_history.append(frame)
    if len(obs_history) == obs_history.maxlen:
        frames = list(obs_history)
        frame_changes = [
            np.abs(later - earlier).mean()
            for earlier, later in zip(frames, frames[1:])
        ]
        curvature = np.mean(frame_changes)
    else:
        curvature = 0.0

    # Line segments from the probabilistic Hough transform and their angles
    segments = cv2.HoughLinesP(edge_map, 1, np.pi/180, threshold=30, minLineLength=20, maxLineGap=10)
    lane_angles = []
    if segments is not None:
        lane_angles = [
            np.arctan2(y2 - y1, x2 - x1)
            for x1, y1, x2, y2 in (segment[0] for segment in segments)
        ]
    avg_lane_angle = np.mean(lane_angles) if lane_angles else 0.0

    # Off-track heuristic: more than 30% of the top 20 rows lie within 0.1
    # (Euclidean RGB distance) of a reference colour that may need tuning
    reference_rgb = np.array([120, 174, 255]) / 255.0
    top_band = obs[:20, :, :] / 255.0
    color_distance = np.linalg.norm(top_band - reference_rgb, axis=2)
    off_track = bool(np.mean(color_distance < 0.1) > 0.3)

    return {
        "center_offset": center_offset,
        "curvature": curvature,
        # Sum of edge-map values per pixel of the 96x96 frame
        "edge_density": np.sum(edge_map) / (96*96),
        "avg_lane_angle": avg_lane_angle,
        "num_lane_lines": len(lane_angles),
        "off_track": off_track,
        "speed": speed,
    }

# Main interaction loop: act ε-greedily on the center-offset expert, apply a
# one-step Q-learning update after every environment step, and mirror the
# cropped observation into the pygame window.
running = True
initial_steps = 10  # Number of steps to go straight at the start of each episode
current_step = 0

# Rows kept once the status bar at the bottom is removed. Taken from the
# environment's observation shape so reset frames and step frames are cropped
# identically, and slicing an already-cropped frame is a no-op.
cropped_rows = int(env.observation_space.shape[0] * (1 - CROP_HEIGHT_PERCENTAGE))

# Features of the frame the agent is currently acting on. They are computed
# once per frame and carried over to the next iteration, so each frame is
# pushed into obs_history exactly once. None means "first frame of an episode".
features = None

try:
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        if features is None:
            observedPixels = observedPixels[:cropped_rows, :, :]
            features = extract_features(observedPixels, obs_history)

        # Take action
        if current_step < initial_steps:
            action = 3
        elif np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = center_offset_module.best_action(features)
            print(f"Step: {current_step} , Features: {features}, Action: {action}")

        observedPixels, reward, terminated, truncated, info = env.step(action)
        # removing the bar in the bottom of the screen
        observedPixels = observedPixels[:cropped_rows, :, :]
        print("Height of the image:", observedPixels.shape[0])
        print("Width of the image:", observedPixels.shape[1])
        print("Efficiency of the image:", (96*96)/(observedPixels.shape[0] * observedPixels.shape[1]) * 100, "%")

        done = terminated or truncated

        next_features = extract_features(observedPixels, obs_history)
        if terminated:
            # A terminal state has no future return, so do not bootstrap from it.
            # (A truncated episode still bootstraps: the state itself is not terminal.)
            target = reward
        else:
            max_future_q = max(
                center_offset_module.predict(next_features, candidate)
                for candidate in range(env.action_space.n)
            )
            target = reward + (gamma * max_future_q)

        # Learn from the transition that was just observed. Without this call
        # the table would stay at its initial zeros.
        center_offset_module.update(features, action, target)

        # Convert the observation (NumPy array) to a pygame Surface
        obs_resized = cv2.resize(observedPixels, (96, 96))  # Resize for pygame display
        obs_display = pygame.surfarray.make_surface(obs_resized.swapaxes(0, 1))  # Swap axes for pygame
        obs_display = pygame.transform.scale(obs_display, (width, height))  # Scale for visibility
        screen.blit(obs_display, (0, 0))

        pygame.display.flip()

        if done:
            print("Episode finished early.")
            observedPixels, _ = env.reset()
            obs_history.clear()
            current_step = 0
            features = None  # recompute from the fresh reset frame
        else:
            features = next_features
            current_step += 1
finally:
    # Release the environment and the window even if the loop raises.
    env.close()
    pygame.quit()

Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
Step: 10 , Features: {'center_offset': 0.011308937260593522, 'curvatur