## Description of Exploration
---------------------------
 In this exploration, I aim to develop a generalized, modular hybrid approach that combines tabular methods 
 and function approximation. The core idea is to decompose the input space into features or feature subsets 
 and approximate values modularly, rather than relying on a single global function or a massive state-action table. 
 This structured representation allows for a "committee of experts" approach, where each expert (module) 
 focuses on specific aspects of the input and contributes to the final decision.

 I will use the CarRacing v2 environment as a testbed for this approach. The environment's continuous state 
 and action spaces provide an excellent opportunity to evaluate the effectiveness of blending tabular structures 
 with modular function approximation. By systematically mapping features or feature combinations to tabular 
 values or approximators, I will assess how well this hybrid method performs in terms of learning efficiency 
 and policy quality.

In [8]:
import numpy as np

class ExpertModule:
    def predict(self, feature_dict, action): ...
    def update(self, feature_dict, action, target): ...

# Examples
class TabularCenterOffset(ExpertModule):
    def __init__(self, num_bins=10, num_actions=5, alpha=0.1, gamma=0.99):
        self.q_table = np.zeros((num_bins, num_actions))
        self.alpha = alpha
        self.gamma = gamma
        self.num_bins = num_bins
        self.num_actions = num_actions

    def predict(self, features, action):
        # Use curvature as the feature for discretization
        bin_index = discretize_center_offset(features[1], self.num_bins)  # features[1] is curvature
        return self.q_table[bin_index, action]

    def best_action(self, features):
        # Use curvature as the feature for discretization
        bin_index = discretize_center_offset(features[1], self.num_bins)  # features[1] is curvature
        return int(np.argmax(self.q_table[bin_index]))

    def update(self, features, action, target):
        # Use curvature as the feature for discretization
        bin_index = discretize_center_offset(features[1], self.num_bins)  # features[1] is curvature
        old_value = self.q_table[bin_index, action]
        self.q_table[bin_index, action] += self.alpha * (target - old_value)

class LinearCurvature(ExpertModule):
    def __init__(self, alpha=0.1):
        self.weights = np.zeros(1)  # Single weight for linear approximation
        self.alpha = alpha

    def predict(self, features, action):
        curvature = features[1]  # Extract curvature from features tuple
        return self.weights[0] * curvature

    def update(self, features, action, target):
        curvature = features[1]  # Extract curvature from features tuple
        prediction = self.predict(features, action)
        error = target - prediction
        self.weights[0] += self.alpha * error * curvature


class NeuralLaneAngle(ExpertModule):
    def __init__(self, input_dim=8, hidden_dim=16, output_dim=1, alpha=0.01):
        # Simple neural network with one hidden layer
        self.weights_input_hidden = np.random.randn(input_dim, hidden_dim) * 0.01
        self.weights_hidden_output = np.random.randn(hidden_dim, output_dim) * 0.01
        self.alpha = alpha

    def predict(self, features, action):
        features = np.array(features).reshape(1, -1)  # Convert features to array
        hidden = np.tanh(features @ self.weights_input_hidden)  # Hidden layer activation
        output = hidden @ self.weights_hidden_output  # Output layer
        return output[0, 0]  # Return scalar prediction

    def update(self, features, action, target):
        features = np.array(features).reshape(1, -1)  # Convert features to array
        hidden = np.tanh(features @ self.weights_input_hidden)  # Hidden layer activation
        output = hidden @ self.weights_hidden_output  # Output layer
        prediction = output[0, 0]  # Scalar prediction

        # Compute error
        error = target - prediction

        # Backpropagation
        grad_output = error  # Gradient at output layer
        grad_hidden = grad_output * self.weights_hidden_output.T * (1 - hidden ** 2)  # Gradient at hidden layer

        # Update weights
        self.weights_hidden_output += self.alpha * hidden.T @ grad_output.reshape(1, -1)
        self.weights_input_hidden += self.alpha * features.T @ grad_hidden


def discretize_center_offset(center_offset, num_bins=10):
    # Clip to [-1, 1] and shift to [0, 2] for positive indexing
    clipped = np.clip(center_offset, -1.0, 1.0)
    bin_index = int((clipped + 1.0) / 2.0 * (num_bins - 1))
    return bin_index


class ExpertEnsemble:
    def __init__(self, experts, gating_strategy='linear', alpha=0.01):
        self.experts = experts
        self.alpha = alpha
        self.num_experts = len(experts)

        # Linear gating for now: weights per expert
        if gating_strategy == 'linear':
            self.gating_weights = np.ones(self.num_experts) / self.num_experts
        self.gating_strategy = gating_strategy

    def predict(self, feature_dict, action):
        print(f"Predicting with gating strategy: {self.gating_strategy}")
        print(f"Feature dict: {feature_dict}, Action: {action}")
        expert_preds = []
        for expert in self.experts:
            pred = expert.predict(feature_dict, action)
            expert_preds.append(pred)
        print(f"Expert prediction: {pred}")  # Debugging line
        expert_preds = np.array(expert_preds)
        weighted_prediction = np.dot(self.gating_weights, expert_preds)
        return weighted_prediction

    def update(self, feature_dict, action, target):
        expert_preds = np.array([expert.predict(feature_dict, action) for expert in self.experts])
        ensemble_pred = np.dot(self.gating_weights, expert_preds)

        error = target - ensemble_pred

        # Update each expert individually
        for expert in self.experts:
            expert.update(feature_dict, action, target)

        # Update gating weights (basic gradient descent style)
        grad = error * expert_preds
        self.gating_weights += self.alpha * grad
        self.gating_weights = self._normalize(self.gating_weights)

    def _normalize(self, weights):
        # Normalize to sum to 1 for stability
        total = np.sum(weights)
        if total == 0:
            return np.ones_like(weights) / len(weights)
        return weights / total

In [None]:
import gymnasium as gym
import numpy as np
import cv2
from collections import deque
import pygame  
# discrete action space for CarRacing-v3
# 0: do nothing
# 1: steer left
# 2: steer right
# 3: accelerate/gas
# 4: brake

env = gym.make("CarRacing-v3", continuous=False, render_mode="rgb_array")
observedPixels, _ = env.reset()
center_offset_module = TabularCenterOffset()
experts = [LinearCurvature(alpha=0.05), NeuralLaneAngle(), TabularCenterOffset()]
model = ExpertEnsemble(experts)
epsilon = 0.1  # ε-greedy exploration
gamma = 0.99
CROP_HEIGHT_PERCENTAGE = 0.12

# Initialize pygame
pygame.init()
scale_factor = 4  # Increased scale factor
width, height = 600, 400  # Adjusted window size
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("CarRacing-v3 Observation with Features")

# Track last few observations for temporal features
obs_history = deque(maxlen=5)
speed_history = deque(maxlen=5)
is_first_run = True

def extract_features(obs, obs_history, speed_history):
    vel_vec = env.unwrapped.car.hull.linearVelocity
    speed = np.linalg.norm([vel_vec.x, vel_vec.y])
    wheel_steering = (env.unwrapped.car.wheels[0].steer) / 0.6  # Normalize steering to [-1, 1] it wll mostly be [-1, 0, 1]
    # Where -1 is left, 0 is straight, and 1 is right  
    print(wheel_steering, speed)
    
    # Track acceleration from speed history
    speed_history.append(speed)
    
    if len(speed_history) == 2:
        # Approximate acceleration (delta_v / delta_t), assuming ~50 FPS → delta_t = 1/50
        acceleration = (speed_history[-1] - speed_history[-2]) * 50.0
        acceleration = np.clip(acceleration / 10.0, -1.0, 1.0)  # Normalize acceleration
    else:
        acceleration = 0.0

    gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
    RESIZE_SCALE = 64
    resized = cv2.resize(gray, (RESIZE_SCALE, RESIZE_SCALE))
    
    edges = auto_canny(resized)

    norm = resized / 255.0
    # Track center offset
    half = norm.shape[1]//2
    left_sum, right_sum = np.sum(norm[:, :half]), np.sum(norm[:, half:])
    center_offset = (right_sum - left_sum) / (left_sum + right_sum + 1e-5)
    
    # Curvature
    obs_history.append(norm)
    curvature = np.mean([np.abs(obs_history[i] - obs_history[i-1]).mean() 
                        for i in range(1, len(obs_history))]) if len(obs_history) == obs_history.maxlen else 0.0
    # Normalize curvature to [0, 1]
    # curvature = (curvature - min_curvature) / (max_curvature - min_curvature)
    curvature = np.clip(curvature*5.0,0.0,1.0)
    # Lane detection
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, 30, minLineLength=20, maxLineGap=10)
    lane_angles = [np.arctan2(y2 - y1, x2 - x1) for line in lines for x1, y1, x2, y2 in [line[0]]] if lines is not None else []
    avg_lane_angle = np.mean(lane_angles) if lane_angles else 0.0
    
    # Off-track detection
    top_pixels = obs[:20, :, :] / 255.0
    sky_color_example = np.array([120, 174, 255]) / 255.0
    diff = np.linalg.norm(top_pixels - sky_color_example, axis=2)
    off_track = 0 if np.mean(diff < 0.1) > 0.3  else 1  # Adjust threshold as needed

    left_edge, right_edge = estimate_lane_edges(norm)
    return [center_offset,
        curvature,
        avg_lane_angle / np.pi,
        min(speed / 100.0, 1.0),
        acceleration,
        float(off_track),
        left_edge,
        right_edge
    ]

def auto_canny(image, sigma=0.33):
    """Compute Canny edges with thresholds based on image median intensity."""
    v = np.median(image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    return cv2.Canny(image, lower, upper)

# This gives values ∈ [0,1] where 0 is the far left of the image and 1 is the far right. 
# The closer left_edge is to 0.5, the more centered you are.
def estimate_lane_edges(norm):
    vertical_profile = np.mean(norm, axis=0)  # Average brightness vertically
    threshold = 0.3  # adjust as needed
    indices = np.where(vertical_profile > threshold)[0]
    if len(indices) > 0:
        left_edge = indices[0] / len(vertical_profile)
        right_edge = indices[-1] / len(vertical_profile)
    else:
        left_edge = 0.0
        right_edge = 1.0
    return left_edge, right_edge

running = True
initial_steps = 10  # Number of steps to go straight initially
current_step = 0

while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    # Take action
    if current_step < initial_steps:
        action = 3
    else:
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
            
        else:
            features = extract_features(observedPixels, obs_history, speed_history)
            action = center_offset_module.best_action(features)
            print(f"Step: {current_step} , Features: {features}, Action: {action}")

    observedPixels, reward, terminated, truncated, info = env.step(action)
    # removing the bar in the bottom of the screen
    observedPixels = observedPixels[:int(observedPixels.shape[0] * (1-CROP_HEIGHT_PERCENTAGE)), :, :]
    if is_first_run:
        print("Height of the image:", observedPixels.shape[0])
        print("Width of the image:", observedPixels.shape[1])
        print("Efficiency of the image:", (96*96)/(observedPixels.shape[0] * observedPixels.shape[1]) * 100, "%")
        is_first_run = False


    done = terminated or truncated
    next_features = extract_features(observedPixels, obs_history, speed_history)
    max_future_q = model.predict(next_features, action)  # Predict future Q-value
    target = reward + (gamma * max_future_q) 
    model.update(next_features, action, target)

    # Convert the observation (NumPy array) to a pygame Surface
    obs_resized = cv2.resize(observedPixels, (96, 96))  # Resize for pygame display
    obs_display = pygame.surfarray.make_surface(obs_resized.swapaxes(0, 1)) # Swap axes for pygame
    obs_display = pygame.transform.scale(obs_display, (width, height)) # Scale for visibility
    screen.blit(obs_display, (0, 0))

    pygame.display.flip()

    if done:
        print("Episode finished early.")
        observedPixels, _ = env.reset()
        obs_history.clear()
        current_step = 0
    else:
        current_step += 1

env.close()
pygame.quit()

Height of the image: 84
Width of the image: 96
Efficiency of the image: 114.28571428571428 %
0.0 2.1912047821085263
Predicting with gating strategy: linear
Feature dict: [-0.587121511597182, 0.0, 0.025206835711782977, 0.021912047821085263, 0.0, 1.0, 0.0, 0.59375], Action: 3
Expert prediction: 0.0
0.0 3.3980939302578324
Predicting with gating strategy: linear
Feature dict: [-0.5169705601237876, 0.0, 0.47981694033617667, 0.033980939302578325, 1.0, 1.0, 0.0, 0.640625], Action: 3
Expert prediction: 0.6992303831417784
0.0 4.357070621852767
Predicting with gating strategy: linear
Feature dict: [-0.45157433868904057, 0.0, 0.4780822987275691, 0.04357070621852767, 0.0, 1.0, 0.0, 0.671875], Action: 3
Expert prediction: 0.6423994312473171
0.0 5.253578974304159
Predicting with gating strategy: linear
Feature dict: [-0.3787870469004086, 0.0, 0.47438528156299664, 0.05253578974304159, 0.0, 1.0, 0.0, 0.703125], Action: 3
Expert prediction: 0.5893439296769
0.0 6.134349858984609
Predicting with gating s