In [None]:
import pygame
import random
import math
import time
import numpy as np
from stable_baselines3 import PPO
  

  
# Make sure your Pygame is 2.x with SDL2
from pygame._sdl2.video import Window, Renderer, Texture


# Start all pygame modules, then explicitly initialise the joystick module
# that the controller lookup below depends on.
pygame.init()
pygame.joystick.init()

###############################################################################
# Config / Constants
###############################################################################
FULL_VIEW_SIZE = (1200, 800)   # Window #1 (full view), width x height
RED_ONLY_SIZE  = (1200, 800)   # Window #2 (red-arrow-only), width x height

# Standard deviation of the Gaussian noise added to the human direction in
# move_dot(); changed at runtime with the [ and ] keys, clamped to
# [MIN_NOISE, MAX_NOISE] in steps of NOISE_STEP.
NOISE_MAGNITUDE = 0.5
MIN_NOISE = 0.0
MAX_NOISE = 2.0
NOISE_STEP = 0.1

# Sizes below are expressed relative to a 600x600 layout and scaled by the
# mean of the horizontal and vertical scale factors.
OLD_WINDOW_SIZE   = (600, 600)  # original reference
SCALING_FACTOR_X  = FULL_VIEW_SIZE[0] / OLD_WINDOW_SIZE[0]
SCALING_FACTOR_Y  = FULL_VIEW_SIZE[1] / OLD_WINDOW_SIZE[1]
SCALING_FACTOR    = (SCALING_FACTOR_X + SCALING_FACTOR_Y) / 2

# RGB colours
WHITE  = (255, 255, 255)
BLACK  = (0, 0, 0)
RED    = (255, 0, 0)
GREEN  = (0, 200, 0)
BLUE   = (0, 0, 255)
YELLOW = (255, 255, 0)
GRAY   = (128, 128, 128)

FONT_COLOR = (0, 0, 0)
FONT_SIZE = int(24 * SCALING_FACTOR)
ARROW_LENGTH = int(60 * SCALING_FACTOR)   # default length used by draw_arrow()

NUM_GOALS            = 3                  # targets created by generate_targets()
OBSTACLE_RADIUS      = int(20 * SCALING_FACTOR)
COLLISION_BUFFER     = int(5 * SCALING_FACTOR)   # added to OBSTACLE_RADIUS in check_collision()
ENABLE_OBSTACLES     = True
MAX_SPEED            = 3 * SCALING_FACTOR        # largest step move_dot() applies per call

DOT_RADIUS           = int(30 * SCALING_FACTOR)
TARGET_RADIUS        = int(10 * SCALING_FACTOR)
# The goal counts as reached when the dot centre is closer than this to the target centre.
GOAL_DETECTION_RADIUS= DOT_RADIUS + TARGET_RADIUS

GHOST_TRAIL_DURATION = 3.0  # seconds of position history kept for the gray trail
recent_positions     = []   # (x, y, timestamp) samples, oldest first
last_reset_time = time.time()  # wall-clock time of the last reset, for the on-screen timer

RECENT_DIR_LOOKBACK  = 1.0   # seconds of history used by get_recent_direction()
GOAL_SWITCH_THRESHOLD= 0.05  # minimum score gain before predict_human_target() switches target

WINDOW_CENTER = (FULL_VIEW_SIZE[0] // 2, FULL_VIEW_SIZE[1] // 2)
START_POS = [WINDOW_CENTER[0], WINDOW_CENTER[1]]
dot_pos   = START_POS.copy()   # current dot position, mutated in place by move_dot()

# Mutable game state shared through globals.
gamma         = 0.2    # blend weight: movement = gamma * W + (1 - gamma) * H
reached_goal  = False
targets       = []     # [x, y] target positions
current_target_idx = 0 # index into targets of the currently assumed goal
obstacles     = []     # [x, y] obstacle centres

# Controller: use the first attached joystick when one is present.
AXIS_L2, AXIS_R2 = 4, 5  # axis indices polled for the L2 / R2 triggers

if pygame.joystick.get_count() == 0:
    joystick = None
    print("No joystick detected.")
else:
    joystick = pygame.joystick.Joystick(0)
    joystick.init()
    print("Joystick initialized:", joystick.get_name())

###############################################################################
# Create Two Windows & Renderers
###############################################################################
# Window #1 and its renderer: shows the full scene drawn by render_full_view().
window1 = Window("2D Environment: Full View", size=FULL_VIEW_SIZE)
renderer1 = Renderer(window1, vsync=True)

# Window #2 and its renderer: shows the reduced scene drawn by render_red_only().
window2 = Window("2D Environment: Red Arrow Only", size=RED_ONLY_SIZE)
renderer2 = Renderer(window2, vsync=True)

###############################################################################
# Create in-memory Surfaces (per-pixel alpha, SRCALPHA) that are drawn on each
# frame and then uploaded to the windows as SDL2 textures
###############################################################################
def create_compatible_surface(size):
    """Return a new per-pixel-alpha (SRCALPHA) surface of ``size`` for use as a texture source."""
    alpha_surface = pygame.Surface(size, flags=pygame.SRCALPHA)
    return alpha_surface

# Off-screen canvases, one per window; the render_* functions redraw them every frame
surface_full = create_compatible_surface(FULL_VIEW_SIZE)
surface_red_only = create_compatible_surface(RED_ONLY_SIZE)

# pygame's default font (None) at the scaled size; shared by all text rendering
font = pygame.font.Font(None, FONT_SIZE)

###############################################################################
# Helper: Convert a pygame.Surface to an SDL2 Texture
###############################################################################
def surface_to_texture(renderer, surf):
    """
    Create a new SDL2 Texture from a pygame Surface.

    A surface that is not already 32 bits per pixel is first copied onto a
    32-bit per-pixel-alpha surface. The copy is made with a blit instead of
    ``convert_alpha()``: ``convert_alpha()`` requires a display surface
    created through ``pygame.display.set_mode()``, and this program only
    opens ``pygame._sdl2`` windows, so that call would raise here.

    Parameters:
    renderer: SDL2 Renderer the texture is created for
    surf: pygame.Surface to upload

    Returns:
    SDL2 Texture
    """
    if surf.get_bitsize() != 32:
        rgba = pygame.Surface(surf.get_size(), flags=pygame.SRCALPHA)
        rgba.blit(surf, (0, 0))
        surf = rgba

    return Texture.from_surface(renderer, surf)

###############################################################################
# Helper / Utility functions
###############################################################################
def distance(pos1, pos2):
    """Return the Euclidean distance between two 2D points (only indices 0 and 1 are read)."""
    delta_x = pos1[0] - pos2[0]
    delta_y = pos1[1] - pos2[1]
    return math.hypot(delta_x, delta_y)

def line_circle_intersection(start, end, circle_center, radius):
    """
    Report whether the segment start->end comes within ``radius`` of ``circle_center``.

    The circle centre is projected onto the segment, the projection is clamped
    to the segment's ends, and the distance from that closest point to the
    centre is compared against the radius. A zero-length segment is treated
    as the single point ``start``.
    """
    seg_x = end[0] - start[0]
    seg_y = end[1] - start[1]
    seg_len_sq = seg_x * seg_x + seg_y * seg_y

    if seg_len_sq == 0:
        closest = start
    else:
        rel_x = circle_center[0] - start[0]
        rel_y = circle_center[1] - start[1]
        # Position of the projection along the segment, clamped to [0, 1]
        t = max(0, min(1, (rel_x * seg_x + rel_y * seg_y) / seg_len_sq))
        closest = (start[0] + t * seg_x, start[1] + t * seg_y)

    return distance(closest, circle_center) <= radius

def check_collision(pos, new_pos):
    """
    Return True when moving from ``pos`` to ``new_pos`` would touch any obstacle.

    Each obstacle is treated as a circle of OBSTACLE_RADIUS + COLLISION_BUFFER.
    Always False when ENABLE_OBSTACLES is off.
    """
    if not ENABLE_OBSTACLES:
        return False
    blocked_radius = OBSTACLE_RADIUS + COLLISION_BUFFER
    return any(
        line_circle_intersection(pos, new_pos, center, blocked_radius)
        for center in obstacles
    )

def get_recent_direction():
    """
    Estimate the dot's recent heading as a unit vector.

    Uses the oldest and newest samples in ``recent_positions`` that lie within
    the last RECENT_DIR_LOOKBACK seconds. Returns [0, 0] when fewer than two
    such samples exist, when they span less than 1 ms, or when the dot did
    not move between them.
    """
    if len(recent_positions) < 2:
        return [0, 0]

    now = time.time()
    window = []
    # Walk backwards from the newest sample and stop at the first stale one
    for sample in reversed(recent_positions):
        if now - sample[2] > RECENT_DIR_LOOKBACK:
            break
        window.append(sample)

    if len(window) < 2:
        return [0, 0]

    ordered = sorted(window, key=lambda s: s[2])
    oldest, newest = ordered[0], ordered[-1]

    elapsed = newest[2] - oldest[2]
    if elapsed < 0.001:
        return [0, 0]

    vel_x = (newest[0] - oldest[0]) / elapsed
    vel_y = (newest[1] - oldest[1]) / elapsed
    speed = math.hypot(vel_x, vel_y)
    if speed > 0:
        return [vel_x / speed, vel_y / speed]
    return [0, 0]

###############################################################################
# Target prediction & generation
###############################################################################
def predict_human_target(human_input):
    """
    Guess which target the human is heading for and return its index.

    The current target is kept when the dot is already within twice
    GOAL_DETECTION_RADIUS of it, or when there is no input. Otherwise every
    target is scored from three terms: alignment with the input direction,
    alignment with the recent movement direction, and closeness to the dot.
    A different target only wins if its score beats the current target's
    score by at least GOAL_SWITCH_THRESHOLD. Note that the current target is
    re-scored with a slightly different weight set (0.225 / 0.325 / 0.45)
    than the candidates (0.2 / 0.3 / 0.5).
    """
    active_idx = current_target_idx

    if distance(dot_pos, targets[active_idx]) < GOAL_DETECTION_RADIUS * 2:
        return active_idx
    if human_input[0] == 0 and human_input[1] == 0:
        return active_idx

    # Unit vector of the immediate human input
    input_norm = math.hypot(human_input[0], human_input[1])
    if input_norm > 0:
        input_dir = [human_input[0] / input_norm, human_input[1] / input_norm]
    else:
        input_dir = [0, 0]
    motion_dir = get_recent_direction()
    screen_diag = math.hypot(FULL_VIEW_SIZE[0], FULL_VIEW_SIZE[1])

    def weighted_score(goal, w_input, w_motion, w_near):
        """Score one target; None when the dot sits exactly on it."""
        off_x = goal[0] - dot_pos[0]
        off_y = goal[1] - dot_pos[1]
        off_len = math.hypot(off_x, off_y)
        if off_len == 0:
            return None
        unit = [off_x / off_len, off_y / off_len]
        nearness = 1 - (distance(dot_pos, goal) / screen_diag)
        input_align = input_dir[0] * unit[0] + input_dir[1] * unit[1]
        motion_align = motion_dir[0] * unit[0] + motion_dir[1] * unit[1]
        return (input_align * w_input) + (motion_align * w_motion) + (nearness * w_near)

    best_idx = active_idx
    best_score = float('-inf')
    for idx, goal in enumerate(targets):
        candidate = weighted_score(goal, 0.2, 0.3, 0.5)
        if candidate is not None and candidate > best_score:
            best_score, best_idx = candidate, idx

    if best_idx == active_idx:
        return best_idx

    # Hysteresis: only switch when the gain over the current target is large enough
    incumbent = weighted_score(targets[active_idx], 0.225, 0.325, 0.45)
    if incumbent is None:
        incumbent = float('-inf')
    if best_score - incumbent < GOAL_SWITCH_THRESHOLD:
        return active_idx

    return best_idx

def generate_obstacles():
    """
    Empty the shared ``obstacles`` list.

    No obstacles are generated at present, regardless of ENABLE_OBSTACLES;
    the commented lines show how random ones could be added.
    """
    del obstacles[:]
    if ENABLE_OBSTACLES:
        # Example random obstacles if desired:
        # for _ in range(3):
        #     x = random.randint(50, FULL_VIEW_SIZE[0]-50)
        #     y = random.randint(50, FULL_VIEW_SIZE[1]-50)
        #     obstacles.append([x, y])
        pass

def generate_targets():
    """
    Refill the shared ``targets`` list with NUM_GOALS random positions.

    Positions are drawn uniformly over the full view (edges included) and
    re-drawn until they satisfy both rules:

    * not within GOAL_DETECTION_RADIUS of START_POS -- the dot starts every
      trial there, so such a target would count as reached on the first
      frame before the human has moved;
    * when obstacles are enabled, not within 1.5 * OBSTACLE_RADIUS of any
      obstacle centre.
    """
    targets.clear()
    min_obstacle_gap = OBSTACLE_RADIUS * 1.5

    while len(targets) < NUM_GOALS:
        pos = [random.randint(0, FULL_VIEW_SIZE[0]),
               random.randint(0, FULL_VIEW_SIZE[1])]

        # Same comparison as the goal check in move_dot(): "< radius" means reached
        if distance(pos, START_POS) < GOAL_DETECTION_RADIUS:
            continue
        if ENABLE_OBSTACLES and any(distance(pos, o) < min_obstacle_gap for o in obstacles):
            continue

        targets.append(pos)




class GammaPredictor:
    """
    Thin wrapper around a trained PPO policy that outputs the arbitration
    weight gamma for the current game state.
    """

    # When True, predict_gamma() prints every observation and prediction.
    # Off by default because predict_gamma() is called once per frame.
    debug = False

    def __init__(self, model_path="dynamic_arbitration_ppo_human_centric", debug=False):
        """
        Load the trained model.

        Parameters:
        model_path: path handed to ``PPO.load``
        debug: print observations and predictions on every call when True
        """
        self.model = PPO.load(model_path)
        self.debug = debug

    def prepare_observation(self, dot_pos, target_pos, human_input):
        """
        Build the 10-element observation vector from the game state.

        Layout: dot position (2), unit human input direction (2), target
        position (2), unit direction to the target (2), distance to the
        target (1), dot product of the two unit directions (1). A zero
        vector is used for a direction whose magnitude is zero.

        Returns:
        1-D float32 numpy array of length 10
        """
        dot_pos = np.asarray(dot_pos, dtype=np.float32)
        target_pos = np.asarray(target_pos, dtype=np.float32)

        # Direction and distance from the dot to the target
        to_target = target_pos - dot_pos
        dist = np.linalg.norm(to_target)
        perfect_dir = to_target / dist if dist > 0 else np.zeros(2, dtype=np.float32)

        # Unit vector of the raw human input
        human_vec = np.asarray(human_input)
        h_mag = np.linalg.norm(human_vec)
        human_dir = human_vec / h_mag if h_mag > 0 else np.zeros(2, dtype=np.float32)

        # How well the human input points at the target (-1 .. 1)
        human_alignment = np.dot(human_dir, perfect_dir)

        obs = np.concatenate([
            dot_pos,            # Current position (2)
            human_dir,          # Normalized human input direction (2)
            target_pos,         # Target position (2)
            perfect_dir,        # Perfect direction to target (2)
            [dist],             # Distance to target (1)
            [human_alignment],  # Human alignment with perfect direction (1)
        ])

        # The parts above mix integer, float32 and float64 arrays; cast once so
        # the caller always gets the same dtype the position arrays use.
        return obs.astype(np.float32)

    def predict_gamma(self, dot_pos, target_pos, human_input):
        """
        Predict gamma for the current game state.

        The observation is passed to the model with a leading batch axis, so
        the returned action carries that axis too; the first element is
        extracted explicitly and returned as a Python float.
        """
        obs = self.prepare_observation(dot_pos, target_pos, human_input)

        action, _ = self.model.predict(obs[None], deterministic=True)

        # float() on an array with ndim > 0 is deprecated in recent NumPy,
        # so flatten and index down to a true scalar first.
        gamma_value = float(np.asarray(action).reshape(-1)[0])

        if self.debug:
            print("Observation:", obs)
            print("Predicted gamma:", gamma_value)

        return gamma_value


# Load the trained model once at start-up; move_dot() queries this instance
# whenever the human input is non-zero.
gamma_predictor = GammaPredictor()

###############################################################################
# Movement logic
###############################################################################
def move_dot(human_input):
    """
    Advance the dot by one step, blending the human input with the direct
    path to the current target: movement = gamma * W + (1 - gamma) * H.

    When the input is non-zero, gamma is first refreshed from the predictor
    so that both the W and the H component of this step use the same value.
    Gaussian noise (sigma = NOISE_MAGNITUDE) is added to the human direction
    before it is re-normalised. The step is discarded if it would cross an
    obstacle; otherwise the new position is clamped to the view.

    Side effects: updates ``dot_pos`` in place and the global ``gamma``; on
    reaching the goal sets ``reached_goal`` and starts a 1000 ms USEREVENT
    timer that the main loop uses to auto-reset.

    Parameters:
    human_input: [dx, dy] input, already scaled by MAX_SPEED by the caller

    Returns:
    (h_dir, w_dir, x_dir): unit vectors (or [0, 0]) of the human input, the
    direction to the target, and the attempted movement
    """
    global dot_pos, gamma, reached_goal, current_target_idx

    h_dx, h_dy = human_input
    h_mag = math.hypot(h_dx, h_dy)
    h_dir = [h_dx / h_mag, h_dy / h_mag] if h_mag > 0 else [0, 0]

    target_pos = targets[current_target_idx]
    w_dx = target_pos[0] - dot_pos[0]
    w_dy = target_pos[1] - dot_pos[1]
    w_mag = math.hypot(w_dx, w_dy)
    w_dir = [w_dx / w_mag, w_dy / w_mag] if w_mag > 0 else [0, 0]

    # Step length scales with input strength, capped at MAX_SPEED
    input_mag = min(max(h_mag / MAX_SPEED, 0), 1)
    step_size = MAX_SPEED * input_mag

    # Human + noise portion
    if h_mag > 0:
        # Refresh gamma BEFORE computing either component, so the weights
        # gamma and (1 - gamma) used below always sum to one.
        gamma = gamma_predictor.predict_gamma(
            dot_pos=dot_pos,
            target_pos=target_pos,
            human_input=[h_dx, h_dy]  # Pass raw input, normalization happens in predictor
        )

        noise_x = np.random.normal(0, NOISE_MAGNITUDE)
        noise_y = np.random.normal(0, NOISE_MAGNITUDE)
        noisy_dx = h_dir[0] + noise_x
        noisy_dy = h_dir[1] + noise_y
        nm = math.hypot(noisy_dx, noisy_dy)
        if nm > 0:
            noisy_dx /= nm
            noisy_dy /= nm
        h_move_x = (1 - gamma) * noisy_dx * step_size
        h_move_y = (1 - gamma) * noisy_dy * step_size
    else:
        h_move_x, h_move_y = 0, 0

    # Weighted (target-seeking) portion
    w_move_x = gamma * w_dir[0] * step_size
    w_move_y = gamma * w_dir[1] * step_size

    final_dx = w_move_x + h_move_x
    final_dy = w_move_y + h_move_y
    new_x = dot_pos[0] + final_dx
    new_y = dot_pos[1] + final_dy

    # Only move when the path is clear; keep the dot inside the view
    if not check_collision(dot_pos, [new_x, new_y]):
        dot_pos[0] = max(0, min(FULL_VIEW_SIZE[0], new_x))
        dot_pos[1] = max(0, min(FULL_VIEW_SIZE[1], new_y))

    # Direction of the attempted movement, for the red arrow
    final_mag = math.hypot(final_dx, final_dy)
    x_dir = [final_dx / final_mag, final_dy / final_mag] if final_mag > 0 else [0, 0]

    # Goal check
    dist_to_goal = distance(dot_pos, target_pos)
    if dist_to_goal < GOAL_DETECTION_RADIUS:
        reached_goal = True
        pygame.time.set_timer(pygame.USEREVENT, 1000)

    return h_dir, w_dir, x_dir

###############################################################################
# Reset
###############################################################################
def reset():
    """
    Start a fresh trial.

    Cancels the auto-reset timer, puts the dot back at START_POS, restores
    gamma to 0.2, clears the position history, restarts the trial clock and
    regenerates obstacles and then targets.
    """
    global dot_pos, reached_goal, current_target_idx, gamma
    global recent_positions, last_reset_time

    # A period of 0 stops the repeating USEREVENT timer
    pygame.time.set_timer(pygame.USEREVENT, 0)

    dot_pos = list(START_POS)
    reached_goal, current_target_idx, gamma = False, 0, 0.2
    del recent_positions[:]
    last_reset_time = time.time()

    # Obstacles first: target placement checks against them
    generate_obstacles()
    generate_targets()

###############################################################################
# Drawing an arrow
###############################################################################
def draw_arrow(surface, color, start_pos, direction, length=ARROW_LENGTH):
    """
    Draw an arrow of ``length`` pixels from ``start_pos`` along ``direction``.

    ``direction`` need not be normalised; a zero vector draws nothing. The
    head is two short strokes angled 30 degrees either side of the shaft.
    """
    dir_x, dir_y = direction
    if dir_x == 0 and dir_y == 0:
        return

    norm = math.hypot(dir_x, dir_y)
    dir_x /= norm
    dir_y /= norm

    tip = (start_pos[0] + dir_x * length, start_pos[1] + dir_y * length)
    stroke = int(2 * SCALING_FACTOR)

    # Shaft
    pygame.draw.line(surface, color, start_pos, tip, stroke)

    # Head strokes
    barb_len = 7 * SCALING_FACTOR
    heading = math.atan2(dir_y, dir_x)
    for offset in (math.pi/6, -math.pi/6):
        barb_end = (tip[0] - barb_len * math.cos(heading + offset),
                    tip[1] - barb_len * math.sin(heading + offset))
        pygame.draw.line(surface, color, tip, barb_end, stroke)

###############################################################################
# 1) render_full_view: Full scene in window #1
###############################################################################
def render_full_view(surface, h_dir, w_dir, x_dir):
    """
    Paint the complete scene onto ``surface``.

    Draws obstacles, numbered targets, the current-target ring, the gray
    movement trail, the dot, the three direction arrows (human = blue,
    target = green, result = red), the status text and the legend. Also
    drops trail samples older than GHOST_TRAIL_DURATION from
    ``recent_positions``.
    """
    outline = int(2 * SCALING_FACTOR)
    trail_color = (200, 200, 200)

    surface.fill(WHITE)

    # Obstacles
    if ENABLE_OBSTACLES:
        for center in obstacles:
            pygame.draw.circle(surface, GRAY, (int(center[0]), int(center[1])), OBSTACLE_RADIUS)

    # Targets with their 1-based numbers
    for number, target in enumerate(targets, start=1):
        pygame.draw.circle(surface, YELLOW, (int(target[0]), int(target[1])), TARGET_RADIUS)
        surface.blit(font.render(str(number), True, BLACK), (target[0] - 5, target[1] - 12))

    # Ring around the current target
    active = targets[current_target_idx]
    pygame.draw.circle(surface, BLACK, (int(active[0]), int(active[1])),
                       TARGET_RADIUS + 2, outline)

    # Ghost path: expire old samples, then connect the remaining ones
    now = time.time()
    while recent_positions and (now - recent_positions[0][2]) > GHOST_TRAIL_DURATION:
        del recent_positions[0]
    for older, newer in zip(recent_positions, recent_positions[1:]):
        pygame.draw.line(surface, trail_color, (older[0], older[1]), (newer[0], newer[1]), 2)

    # Dot
    dot_center = (int(dot_pos[0]), int(dot_pos[1]))
    pygame.draw.circle(surface, BLACK, dot_center, DOT_RADIUS, outline)

    # Arrows: H=blue, W=green, final=red
    for arrow_color, vec in ((BLUE, h_dir), (GREEN, w_dir), (RED, x_dir)):
        if vec != [0,0]:
            draw_arrow(surface, arrow_color, dot_center, vec, ARROW_LENGTH)

    # Status lines as (text, position)
    status_lines = (
        (f"Gamma: {gamma:.2f}", (10, 10)),
        (f"Movement = {gamma:.2f}W + {1-gamma:.2f}H", (10, 40)),
        (f"Noise σ: {NOISE_MAGNITUDE:.2f}", (10, 100)),
        ("L2/R2: gamma, [/]: noise, R: reset", (10, 70)),
    )
    for text, where in status_lines:
        surface.blit(font.render(text, True, FONT_COLOR), where)

    # Trial timer
    elapsed_time = time.time() - last_reset_time
    surface.blit(font.render(f"Time: {elapsed_time:.1f}s", True, FONT_COLOR), (10, 130))

    if reached_goal:
        done_msg = f"Goal Reached in {elapsed_time:.1f}s! Auto-resetting..."
        surface.blit(font.render(done_msg, True, FONT_COLOR), (150, 110))

    # Legend, bottom-left, each entry in its own colour
    legend_top = FULL_VIEW_SIZE[1] - int(100 * SCALING_FACTOR)
    row_height = int(30 * SCALING_FACTOR)
    legend_entries = (
        ("Green Arrow: Perfect Path (W)", GREEN),
        ("Blue Arrow: Human Movement (H)", BLUE),
        ("Red Arrow: Dot's Movement", RED),
        ("Gray line: Movement History", trail_color),
    )
    for row, (caption, caption_color) in enumerate(legend_entries):
        surface.blit(font.render(caption, True, caption_color), (10, legend_top + row*row_height))

###############################################################################
# 2) render_red_only: Just the dot + red arrow in window #2
###############################################################################
def render_red_only(surface, x_dir):
    """
    Paint the reduced scene onto ``surface``: numbered targets, the
    current-target ring, the dot, the red result arrow, the trial timer and,
    once the goal is reached, a centred "Goal Reached!" message.
    """
    outline = int(2 * SCALING_FACTOR)

    surface.fill(WHITE)

    # Targets with their 1-based numbers
    for number, target in enumerate(targets, start=1):
        pygame.draw.circle(surface, YELLOW, (int(target[0]), int(target[1])), TARGET_RADIUS)
        surface.blit(font.render(str(number), True, BLACK), (target[0] - 5, target[1] - 12))

    # Ring around the current target
    active = targets[current_target_idx]
    pygame.draw.circle(surface, BLACK, (int(active[0]), int(active[1])),
                       TARGET_RADIUS + 2, outline)

    # Dot
    dot_center = (int(dot_pos[0]), int(dot_pos[1]))
    pygame.draw.circle(surface, BLACK, dot_center, DOT_RADIUS, outline)

    # Result arrow only
    if x_dir != [0,0]:
        draw_arrow(surface, RED, dot_center, x_dir, ARROW_LENGTH)

    # Trial timer
    elapsed_time = time.time() - last_reset_time
    surface.blit(font.render(f"Time: {elapsed_time:.1f}s", True, BLACK), (10, 10))

    # Completion message, deliberately without the time
    if reached_goal:
        banner = font.render("Goal Reached!", True, BLACK)
        surface.blit(banner, banner.get_rect(center=(RED_ONLY_SIZE[0]/2, 40)))

###############################################################################
# Initialize obstacles/targets, main loop
###############################################################################
generate_obstacles()
generate_targets()

running = True
clock = pygame.time.Clock()

# Main loop: handle events, read input, advance the dot, then draw both
# windows. Capped at 60 iterations per second.
while running:
    for event in pygame.event.get():
        # With more than one SDL2 window open, closing a window posts
        # WINDOWCLOSE; QUIT alone would leave the close buttons dead.
        if event.type in (pygame.QUIT, pygame.WINDOWCLOSE):
            running = False

        # Keyboard noise change
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_LEFTBRACKET:
                NOISE_MAGNITUDE = max(MIN_NOISE, NOISE_MAGNITUDE - NOISE_STEP)
            elif event.key == pygame.K_RIGHTBRACKET:
                NOISE_MAGNITUDE = min(MAX_NOISE, NOISE_MAGNITUDE + NOISE_STEP)
            if event.key == pygame.K_r:
                reset()

        if joystick and event.type == pygame.JOYBUTTONDOWN:
            # example: button #2 -> reset
            if event.button == 2:
                reset()

        # Timer event for auto-reset after goal
        if event.type == pygame.USEREVENT:
            reset()

    # Movement
    if not reached_goal:
        dx, dy = 0.0, 0.0
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
            dx -= 1
        if keys[pygame.K_RIGHT]:
            dx += 1
        if keys[pygame.K_UP]:
            dy -= 1
        if keys[pygame.K_DOWN]:
            dy += 1

        if joystick:
            axis_0 = joystick.get_axis(0)
            axis_1 = joystick.get_axis(1)
            deadzone = 0.1
            # The stick takes over only while it is deflected; an idle stick
            # leaves the keyboard input above untouched so the arrow keys
            # keep working with a controller plugged in.
            if abs(axis_0) > deadzone or abs(axis_1) > deadzone:
                dx = axis_0
                dy = axis_1

            # triggers for gamma
            # NOTE: move_dot() replaces gamma with the model's prediction
            # whenever the input is non-zero, so these nudges only persist
            # while the dot is not being steered.
            l2_val = joystick.get_axis(AXIS_L2)
            r2_val = joystick.get_axis(AXIS_R2)
            if l2_val > 0.1:
                gamma = max(0.0, gamma - 0.01)
            if r2_val > 0.1:
                gamma = min(1.0, gamma + 0.01)

        # small deadzone on the combined input
        if abs(dx) < 0.1 and abs(dy) < 0.1:
            dx, dy = 0.0, 0.0

        dx *= MAX_SPEED
        dy *= MAX_SPEED
        human_input = [dx, dy]

        # Possibly switch target
        proposed_idx = predict_human_target(human_input)
        current_target_idx = proposed_idx

        # Move the dot
        h_dir, w_dir, x_dir = move_dot(human_input)
        # Update ghost path
        recent_positions.append((dot_pos[0], dot_pos[1], time.time()))
    else:
        # If goal reached, no movement
        h_dir, w_dir, x_dir = [0,0], [0,0], [0,0]

    ############################################################################
    # 1) Draw to surfaces
    ############################################################################
    render_full_view(surface_full, h_dir, w_dir, x_dir)
    render_red_only(surface_red_only, x_dir)

    ############################################################################
    # 2) Convert surfaces -> textures
    ############################################################################
    tex1 = surface_to_texture(renderer1, surface_full)
    tex2 = surface_to_texture(renderer2, surface_red_only)

    ############################################################################
    # 3) Blit each texture on its window
    ############################################################################
    renderer1.clear()
    tex1.draw(dstrect=(0, 0, FULL_VIEW_SIZE[0], FULL_VIEW_SIZE[1]))
    renderer1.present()

    renderer2.clear()
    tex2.draw(dstrect=(0, 0, RED_ONLY_SIZE[0], RED_ONLY_SIZE[1]))
    renderer2.present()

    clock.tick(60)

pygame.quit()

pygame 2.6.0 (SDL 2.28.4, Python 3.10.9)
Hello from the pygame community. https://www.pygame.org/contribute.html
Joystick initialized: DualSense Wireless Controller




Observation: [ 6.00000000e+02  4.00000000e+02  7.94306222e-01 -6.07517593e-01
  6.47000000e+02  5.16000000e+02  3.75519633e-01  9.26814437e-01
  1.25159897e+02 -2.64778494e-01]
Predicted gamma: [0.]
Observation: [ 6.00314331e+02  3.98993835e+02  8.44864064e-01 -5.34981041e-01
  6.47000000e+02  5.16000000e+02  3.70591283e-01  9.28796053e-01
  1.25976166e+02 -1.83789022e-01]
Predicted gamma: [0.]
Observation: [ 6.04706482e+02  3.96604492e+02  8.41525814e-01 -5.40216904e-01
  6.47000000e+02  5.16000000e+02  3.33900511e-01  9.42608297e-01
  1.26665031e+02 -2.28227037e-01]
Predicted gamma: [0.]
Observation: [ 6.06395447e+02  3.91898407e+02  8.41525814e-01 -5.40216904e-01
  6.47000000e+02  5.16000000e+02  3.10966313e-01  9.50420856e-01
  1.30575409e+02 -2.51747233e-01]
Predicted gamma: [0.]
Observation: [ 6.09813293e+02  3.88248932e+02  8.32603197e-01 -5.53869945e-01
  6.47000000e+02  5.16000000e+02  2.79487252e-01  9.60149467e-01
  1.33053314e+02 -2.99095953e-01]
Predicted gamma: [0.]
Obser

  return float(action[0])


Observation: [ 6.17856567e+02  3.77593231e+02  7.73944459e-01 -6.33253483e-01
  6.47000000e+02  5.16000000e+02  2.06045449e-01  9.78542387e-01
  1.41441772e+02 -4.60197642e-01]
Predicted gamma: [0.]
Observation: [ 6.21075745e+02  3.73767426e+02  7.53712175e-01 -6.57204654e-01
  6.47000000e+02  5.16000000e+02  1.79312512e-01  9.83792186e-01
  1.44575836e+02 -5.11402780e-01]
Predicted gamma: [0.]
Observation: [ 6.24425049e+02  3.77479858e+02  7.25261190e-01 -6.88473824e-01
  6.47000000e+02  5.16000000e+02  1.60850242e-01  9.86978769e-01
  1.40347641e+02 -5.62850610e-01]
Predicted gamma: [0.]
Observation: [ 6.27506958e+02  3.73542633e+02  6.88245585e-01 -7.25477784e-01
  6.47000000e+02  5.16000000e+02  1.35570914e-01  9.90767658e-01
  1.43784836e+02 -6.25473842e-01]
Predicted gamma: [0.]
Observation: [ 6.31390015e+02  3.70392700e+02  6.27883410e-01 -7.78307410e-01
  6.47000000e+02  5.16000000e+02  1.06595255e-01  9.94302511e-01
  1.46441650e+02 -7.06943619e-01]
Predicted gamma: [0.]
Obser