## PHASE 0: System Initialization


In [34]:
from rich.console import Console
from rich.markdown import Markdown

# Shared rich console; used further down to render the LLM prompts as Markdown.
console = Console()

In [35]:
# Backend switches: every LLM request in this notebook is guarded by one of these flags.
# With both False no request is sent and the hard-coded `constraints` dict defined later is used.
# NOTE(review): the Gemini and Azure OpenAI setup cells both bind the same `client` name,
# so enabling both flags leaves `client` pointing at the Azure client.
use_gemini = False
use_openai = False

In [36]:
# Each room type has its own generator, so each room type gets its own dataset context.
ROOM_TYPE = "livingroom"  # "bedroom" or "livingroom"
# The context returned below bundles scene count, class frequencies/counts, idx_to_labels and size limits.
def get_dataset_context(room_type):
    """Return the dataset statistics that are injected into the LLM prompts.

    Args:
        room_type: Either "bedroom" or "livingroom".

    Returns:
        A freshly built dict with the keys ``room_type``, ``total_scenes``,
        ``class_frequencies``, ``furniture_counts``, ``idx_to_labels``,
        ``num_classes_with_empty``, ``num_classes_without_empty`` and
        ``max_objects`` (in that order).

    Raises:
        ValueError: If ``room_type`` is not one of the supported room types.
    """
    if room_type == "bedroom":
        dataset_context = {
            "room_type": "bedroom",
            "total_scenes": 4042,
            "class_frequencies": {
                "nightstand": 0.27245508982035926,
                "double_bed": 0.17138137518067315,
                "wardrobe": 0.16079909147222796,
                "pendant_lamp": 0.12693578360520338,
                "ceiling_lamp": 0.06308073508156102,
                "tv_stand": 0.029888498864340286,
                "chair": 0.022816436093330582,
                "single_bed": 0.021216188313029113,
                "dressing_table": 0.020854842040057817,
                "cabinet": 0.020183770390253975,
                "table": 0.019667561428866404,
                "desk": 0.016260582283708445,
                "stool": 0.011459838942804047,
                "shelf": 0.0081561015899236,
                "kids_bed": 0.0081561015899236,
                "bookshelf": 0.0071753045632872185,
                "children_cabinet": 0.0071753045632872185,
                "dressing_chair": 0.006142886640512079,
                "armchair": 0.003716704521990502,
                "sofa": 0.0014970059880239522,
                "coffee_table": 0.0009807970266363824,
            },
            "furniture_counts": {
                "nightstand": 5278,
                "double_bed": 3320,
                "wardrobe": 3115,
                "pendant_lamp": 2459,
                "ceiling_lamp": 1222,
                "tv_stand": 579,
                "chair": 442,
                "single_bed": 411,
                "dressing_table": 404,
                "cabinet": 391,
                "table": 381,
                "desk": 315,
                "stool": 222,
                "shelf": 158,
                "kids_bed": 158,
                "bookshelf": 139,
                "children_cabinet": 139,
                "dressing_chair": 119,
                "armchair": 72,
                "sofa": 29,
                "coffee_table": 19,
            },
            "idx_to_labels": {
                0: "armchair",
                1: "bookshelf",
                2: "cabinet",
                3: "ceiling_lamp",
                4: "chair",
                5: "children_cabinet",
                6: "coffee_table",
                7: "desk",
                8: "double_bed",
                9: "dressing_chair",
                10: "dressing_table",
                11: "kids_bed",
                12: "nightstand",
                13: "pendant_lamp",
                14: "shelf",
                15: "single_bed",
                16: "sofa",
                17: "stool",
                18: "table",
                19: "tv_stand",
                20: "wardrobe",
                21: "empty",
            },
            "num_classes_with_empty": 22,
            "num_classes_without_empty": 21,
            # "room_type" is set once at the top; the repeated key was removed.
            "max_objects": 12,
        }
    elif room_type == "livingroom":
        dataset_context = {
            "room_type": "livingroom",
            "total_scenes": 2926,
            "class_frequencies": {
                "dining_chair": 0.25492085340674464,
                "pendant_lamp": 0.13282863041982107,
                "coffee_table": 0.08616655196145905,
                "corner_side_table": 0.07240192704748796,
                "dining_table": 0.06951135581555402,
                "tv_stand": 0.06221610461114935,
                "multi_seat_sofa": 0.05299380591878871,
                "armchair": 0.048313833448038544,
                "console_table": 0.037026841018582245,
                "lounge_chair": 0.03234686854783207,
                "stool": 0.0264280798348245,
                "cabinet": 0.023124569855471438,
                "bookshelf": 0.02202339986235375,
                "loveseat_sofa": 0.020922229869236062,
                "ceiling_lamp": 0.018169304886441844,
                "wine_cabinet": 0.012112869924294563,
                "l_shaped_sofa": 0.01032346868547832,
                "round_end_table": 0.0057811424638678595,
                "shelf": 0.0035788024776324846,
                "chinese_chair": 0.0031658637302133517,
                "wardrobe": 0.0027529249827942187,
                "chaise_longue_sofa": 0.0011011699931176876,
                "desk": 0.0009635237439779766,
                "lazy_sofa": 0.0008258774948382657,
            },
            "furniture_counts": {
                "dining_chair": 1852,
                "pendant_lamp": 965,
                "coffee_table": 626,
                "corner_side_table": 526,
                "dining_table": 505,
                "tv_stand": 452,
                "multi_seat_sofa": 385,
                "armchair": 351,
                "console_table": 269,
                "lounge_chair": 235,
                "stool": 192,
                "cabinet": 168,
                "bookshelf": 160,
                "loveseat_sofa": 152,
                "ceiling_lamp": 132,
                "wine_cabinet": 88,
                "l_shaped_sofa": 75,
                "round_end_table": 42,
                "shelf": 26,
                "chinese_chair": 23,
                "wardrobe": 20,
                "chaise_longue_sofa": 8,
                "desk": 7,
                "lazy_sofa": 6,
            },
            "idx_to_labels": {
                0: "armchair",
                1: "bookshelf",
                2: "cabinet",
                3: "ceiling_lamp",
                4: "chaise_longue_sofa",
                5: "chinese_chair",
                6: "coffee_table",
                7: "console_table",
                8: "corner_side_table",
                9: "desk",
                10: "dining_chair",
                11: "dining_table",
                12: "l_shaped_sofa",
                13: "lazy_sofa",
                14: "lounge_chair",
                15: "loveseat_sofa",
                16: "multi_seat_sofa",
                17: "pendant_lamp",
                18: "round_end_table",
                19: "shelf",
                20: "stool",
                21: "tv_stand",
                22: "wardrobe",
                23: "wine_cabinet",
                24: "empty",
            },
            "num_classes_with_empty": 25,
            "num_classes_without_empty": 24,
            # "room_type" is set once at the top; the repeated key was removed.
            "max_objects": 21,
        }
    else:
        raise ValueError(f"Room type {room_type} not supported")

    return dataset_context


In [37]:
# Free-text background on the 3D-FRONT dataset; interpolated verbatim into both system prompts below.
dataset_facts = """
The dataset being used is 3D-FRONT which uses 3D-FUTURE dataset for furniture models. 3D-FRONT is a collection of synthetic, high-quality 3D indoor scenes, highlighted by professionally and distinctively designed layouts.

In this dataset, the following facts are important to know:

## Coordinate System
- Y-axis: Vertical (up direction)
- XZ-plane: Floor plane
- Units: Meters (world coordinates, unnormalized)
- Empty slots: Have index (num_classes-1), near-zero size/position

# Important Facts about 3D-FRONT dataset
- Ceiling objects are at y ≈ ceiling_height (typically 2.8m)
- Floor objects have y ≈ object_height/2
- Ignore empty slots (is_empty == True) in calculations
"""

In [38]:
# Description of the batched scene dict handed to reward functions; interpolated into the phase-2 prompt.
# NOTE(review): the text says orientations encode a "z-rotation" while dataset_facts declares
# the Y-axis as vertical — confirm which axis θ rotates about.
scene_representation = """
A 3D scene is represented in batch format (parsed_scenes) a dictionary with the following keys and PyTorch tensors as values:
    - `positions`: (B, N, 3) - Object centroids in meters (x, y, z)
    - `sizes`: (B, N, 3) - Half-extents (sx/2, sy/2, sz/2)
    - `object_indices`: (B, N) - Class indices [0, num_classes-1]
    - `one_hot`: (B, N, num_classes) - One-hot encoded classes
    - `is_empty`: (B, N) - Boolean mask (True = empty slot)
    - `orientations`: (B, N, 2) - [cos(θ), sin(θ)] for z-rotation
    - `device`: torch.device
    Where:
        - B = Batch size
        - N = Max objects per scene
"""

In [39]:
# Size limits per room type; "num_classes" counts the trailing "empty" class as well.
room_stats = dict(
    bedroom=dict(
        max_objects=12,
        num_classes=22,
        num_classes_with_empty=22,
        num_classes_without_empty=21,
    ),
    livingroom=dict(
        max_objects=21,
        num_classes=25,
        num_classes_with_empty=25,
        num_classes_without_empty=24,
    ),
)

In [40]:
# reward function template, put the detailed arg datatypes and descriptions
reward_function_template = f"""
```python
def get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):
    '''
    Input:
        - parsed_scenes: list of parsed scenes
            Format:
            Scenes are provided as dictionaries with PyTorch tensors:
                - `positions`: (B, N, 3) - Object centroids in meters (x, y, z)
                - `sizes`: (B, N, 3) - Half-extents (sx/2, sy/2, sz/2)
                - `object_indices`: (B, N) - Class indices [0, num_classes-1]
                - `one_hot`: (B, N, num_classes) - One-hot encoded classes
                - `is_empty`: (B, N) - Boolean mask (True = empty slot)
                - `orientations`: (B, N, 2) - [cos(θ), sin(θ)] for z-rotation
                - `device`: torch.device
                Where:
                    - B = Batch size
                    - N = Max objects per scene
        
        - idx_to_labels: dictionary mapping class indices to class labels
        - room_type: string, Example: "bedroom" or "livingroom"
        - Floor Polygons (floor_polygons): A list of ordered floor_polygons in the format [(x1, z1), (x2, z2), ...(xn, zn)]  where n >= 4, and always forms a closed polygon
        - **kwargs: additional keyword arguments

    Output:
        reward: torch.Tensor of shape (len(parsed_scenes),)
    '''
    
    # Logic of reward function here
    return reward

def test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):
    '''
    Input:
        - idx_to_labels: dictionary mapping class indices to class labels
        - room_type: string, Example: "bedroom" or "livingroom"
        - floor_polygons: A list of ordered floor_polygons in the format [(x1, z1), (x2, z2), ...(xn, zn)]  where n >= 4, and always forms a closed polygon
        - **kwargs: additional keyword arguments
    '''
    # Create some test scenes using create_scene_for_testing
    # Scene 1
    num_objects_1 = 5
    class_label_indices_1 = [0, 1, 2, 3, 4]
    translations_1 = [(0, 0, 0), (1, 0, 0), (2, 0, 0), (3, 0, 0), (4, 0, 0)]
    sizes_1 = [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]
    orientations_1 = [(1, 0), (1, 0), (1, 0), (1, 0), (1, 0)]
    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)
    
    # Scene 2
    num_objects_2 = 6
    class_label_indices_2 = [0, 1, 2, 3, 4, 5]
    translations_2 = [(0, 0, 0), (1, 0, 0), (2, 0, 0), (3, 0, 0), (4, 0, 0), (5, 0, 0),]
    sizes_2 = [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]
    orientations_2 = [(1, 0), (1, 0), (1, 0), (1, 0), (1, 0), (1, 0)]
    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)
    
    # Scene 3
    num_objects_3 = 4
    class_label_indices_3 = [0, 1, 3, 4]
    translations_3 = [(0, 0, 0), (1, 0, 0), (2, 0, 0), (3, 0, 0)]
    sizes_3 = [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]
    orientations_3 = [(1, 0), (1, 0), (1, 0), (1, 0)]
    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)
    
    
    # Stack each key of the parsed_scene dicts into a batched dict
    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]
    parsed_scenes = {{
        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)
        for k in tensor_keys
    }}
    parsed_scenes['room_type'] = room_type
    parsed_scenes['device'] = scene_1['device']
    
    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)
    print("Rewards:", rewards)
    assert rewards.shape[0] == len(parsed_scenes)
    
    # You have to add different test cases here to verify that the reward function is working as expected
    assert <TEST_CASE_1>
    assert <TEST_CASE_2>
    ...
    assert <TEST_CASE_N>
```
"""

In [41]:
from universal_constraint_rewards.commons import get_all_universal_reward_functions

universal_rewards_info = get_all_universal_reward_functions()

# Keep only what the prompt needs from each universal reward: its function name and docstring.
universal_rewards_info_with_docstrings = {
    name: {"function": reward_fn.__name__, "description": reward_fn.__doc__}
    for name, reward_fn in universal_rewards_info.items()
}

In [42]:
# Display the collected name/docstring metadata that will be embedded in the prompts.
universal_rewards_info_with_docstrings

{'non_penetration': {'function': 'compute_non_penetration_reward',
  'description': '\n    Calculate reward based on non-penetration constraint using penetration depth.\n\n    Following the approach from original authors: reward = sum of negative signed distances.\n    When objects overlap, we get positive penetration depth, so reward is negative.\n\n    Args:\n        parsed_scene: Dict returned by parse_and_descale_scenes()\n\n    Returns:\n        rewards: Tensor of shape (B,) with non-penetration rewards for each scene\n    '},
 'not_out_of_bound': {'function': 'compute_boundary_violation_reward',
  'description': "\n    Compute boundary violation reward using cached SDF grids.\n\n    **IMPORTANT**: Call `precompute_sdf_cache()` once before training to generate cache!\n\n    Args:\n        parsed_scene: Dictionary with positions, sizes, is_empty, device\n        floor_polygons: (B, num_vertices, 2) - only needed if cache doesn't exist\n        indices: (B,) - scene indices for SDF 

In [43]:
from utilities import get_all_utility_functions

# is_prompt=True presumably returns prompt-friendly metadata (function names and docstrings)
# rather than callables — TODO confirm against get_all_utility_functions.
utility_functions = get_all_utility_functions(is_prompt=True)

# Display the utility metadata that will be embedded in the phase-2 prompt.
utility_functions

{'find_object_front_and_back': {'function': 'find_object_front_and_back',
  'description': '\n    Find the coordinates of the front and back centers of a object.\n\n    Args:\n        position: (1,3) tensor - object centroid (x, y, z)\n        orientation: (1,2) tensor - [cos(θ), sin(θ)], z-rotation\n        size: (1,3) tensor - half-extents (sx/2, sy/2, sz/2)\n    \n    Returns:\n        front_center: (1,3) tensor - position of object front\n        back_center: (1,3) tensor - position of object back\n    '},
 'find_closest_wall_to_object': {'function': 'find_closest_wall_to_object',
  'description': "\n    Find which wall is closest to the object's front or back and compute its distance.\n\n    Args:\n        position: (1,3) tensor - object centroid (x, y, z)\n        orientation: (1,2) tensor - z-rotation\n        size: (1,3) tensor - half-extents (sx/2, sy/2, sz/2)\n        floor_polygons: list of ordered floor polygon vertices in the format [(x1, z1), (x2, z2), ...(xn, zn)]  where

## PHASE 1: Initial Constraint Decomposition

In [None]:
# Prompt to decompose. Keep exactly ONE assignment active: a later assignment silently
# overrides an earlier one. The classroom prompt is the active one because it matches
# ROOM_TYPE = "livingroom" and the hard-coded `constraints` used further down.
# USER_PROMPT = "A bedroom with 4 ceiling_lamps forming a rectangular shape." # out of distribution example
# USER_PROMPT = "A bedroom with ceiling lamp in each corner of the bed." # out of distribution example
USER_PROMPT = "A classroom for 10 students."  # out of distribution example


# Dataset statistics for the selected room type; interpolated into both system prompts.
dataset_context = get_dataset_context(ROOM_TYPE)

In [45]:
# System prompt for phase 1 (prompt -> constraint list). This IS an f-string: literal braces
# in the JSON example are doubled ({{ }}) so they are not parsed as replacement fields,
# which would otherwise raise an "invalid format specifier" error.
llm_instruction_1 = f"""
# TASK: Constraint Decomposition for 3D Scene Generation

You are an expert in 3D scene generation, interior design, and reinforcement learning. Your task is to analyze a user prompt and decompose it into verifiable constraints with Python reward functions.

## CONTEXT

### Dataset: 3D-FRONT
{dataset_facts}

Note: While generating constraints, no need to verify these facts with you constraints, focus on the constraints other than these facts.

In this task, you are provided with a user prompt and a dataset context. Your task is to decompose the user prompt into verifiable constraints with Python reward functions.

Here is the dataset information in JSON format about the specific room type: {ROOM_TYPE} you will be working on:
```json
{dataset_context}
```

Also, the baseline model is already trained on some universal constraints, so you do not need to consider these constraints while generating new ones. The universal constraints are:
```json
{universal_rewards_info_with_docstrings}
```

## YOUR TASK

Analyze the user prompt and provide a comprehensive JSON response with the following structure:

### 1. CONSTRAINT DECOMPOSITION

Generate ALL constraints needed to satisfy the prompt strictly in following format.

```json
{{
  "constraints": [
    {{
      "id": "C1",
      "name": "descriptive_snake_case_name",
      "description": "Clear description of what this checks"
    }},
    {{
      "id": "C2",
      "name": "descriptive_snake_case_name",
      "description": "Clear description of what this checks"
    }},
    ...
    {{
      "id": "Cn",
      "name": "descriptive_snake_case_name",
      "description": "Clear description of what this checks"
    }}
  ]
}}
```
"""

In [46]:
# Render the assembled phase-1 system prompt as Markdown to eyeball the interpolated context.
console.print(Markdown(llm_instruction_1))

In [47]:
from google import genai
from google.genai import types
from dotenv import load_dotenv
load_dotenv()

# Build the Gemini client only when that backend is enabled: constructing it needs
# Gemini credentials, which should not be required while use_gemini is False.
# Every later use of this client is guarded by the same flag.
if use_gemini:
    client = genai.Client()

In [48]:
import os
from openai import AzureOpenAI
from dotenv import load_dotenv
# Load variables from a local .env file so the AZURE_OPENAI_* settings below can be read.
load_dotenv()
if use_openai:
    # Rebinds the shared `client` name to an Azure OpenAI client configured from the environment.
    client = AzureOpenAI(
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    )
    # Deployment name passed as `model` in the chat-completion calls below.
    deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") 

In [49]:
# Phase 1 via Gemini: the system prompt carries the task, the user prompt is the content.
if use_gemini:
    llm_response_1 = client.models.generate_content(
        model="gemini-2.5-pro",
        config=types.GenerateContentConfig(
            system_instruction=llm_instruction_1),
        contents=USER_PROMPT,
    )

    print(llm_response_1.text)

    # Stored as the raw response text (a str), not a parsed dict.
    constraints = llm_response_1.text

In [50]:
# Phase 1 via Azure OpenAI: same system/user prompt pair as the Gemini cell.
if use_openai:
    llm_response_1 = client.chat.completions.create(
        model=deployment,
        messages=[
            {"role": "system", "content": llm_instruction_1},
            {"role": "user", "content": USER_PROMPT},
        ],
    )

    # Stored as the raw message content (a str), not a parsed dict.
    constraints = llm_response_1.choices[0].message.content

In [51]:
# constraints = {
#   "constraints": [
#     {
#       "id": "C1",
#       "name": "exactly_four_ceiling_lamps",
#       "description": "Verifies that the scene contains exactly 4 ceiling lamps (no more, no fewer)"
#     },
#     {
#       "id": "C2",
#       "name": "ceiling_lamps_form_rectangle",
#       "description": "Checks that the 4 ceiling lamps are positioned to form a rectangular shape in the XZ plane, with 4 vertices forming approximately 90-degree corners"
#     },
#     {
#       "id": "C3",
#       "name": "ceiling_lamps_coplanar",
#       "description": "Ensures all 4 ceiling lamps are at approximately the same Y-coordinate (height), forming a planar rectangle parallel to the floor"
#     },
#     {
#       "id": "C4",
#       "name": "ceiling_lamps_at_ceiling_height",
#       "description": "Verifies that all ceiling lamps are positioned near the ceiling (y ≈ ceiling_height, typically around 2.8m), not floating at arbitrary heights"
#     },
#     {
#       "id": "C5",
#       "name": "rectangle_aspect_ratio_reasonable",
#       "description": "Checks that the rectangle formed by ceiling lamps has a reasonable aspect ratio (not degenerate into a line or extremely elongated shape), with aspect ratio between 1:3 and 3:1"
#     },
#     {
#       "id": "C6",
#       "name": "ceiling_lamps_evenly_distributed",
#       "description": "Ensures the 4 ceiling lamps are positioned at the corners of the rectangle with approximately equal spacing along opposite sides (parallel sides have similar lengths)"
#     }
#   ]
# }

In [52]:
# Hard-coded constraint decomposition for the "classroom for 10 students" prompt
# (class names come from the livingroom label set).
# NOTE(review): this assignment is unconditional, so it replaces whatever the LLM cells
# above stored in `constraints` — confirm that overriding the live response is intended.
constraints = {
  "constraints": [
    {
      "id": "C1",
      "name": "minimum_seating_capacity",
      "description": "Ensure the scene contains seating for at least 10 students. Count all seating furniture (dining_chair, stool, armchair, lounge_chair, chinese_chair) and verify total capacity ≥ 10."
    },
    {
      "id": "C2",
      "name": "has_teaching_workspace",
      "description": "Verify the scene contains at least one desk or console_table that can serve as a teacher's workspace/teaching station."
    },
    {
      "id": "C3",
      "name": "organized_seating_arrangement",
      "description": "Check that seating furniture (chairs/stools) are arranged in an organized pattern facing a common direction, suggesting a classroom layout rather than scattered placement. Measure angular alignment and spatial clustering of seating."
    },
    {
      "id": "C4",
      "name": "sufficient_student_workspace",
      "description": "Ensure there are adequate work surfaces for students. Count dining_table, desk, or console_table instances and verify reasonable proximity to seating (within 1.5m of chairs)."
    },
    {
      "id": "C5",
      "name": "chair_to_table_pairing",
      "description": "Verify that each seating position (chair/stool) has access to a nearby table surface within functional distance (≤ 1.0m), ensuring students have workspace at their seats."
    },
    {
      "id": "C6",
      "name": "adequate_lighting",
      "description": "Check for presence of sufficient lighting fixtures (pendant_lamp, ceiling_lamp) appropriate for a learning environment. Minimum 1-2 light sources for a 10-student classroom."
    },
    {
      "id": "C7",
      "name": "minimal_living_room_furniture",
      "description": "Penalize presence of typical living room furniture that doesn't belong in a classroom (sofas, coffee_tables, tv_stands, lounge_chairs). Educational spaces should not contain leisure furniture."
    },
    {
      "id": "C8",
      "name": "storage_or_display_availability",
      "description": "Verify presence of storage/display furniture (bookshelf, cabinet, shelf) for classroom materials and student belongings. At least one storage unit should be present."
    }
  ]
}

In [53]:
# Show the constraints that will be forwarded to phase 2.
print(constraints)

{'constraints': [{'id': 'C1', 'name': 'minimum_seating_capacity', 'description': 'Ensure the scene contains seating for at least 10 students. Count all seating furniture (dining_chair, stool, armchair, lounge_chair, chinese_chair) and verify total capacity ≥ 10.'}, {'id': 'C2', 'name': 'has_teaching_workspace', 'description': "Verify the scene contains at least one desk or console_table that can serve as a teacher's workspace/teaching station."}, {'id': 'C3', 'name': 'organized_seating_arrangement', 'description': 'Check that seating furniture (chairs/stools) are arranged in an organized pattern facing a common direction, suggesting a classroom layout rather than scattered placement. Measure angular alignment and spatial clustering of seating.'}, {'id': 'C4', 'name': 'sufficient_student_workspace', 'description': 'Ensure there are adequate work surfaces for students. Count dining_table, desk, or console_table instances and verify reasonable proximity to seating (within 1.5m of chairs).

In [54]:
# constraints = {
#   "constraints": [
#     {
#       "id": "C1",
#       "name": "ceiling_lamps_count",
#       "description": "Ensure that the scene contains exactly 4 objects of type 'ceiling_lamp'. This verifies that the number of ceiling lamps in the scene equals four."
#     },
#     {
#       "id": "C2",
#       "name": "ceiling_lamps_rectangular_layout",
#       "description": "Verify that the 4 ceiling_lamp objects are arranged in a rectangular pattern. This involves checking that the 4 lamps' positions (in the XZ-plane, at the typical ceiling height) form the vertices of a rectangle by testing symmetry and right angles between adjacent edges."
#     }
#   ]
# }

In [55]:
# System prompt for phase 2 (constraints -> reward code). Built as an f-string, so the
# literal braces of the JSON examples are doubled ({{ }}).
# NOTE(review): the "In this task ..." paragraph repeats the phase-1 wording about
# decomposing the user prompt, although this phase maps constraints to code — confirm.
# NOTE(review): success_threshold is described as a float, yet the schema example shows a
# quoted description; the LLM may therefore return it as a string — confirm downstream parsing.
llm_instruction_2 = f"""
# TASK: Constraints to reward code mapping

You are an expert in 3D scene generation, interior design, and reinforcement learning.
Your task is to analyze given constraints and convert them into verifiable reward functions in Python.

## CONTEXT

### Dataset: 3D-FRONT
{dataset_facts}

In this task, you are provided with a user prompt and a dataset context. Your task is to decompose the user prompt into verifiable constraints with Python reward functions.

Here is the dataset information in JSON format about the specific room type: {ROOM_TYPE} you will be working on:
```json
{dataset_context}
```

### Scene Representation
{scene_representation}

### You also have the following utility functions at your disposal which you can use according to the given docstrings.
```json
{utility_functions}
```

### The baseline model is already trained on some universal constraints, so you do not need to consider these constraints while generating new ones. The universal constraints are:
```json
{universal_rewards_info_with_docstrings}
```

## YOUR TASK

Analyze the user prompt, constraints to be satisfied for that prompt and all other context i have provided, then provide a comprehensive JSON response with the following structure:

The template for reward function to quantify each constraint satisfaction with python code is as follows:
{reward_function_template}

Note: While using the utility functions, you can use the following code snippet:

```python
from dynamic_constraint_rewards.utilities import get_all_utility_functions

utility_functions = get_all_utility_functions()
return_values = utility_functions["function_name"]["function"](required_arguments(from docstring), **kwargs)
```

Also, passing all required parameters to the utlity functions is a must
(Example: don't miss room_type for create_scene_for_testing)

Also, Given the reward constraints, analyze and if there are constraints like: a scene must have n number of objects of a particular class, then inpaint those objects. To inpaint, pass the class labels and counts in the json format as specified below.

Example: If R1 = "a scene must have exactly 4 ceiling lamps", R2 = "a scene must have exactly 2 nightstands", then inpaint the objects with:

```
"inpaint": {{
  "ceiling_lamp": 4,
  "nightstand": 2
}}
```

Also, success_threshold is a float type, that indicates the constraint is satisfied (if unnormalized_raw_reward_value >= success_threshold)

Only return the following JSON response (nothing else), follow this structure strictly:

```json
{{
  "rewards": [
    {{
      "id": "R1",
      "constraint_id": "C1",
      "name": "descriptive_snake_case_name",
      "code": "Python Code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "Value in terms of raw reward units as implemented in Python code indicating satisfactory fulfillment of the constraint. This will be used to calculate success rate."
    }},
    {{
      "id": "R2",
      "constraint_id": "C2",
      "name": "descriptive_snake_case_name",
      "code": "Python Code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "Value in terms of raw reward units as implemented in Python code indicating satisfactory fulfillment of the constraint. This will be used to calculate success rate."
    }},
    ...
    {{
      "id": "Rn",
      "constraint_id": "Cn",
      "name": "descriptive_snake_case_name",
      "code": "Python Code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "Value in terms of raw reward units as implemented in Python code indicating satisfactory fulfillment of the constraint. This will be used to calculate success rate."
    }}
  ],
"inpaint": {{
  "class_label1": count1,
  "class_label2": count2,
  ...,
  "class_labeln": countn
  }}
}}
```

NOTE: Even if you have inpainted objects due to some constraints, keep those constraints in the rewards list.
NOTE: If you are passing any other arguments other than specified in the function descriptions, make sure to get it from the kwargs dictionary. (kwargs.get("argument_name"))
NOTE: You should use the utility functions exactly as the docstrings provided, all arguments should be passed in the same order as in the docstrings. (followed by kwargs if required)
"""

In [56]:
# Render the assembled phase-2 system prompt as Markdown to eyeball the interpolated context.
console.print(Markdown(llm_instruction_2))


In [57]:
# User message for phase 2: the original prompt plus the constraints from phase 1.
# `constraints` is interpolated with str(), so a dict appears as its Python repr.
llm_user_prompt_2 = f"""
User Prompt: {USER_PROMPT}

Constraints to be satisfied: {constraints}
"""

console.print(Markdown(llm_user_prompt_2))

In [58]:
# Phase 2 via Gemini: ask for reward-function code for the constraints above.
if use_gemini:
    llm_response_2 = client.models.generate_content(
        model="gemini-2.5-pro",
        config=types.GenerateContentConfig(
            system_instruction=llm_instruction_2),
        contents=llm_user_prompt_2,
    )

    # Raw response text (a str); only defined when this backend is enabled.
    reward_functions = llm_response_2.text

In [59]:
# Phase 2 via Azure OpenAI: same system/user prompt pair as the Gemini cell.
if use_openai:
    completion = client.chat.completions.create(
        model=deployment,
        messages=[
            {"role": "system", "content": llm_instruction_2},
            {"role": "user", "content": llm_user_prompt_2},
        ],
    )

    # Raw message content (a str); only defined when this backend is enabled.
    reward_functions = completion.choices[0].message.content



In [60]:
# import json
# with open("reward_functions_output.json", "w", encoding="utf-8") as f:
#     json.dump(reward_functions, f, indent=4, ensure_ascii=False)

# print(reward_functions)
# print(type(reward_functions))


In [61]:
# rewards_dict = {
#   "rewards": [
#     {
#       "id": "R1",
#       "constraint_id": "C1",
#       "name": "ceiling_lamps_count",
#       "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Ensure that the scene contains exactly 4 ceiling_lamp objects.\n\n    Reward is 1.0 if exactly 4 ceiling lamps are present in the scene, otherwise penalty of -|count-4| per scene.\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']  # shape (B, N, num_classes)\n    B = one_hot.shape[0]\n    rewards = []\n    # Process each scene in batch\n    for i in range(B):\n        scene_one_hot = one_hot[i]  # (N, num_classes)\n        # Use the utility function to count objects of type 'ceiling_lamp'\n        count = utility_functions['get_object_count_in_a_scene']['function'](\n            scene_one_hot.unsqueeze(0), 'ceiling_lamp', idx_to_labels\n        )\n        # Reward: 1 when exactly 4, else negative penalty scaled by difference\n        if count == 4:\n            reward_val = 1.0\n        else:\n            reward_val = -abs(count - 4)\n        rewards.append(reward_val)\n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    # Create a scene that satisfies the constraint: exactly 4 ceiling lamps\n    num_objects = 4\n    class_label_indices = [3, 3, 3, 3]  # Assuming index 3 maps to 'ceiling_lamp'\n    translations = [(1, 2.8, 1), (1, 2.8, 3), (3, 2.8, 3), (3, 2.8, 1)]\n    sizes = [(0.3, 0.3, 0.3)] * 4\n    orientations = [(1, 0)] * 4\n    scene_valid = create_scene_for_testing(room_type, num_objects, class_label_indices, translations, sizes, orientations)\n\n    # Create a scene that does not satisfy the count constraint, e.g., only 3 ceiling lamps\n    num_objects_bad = 3\n    class_label_indices_bad = [3, 3, 3]\n    translations_bad = 
[(1, 2.8, 1), (1, 2.8, 3), (3, 2.8, 1)]\n    sizes_bad = [(0.3, 0.3, 0.3)] * 3\n    orientations_bad = [(1, 0)] * 3\n    scene_invalid = create_scene_for_testing(room_type, num_objects_bad, class_label_indices_bad, translations_bad, sizes_bad, orientations_bad)\n\n    # Stack scenes into a batch\n    tensor_keys = [k for k in scene_valid if isinstance(scene_valid[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_valid[k], scene_invalid[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_valid['device']\n\n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Ceiling Lamp Count Rewards:\", rewards)\n\n    # Test cases\n    # For valid scene: reward should be 1.0\n    assert torch.isclose(rewards[0], torch.tensor(1.0, device=parsed_scenes['device']))\n    # For invalid scene: count=3, so reward should be -1\n    assert torch.isclose(rewards[1], torch.tensor(-1.0, device=parsed_scenes['device']))\n\n    return rewards\n",
#       "success_threshold": "1.0"
#     },
#     {
#       "id": "R2",
#       "constraint_id": "C2",
#       "name": "ceiling_lamps_rectangular_layout",
#       "code": "import torch\nimport math\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef is_rectangle(pts, tol=0.1):\n    \"\"\"\n    Given pts as a list of 4 (x,z) tuples, check if they form a rectangle.\n    Steps:\n      1. Compute centroid.\n      2. Compute angles of each point with respect to centroid and sort.\n      3. Compute vectors for consecutive edges and check if adjacent edges are perpendicular.\n    \"\"\"\n    if len(pts) != 4:\n        return False\n    cx = sum(p[0] for p in pts) / 4.0\n    cz = sum(p[1] for p in pts) / 4.0\n    # Compute angle from centroid\n    pts_with_angle = []\n    for p in pts:\n        angle = math.atan2(p[1] - cz, p[0] - cx)\n        pts_with_angle.append((p, angle))\n    pts_with_angle.sort(key=lambda x: x[1])\n    sorted_pts = [p for p, angle in pts_with_angle]\n    # Check adjacent edges for right angle: use dot product\n    for i in range(4):\n        p1 = sorted_pts[i]\n        p2 = sorted_pts[(i+1)%4]\n        p3 = sorted_pts[(i+2)%4]\n        # vector from p1 to p2\n        v1 = (p2[0]-p1[0], p2[1]-p1[1])\n        # vector from p2 to p3\n        v2 = (p3[0]-p2[0], p3[1]-p2[1])\n        dot = v1[0]*v2[0] + v1[1]*v2[1]\n        norm1 = math.hypot(v1[0], v1[1])\n        norm2 = math.hypot(v2[0], v2[1])\n        if norm1 < 1e-6 or norm2 < 1e-6:\n            return False\n        cos_angle = dot / (norm1 * norm2)\n        # Check for 90 degrees within tolerance\n        if abs(cos_angle) > tol:\n            return False\n    return True\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Verify that the 4 ceiling_lamp objects are arranged in a rectangular pattern.\n    Conditions:\n      - Exactly 4 ceiling lamps\n      - Their y coordinate should be near the ceiling height (2.8 m +- 0.1)\n      - Their positions in the XZ-plane form a rectangle (using right-angle test)\n\n    Reward is 1.0 if rectangle is 
satisfied, otherwise -1.0 (or -10 if count condition is not met).\n    '''\n    utility_functions = get_all_utility_functions()\n    positions = parsed_scenes['positions']  # shape (B, N, 3)\n    object_indices = parsed_scenes['object_indices']  # shape (B, N)\n    is_empty = parsed_scenes['is_empty']  # shape (B, N)\n    B = positions.shape[0]\n    rewards = []\n    ceiling_height = 2.8\n    y_tol = 0.1\n    for i in range(B):\n        lamp_positions = []\n        count = 0\n        for j in range(positions.shape[1]):\n            if is_empty[i, j]:\n                continue\n            # Check if object is a ceiling lamp\n            obj_idx = int(object_indices[i, j].item())\n            if idx_to_labels[str(obj_idx)] == 'ceiling_lamp':\n                count += 1\n                pos = positions[i, j]\n                # Check y coordinate is near ceiling height\n                if abs(pos[1].item() - ceiling_height) > y_tol:\n                    # If any lamp is not at ceiling height, consider layout invalid\n                    lamp_positions = []\n                    break\n                lamp_positions.append((pos[0].item(), pos[2].item()))\n        # If not exactly 4 lamps, heavy penalty\n        if count != 4 or len(lamp_positions) != 4:\n            rewards.append(-10.0)\n        else:\n            # Check if positions form a rectangle\n            if is_rectangle(lamp_positions, tol=0.1):\n                rewards.append(1.0)\n            else:\n                rewards.append(-1.0)\n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    # Create a scene with 4 ceiling lamps arranged in a rectangle at ceiling height (2.8 m)\n    num_objects = 4\n    class_label_indices = [3, 3, 3, 3]  # Assuming index 3 maps to 'ceiling_lamp'\n    translations = [(1, 2.8, 1), (1, 2.8, 3), (3, 2.8, 3), (3, 2.8, 1)]\n    sizes = 
[(0.3, 0.3, 0.3)] * 4\n    orientations = [(1, 0)] * 4\n    scene_valid = create_scene_for_testing(room_type, num_objects, class_label_indices, translations, sizes, orientations)\n\n    # Create a scene with 4 ceiling lamps but not forming a rectangle (e.g., slightly perturbed)\n    translations_bad = [(1, 2.8, 1), (1, 2.8, 3), (3, 2.8, 3.2), (3, 2.8, 1)]  # third lamp moved\n    scene_invalid = create_scene_for_testing(room_type, num_objects, class_label_indices, translations_bad, sizes, orientations)\n\n    # Create a scene with incorrect count (e.g., 3 lamps)\n    num_objects_count = 3\n    class_label_indices_count = [3, 3, 3]\n    translations_count = [(1, 2.8, 1), (1, 2.8, 3), (3, 2.8, 1)]\n    sizes_count = [(0.3, 0.3, 0.3)] * 3\n    orientations_count = [(1, 0)] * 3\n    scene_wrong_count = create_scene_for_testing(room_type, num_objects_count, class_label_indices_count, translations_count, sizes_count, orientations_count)\n\n    # Stack scenes into a batch\n    import torch\n    tensor_keys = [k for k in scene_valid if isinstance(scene_valid[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_valid[k], scene_invalid[k], scene_wrong_count[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_valid['device']\n\n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Ceiling Lamp Rectangular Layout Rewards:\", rewards)\n\n    # Test cases:\n    # Scene 1: Valid rectangle => reward should be 1.0\n    assert torch.isclose(rewards[0], torch.tensor(1.0, device=parsed_scenes['device']))\n    # Scene 2: Lamps not forming a perfect rectangle => reward should be -1.0\n    assert torch.isclose(rewards[1], torch.tensor(-1.0, device=parsed_scenes['device']))\n    # Scene 3: Incorrect lamp count => reward should be -10.0\n    assert torch.isclose(rewards[2], torch.tensor(-10.0, device=parsed_scenes['device']))\n\n    return rewards\n",
#       "success_threshold": "1.0"
#     }
#   ],
#   "inpaint": {
#     "ceiling_lamp": 4
#   }
# }

In [62]:
reward_dict = {
  "rewards": [
    {
      "id": "R1",
      "constraint_id": "C1",
      "name": "minimum_seating_capacity",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for having at least 10 seating positions in the scene.\n    Seating furniture includes: dining_chair, stool, armchair, lounge_chair, chinese_chair\n    \n    Returns:\n        reward: (B,) tensor - 0 if >= 10 seats, negative penalty otherwise\n    '''\n    utility_functions = get_all_utility_functions()\n    get_object_count = utility_functions[\"get_object_count_in_a_scene\"][\"function\"]\n    \n    one_hot = parsed_scenes['one_hot']  # (B, N, num_classes)\n    B = one_hot.shape[0]\n    device = parsed_scenes['device']\n    \n    # Define seating furniture types\n    seating_types = ['dining_chair', 'stool', 'armchair', 'lounge_chair', 'chinese_chair']\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        # Count total seating capacity\n        total_seats = 0\n        for seat_type in seating_types:\n            count = get_object_count(one_hot[b:b+1], seat_type, idx_to_labels)\n            total_seats += count\n        \n        # Reward: 0 if >= 10 seats, else penalty proportional to shortfall\n        if total_seats >= 10:\n            rewards[b] = 0.0\n        else:\n            rewards[b] = -(10 - total_seats) * 1.0  # -1.0 per missing seat\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the minimum_seating_capacity reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    # Get indices for furniture\n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    stool_idx = labels_to_idx.get('stool', 20)\n    table_idx = labels_to_idx.get('dining_table', 11)\n    \n    # 
Scene 1: 10 chairs (exactly meets requirement)\n    num_objects_1 = 10\n    class_label_indices_1 = [chair_idx] * 10\n    translations_1 = [(i, 0.4, 0) for i in range(10)]\n    sizes_1 = [(0.3, 0.4, 0.3)] * 10\n    orientations_1 = [(1, 0)] * 10\n    scene_1 = create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: 5 chairs + 6 stools = 11 (exceeds requirement)\n    num_objects_2 = 11\n    class_label_indices_2 = [chair_idx] * 5 + [stool_idx] * 6\n    translations_2 = [(i, 0.4, 0) for i in range(11)]\n    sizes_2 = [(0.3, 0.4, 0.3)] * 11\n    orientations_2 = [(1, 0)] * 11\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: Only 6 chairs (below requirement)\n    num_objects_3 = 6\n    class_label_indices_3 = [chair_idx] * 6\n    translations_3 = [(i, 0.4, 0) for i in range(6)]\n    sizes_3 = [(0.3, 0.4, 0.3)] * 6\n    orientations_3 = [(1, 0)] * 6\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    # Stack scenes\n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    # Test assertions\n    assert rewards[0] >= -0.1, \"Scene 1 should have 0 penalty (10 chairs)\"\n    assert rewards[1] >= -0.1, \"Scene 2 should have 0 penalty (11 seats)\"\n    assert rewards[2] < -3.5, \"Scene 3 should have penalty (only 6 chairs)\"\n    print(\"All tests passed for minimum_seating_capacity!\")",
      "success_threshold": -0.5
    },
    {
      "id": "R2",
      "constraint_id": "C2",
      "name": "has_teaching_workspace",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for having at least one desk or console_table for teacher workspace.\n    \n    Returns:\n        reward: (B,) tensor - 0 if has teaching workspace, -5.0 otherwise\n    '''\n    utility_functions = get_all_utility_functions()\n    get_object_count = utility_functions[\"get_object_count_in_a_scene\"][\"function\"]\n    \n    one_hot = parsed_scenes['one_hot']  # (B, N, num_classes)\n    B = one_hot.shape[0]\n    device = parsed_scenes['device']\n    \n    # Teaching workspace furniture\n    workspace_types = ['desk', 'console_table']\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        has_workspace = False\n        for workspace_type in workspace_types:\n            count = get_object_count(one_hot[b:b+1], workspace_type, idx_to_labels)\n            if count > 0:\n                has_workspace = True\n                break\n        \n        if has_workspace:\n            rewards[b] = 0.0\n        else:\n            rewards[b] = -5.0\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the has_teaching_workspace reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    desk_idx = labels_to_idx.get('desk', 7)\n    console_idx = labels_to_idx.get('console_table', 7)\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    \n    # Scene 1: Has desk\n    num_objects_1 = 2\n    class_label_indices_1 = [desk_idx, chair_idx]\n    translations_1 = [(0, 0.4, 0), (1, 0.4, 0)]\n    sizes_1 = [(0.6, 0.4, 0.4), (0.3, 0.4, 0.3)]\n    orientations_1 = [(1, 0), (1, 0)]\n    scene_1 = 
create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has console_table\n    num_objects_2 = 2\n    class_label_indices_2 = [console_idx, chair_idx]\n    translations_2 = [(0, 0.4, 0), (1, 0.4, 0)]\n    sizes_2 = [(0.6, 0.4, 0.3), (0.3, 0.4, 0.3)]\n    orientations_2 = [(1, 0), (1, 0)]\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: No desk or console_table\n    num_objects_3 = 2\n    class_label_indices_3 = [chair_idx, chair_idx]\n    translations_3 = [(0, 0.4, 0), (1, 0.4, 0)]\n    sizes_3 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3)]\n    orientations_3 = [(1, 0), (1, 0)]\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] >= -0.1, \"Scene 1 should have 0 penalty (has desk)\"\n    assert rewards[1] >= -0.1, \"Scene 2 should have 0 penalty (has console_table)\"\n    assert rewards[2] <= -4.9, \"Scene 3 should have penalty (no workspace)\"\n    print(\"All tests passed for has_teaching_workspace!\")",
      "success_threshold": -0.5
    },
    {
      "id": "R3",
      "constraint_id": "C3",
      "name": "organized_seating_arrangement",
      "code": "import torch\nimport math\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for organized seating arrangement - chairs should face similar directions\n    and be spatially clustered.\n    \n    Returns:\n        reward: (B,) tensor - negative penalty for angular deviation and spatial scatter\n    '''\n    utility_functions = get_all_utility_functions()\n    compute_angle = utility_functions[\"compute_angle_between_objects\"][\"function\"]\n    distance_2d = utility_functions[\"distance_2d\"][\"function\"]\n    \n    positions = parsed_scenes['positions']  # (B, N, 3)\n    orientations = parsed_scenes['orientations']  # (B, N, 2)\n    object_indices = parsed_scenes['object_indices']  # (B, N)\n    is_empty = parsed_scenes['is_empty']  # (B, N)\n    B, N = positions.shape[0], positions.shape[1]\n    device = parsed_scenes['device']\n    \n    # Seating furniture\n    seating_types = ['dining_chair', 'stool', 'armchair', 'lounge_chair', 'chinese_chair']\n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    seating_indices = [labels_to_idx.get(st, -1) for st in seating_types if labels_to_idx.get(st, -1) != -1]\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        # Find all seating objects\n        seat_mask = torch.zeros(N, dtype=torch.bool, device=device)\n        for idx in seating_indices:\n            seat_mask |= (object_indices[b] == idx)\n        seat_mask &= ~is_empty[b]\n        \n        seat_positions = positions[b][seat_mask]  # (num_seats, 3)\n        seat_orientations = orientations[b][seat_mask]  # (num_seats, 2)\n        num_seats = seat_mask.sum().item()\n        \n        if num_seats < 2:\n            rewards[b] = 0.0\n            continue\n        \n        # Calculate angular alignment - all chairs should face similar direction\n        
total_angle_dev = 0.0\n        count = 0\n        for i in range(num_seats):\n            for j in range(i+1, num_seats):\n                angle_rad = compute_angle(seat_orientations[i:i+1], seat_orientations[j:j+1])\n                angle_deg = abs(angle_rad.item()) * 180.0 / math.pi\n                # Normalize to [0, 90] (considering 180° symmetry)\n                angle_dev = min(angle_deg, 180 - angle_deg)\n                total_angle_dev += angle_dev\n                count += 1\n        \n        avg_angle_dev = total_angle_dev / count if count > 0 else 0.0\n        \n        # Calculate spatial clustering - chairs should be close together\n        total_distance = 0.0\n        dist_count = 0\n        for i in range(num_seats):\n            for j in range(i+1, num_seats):\n                dist = distance_2d(seat_positions[i:i+1], seat_positions[j:j+1])\n                total_distance += dist.item()\n                dist_count += 1\n        \n        avg_distance = total_distance / dist_count if dist_count > 0 else 0.0\n        \n        # Penalty: higher for scattered placement and misaligned orientations\n        # Good classroom: avg_angle_dev < 15°, avg_distance < 3m\n        angle_penalty = max(0, (avg_angle_dev - 15.0) * 0.1)  # 0.1 per degree over 15°\n        distance_penalty = max(0, (avg_distance - 3.0) * 0.5)  # 0.5 per meter over 3m\n        \n        rewards[b] = -(angle_penalty + distance_penalty)\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the organized_seating_arrangement reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    \n    # Scene 1: Well-organized (same direction, close together)\n    num_objects_1 = 6\n    class_label_indices_1 
= [chair_idx] * 6\n    translations_1 = [(0, 0.4, 0), (1, 0.4, 0), (2, 0.4, 0), (0, 0.4, 1), (1, 0.4, 1), (2, 0.4, 1)]\n    sizes_1 = [(0.3, 0.4, 0.3)] * 6\n    orientations_1 = [(1, 0)] * 6  # All facing same direction\n    scene_1 = create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Poorly organized (random directions, scattered)\n    num_objects_2 = 6\n    class_label_indices_2 = [chair_idx] * 6\n    translations_2 = [(0, 0.4, 0), (5, 0.4, 0), (10, 0.4, 0), (15, 0.4, 5), (20, 0.4, 10), (25, 0.4, 15)]\n    sizes_2 = [(0.3, 0.4, 0.3)] * 6\n    orientations_2 = [(1, 0), (0, 1), (-1, 0), (0, -1), (0.707, 0.707), (-0.707, 0.707)]  # Random directions\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: Only 1 chair (no penalty)\n    num_objects_3 = 1\n    class_label_indices_3 = [chair_idx]\n    translations_3 = [(0, 0.4, 0)]\n    sizes_3 = [(0.3, 0.4, 0.3)]\n    orientations_3 = [(1, 0)]\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] >= -1.0, \"Scene 1 should have minimal penalty (organized)\"\n    assert rewards[1] <= -5.0, \"Scene 2 should have high penalty (disorganized)\"\n    assert rewards[2] >= -0.1, \"Scene 3 should have no penalty (only 1 chair)\"\n    print(\"All tests passed for organized_seating_arrangement!\")",
      "success_threshold": -2.0
    },
    {
      "id": "R4",
      "constraint_id": "C4",
      "name": "sufficient_student_workspace",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for having adequate work surfaces near seating.\n    Count tables/desks and verify they are within 1.5m of chairs.\n    \n    Returns:\n        reward: (B,) tensor - penalty for insufficient workspace coverage\n    '''\n    utility_functions = get_all_utility_functions()\n    distance_2d = utility_functions[\"distance_2d\"][\"function\"]\n    get_object_count = utility_functions[\"get_object_count_in_a_scene\"][\"function\"]\n    \n    positions = parsed_scenes['positions']  # (B, N, 3)\n    object_indices = parsed_scenes['object_indices']  # (B, N)\n    is_empty = parsed_scenes['is_empty']  # (B, N)\n    one_hot = parsed_scenes['one_hot']  # (B, N, num_classes)\n    B, N = positions.shape[0], positions.shape[1]\n    device = parsed_scenes['device']\n    \n    # Workspace and seating furniture\n    workspace_types = ['dining_table', 'desk', 'console_table']\n    seating_types = ['dining_chair', 'stool', 'armchair', 'lounge_chair', 'chinese_chair']\n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    \n    workspace_indices = [labels_to_idx.get(wt, -1) for wt in workspace_types if labels_to_idx.get(wt, -1) != -1]\n    seating_indices = [labels_to_idx.get(st, -1) for st in seating_types if labels_to_idx.get(st, -1) != -1]\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        # Count workspaces\n        total_workspaces = 0\n        for wt in workspace_types:\n            total_workspaces += get_object_count(one_hot[b:b+1], wt, idx_to_labels)\n        \n        # Count seats\n        total_seats = 0\n        for st in seating_types:\n            total_seats += get_object_count(one_hot[b:b+1], st, idx_to_labels)\n        \n        if total_seats == 0:\n            rewards[b] = 0.0\n            continue\n     
   \n        # Check workspace availability\n        # Ideally: 1 workspace per 2-3 students (for group work)\n        expected_workspaces = max(1, total_seats // 3)\n        \n        if total_workspaces < expected_workspaces:\n            rewards[b] = -(expected_workspaces - total_workspaces) * 2.0\n        else:\n            # Now check proximity: are workspaces within 1.5m of seats?\n            workspace_mask = torch.zeros(N, dtype=torch.bool, device=device)\n            for idx in workspace_indices:\n                workspace_mask |= (object_indices[b] == idx)\n            workspace_mask &= ~is_empty[b]\n            \n            seat_mask = torch.zeros(N, dtype=torch.bool, device=device)\n            for idx in seating_indices:\n                seat_mask |= (object_indices[b] == idx)\n            seat_mask &= ~is_empty[b]\n            \n            workspace_positions = positions[b][workspace_mask]\n            seat_positions = positions[b][seat_mask]\n            \n            if workspace_positions.shape[0] > 0 and seat_positions.shape[0] > 0:\n                # Check if each workspace is near at least one seat\n                far_workspaces = 0\n                for wp in workspace_positions:\n                    min_dist = float('inf')\n                    for sp in seat_positions:\n                        dist = distance_2d(wp.unsqueeze(0), sp.unsqueeze(0)).item()\n                        min_dist = min(min_dist, dist)\n                    if min_dist > 1.5:\n                        far_workspaces += 1\n                \n                rewards[b] = -far_workspaces * 1.0\n            else:\n                rewards[b] = 0.0\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the sufficient_student_workspace reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx 
= {v: int(k) for k, v in idx_to_labels.items()}\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    table_idx = labels_to_idx.get('dining_table', 11)\n    desk_idx = labels_to_idx.get('desk', 7)\n    \n    # Scene 1: 6 chairs + 2 tables nearby (good)\n    num_objects_1 = 8\n    class_label_indices_1 = [chair_idx]*6 + [table_idx]*2\n    translations_1 = [(0, 0.4, 0), (1, 0.4, 0), (2, 0.4, 0), (0, 0.4, 1), (1, 0.4, 1), (2, 0.4, 1), (0.5, 0.4, 0.5), (1.5, 0.4, 0.5)]\n    sizes_1 = [(0.3, 0.4, 0.3)]*6 + [(0.8, 0.4, 0.5)]*2\n    orientations_1 = [(1, 0)]*8\n    scene_1 = create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: 9 chairs but no tables (bad)\n    num_objects_2 = 9\n    class_label_indices_2 = [chair_idx]*9\n    translations_2 = [(i, 0.4, 0) for i in range(9)]\n    sizes_2 = [(0.3, 0.4, 0.3)]*9\n    orientations_2 = [(1, 0)]*9\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: 6 chairs + 1 desk nearby (okay)\n    num_objects_3 = 7\n    class_label_indices_3 = [chair_idx]*6 + [desk_idx]\n    translations_3 = [(0, 0.4, 0), (1, 0.4, 0), (2, 0.4, 0), (0, 0.4, 1), (1, 0.4, 1), (2, 0.4, 1), (1, 0.4, 0.5)]\n    sizes_3 = [(0.3, 0.4, 0.3)]*6 + [(0.8, 0.4, 0.5)]\n    orientations_3 = [(1, 0)]*7\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] 
>= -1.0, \"Scene 1 should have minimal penalty (good workspace)\"\n    assert rewards[1] <= -3.0, \"Scene 2 should have penalty (no tables)\"\n    assert rewards[2] >= -2.0, \"Scene 3 should have acceptable workspace\"\n    print(\"All tests passed for sufficient_student_workspace!\")",
      "success_threshold": -1.5
    },
    
    {
  "id": "R5",
  "constraint_id": "C5",
  "name": "chair_to_table_pairing",
  "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for chair-table pairing - each chair should be within 1.0m of a table.\n    \n    Returns:\n        reward: (B,) tensor - penalty for unpaired chairs\n    '''\n    utility_functions = get_all_utility_functions()\n    distance_2d = utility_functions[\"distance_2d\"][\"function\"]\n    \n    positions = parsed_scenes['positions']\n    object_indices = parsed_scenes['object_indices']\n    is_empty = parsed_scenes['is_empty']\n    B, N = positions.shape[0], positions.shape[1]\n    device = parsed_scenes['device']\n    \n    workspace_types = ['dining_table', 'desk', 'console_table']\n    seating_types = ['dining_chair', 'stool', 'armchair', 'lounge_chair', 'chinese_chair']\n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    \n    workspace_indices = [labels_to_idx.get(wt, -1) for wt in workspace_types if labels_to_idx.get(wt, -1) != -1]\n    seating_indices = [labels_to_idx.get(st, -1) for st in seating_types if labels_to_idx.get(st, -1) != -1]\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        workspace_mask = torch.zeros(N, dtype=torch.bool, device=device)\n        for idx in workspace_indices:\n            workspace_mask |= (object_indices[b] == idx)\n        workspace_mask &= ~is_empty[b]\n        \n        seat_mask = torch.zeros(N, dtype=torch.bool, device=device)\n        for idx in seating_indices:\n            seat_mask |= (object_indices[b] == idx)\n        seat_mask &= ~is_empty[b]\n        \n        workspace_positions = positions[b][workspace_mask]\n        seat_positions = positions[b][seat_mask]\n        num_seats = seat_mask.sum().item()\n        \n        if num_seats == 0 or workspace_positions.shape[0] == 0:\n            rewards[b] = 0.0\n            continue\n        \n        
unpaired_chairs = 0\n        for seat_pos in seat_positions:\n            min_dist = float('inf')\n            for workspace_pos in workspace_positions:\n                dist = distance_2d(seat_pos.unsqueeze(0), workspace_pos.unsqueeze(0)).item()\n                min_dist = min(min_dist, dist)\n            \n            if min_dist > 1.0:\n                unpaired_chairs += 1\n        \n        rewards[b] = -unpaired_chairs * 1.0\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the chair_to_table_pairing reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    table_idx = labels_to_idx.get('dining_table', 11)\n    \n    # Scene 1: 4 chairs all within 1.0m of table (perfect pairing)\n    num_objects_1 = 5\n    class_label_indices_1 = [chair_idx, chair_idx, chair_idx, chair_idx, table_idx]\n    translations_1 = [(0, 0.4, 0), (1.5, 0.4, 0), (0, 0.4, 0.8), (1.5, 0.4, 0.8), (0.75, 0.4, 0.4)]\n    sizes_1 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.8, 0.4, 0.5)]\n    orientations_1 = [(1, 0), (1, 0), (1, 0), (1, 0), (1, 0)]\n    scene_1 = create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: 4 chairs, 2 far from table (partial pairing)\n    num_objects_2 = 5\n    class_label_indices_2 = [chair_idx, chair_idx, chair_idx, chair_idx, table_idx]\n    translations_2 = [(0, 0.4, 0), (1.5, 0.4, 0), (5, 0.4, 5), (6, 0.4, 5), (0.75, 0.4, 0.4)]\n    sizes_2 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.8, 0.4, 0.5)]\n    orientations_2 = [(1, 0), (1, 0), (1, 0), (1, 0), (1, 0)]\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, 
translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: No tables (all chairs unpaired)\n    num_objects_3 = 3\n    class_label_indices_3 = [chair_idx, chair_idx, chair_idx]\n    translations_3 = [(0, 0.4, 0), (1, 0.4, 0), (2, 0.4, 0)]\n    sizes_3 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.3, 0.4, 0.3)]\n    orientations_3 = [(1, 0), (1, 0), (1, 0)]\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] >= -0.1, \"Scene 1 should have no penalty (all chairs paired)\"\n    assert rewards[1] <= -1.5, \"Scene 2 should have penalty (2 chairs unpaired)\"\n    assert rewards[2] == 0.0, \"Scene 3 should have no penalty (no tables to pair with)\"\n    print(\"All tests passed for chair_to_table_pairing!\")",
  "success_threshold": -2.0
},
{
  "id": "R6",
  "constraint_id": "C6",
  "name": "adequate_lighting",
  "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for having adequate lighting (1-2 light sources minimum).\n    \n    Returns:\n        reward: (B,) tensor - 0 if >= 1 light, negative penalty otherwise\n    '''\n    utility_functions = get_all_utility_functions()\n    get_object_count = utility_functions[\"get_object_count_in_a_scene\"][\"function\"]\n    \n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    device = parsed_scenes['device']\n    \n    lighting_types = ['pendant_lamp', 'ceiling_lamp']\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        total_lights = 0\n        for light_type in lighting_types:\n            count = get_object_count(one_hot[b:b+1], light_type, idx_to_labels)\n            total_lights += count\n        \n        if total_lights >= 1:\n            rewards[b] = 0.0\n        else:\n            rewards[b] = -3.0\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the adequate_lighting reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    pendant_idx = labels_to_idx.get('pendant_lamp', 13)\n    ceiling_idx = labels_to_idx.get('ceiling_lamp', 3)\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    \n    # Scene 1: Has pendant lamp\n    num_objects_1 = 2\n    class_label_indices_1 = [pendant_idx, chair_idx]\n    translations_1 = [(0, 2.5, 0), (1, 0.4, 0)]\n    sizes_1 = [(0.2, 0.3, 0.2), (0.3, 0.4, 0.3)]\n    orientations_1 = [(1, 0), (1, 0)]\n    scene_1 = create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: 
Has ceiling lamp\n    num_objects_2 = 2\n    class_label_indices_2 = [ceiling_idx, chair_idx]\n    translations_2 = [(0, 2.7, 0), (1, 0.4, 0)]\n    sizes_2 = [(0.3, 0.1, 0.3), (0.3, 0.4, 0.3)]\n    orientations_2 = [(1, 0), (1, 0)]\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: No lighting\n    num_objects_3 = 2\n    class_label_indices_3 = [chair_idx, chair_idx]\n    translations_3 = [(0, 0.4, 0), (1, 0.4, 0)]\n    sizes_3 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3)]\n    orientations_3 = [(1, 0), (1, 0)]\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] >= -0.1, \"Scene 1 should have no penalty (has pendant lamp)\"\n    assert rewards[1] >= -0.1, \"Scene 2 should have no penalty (has ceiling lamp)\"\n    assert rewards[2] <= -2.9, \"Scene 3 should have penalty (no lighting)\"\n    print(\"All tests passed for adequate_lighting!\")",
  "success_threshold": -0.5
},
{
  "id": "R7",
  "constraint_id": "C7",
  "name": "minimal_living_room_furniture",
  "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Penalize presence of living room furniture that doesn't belong in classroom.\n    \n    Returns:\n        reward: (B,) tensor - penalty for each living room furniture item\n    '''\n    utility_functions = get_all_utility_functions()\n    get_object_count = utility_functions[\"get_object_count_in_a_scene\"][\"function\"]\n    \n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    device = parsed_scenes['device']\n    \n    living_room_types = ['multi_seat_sofa', 'loveseat_sofa', 'l_shaped_sofa', 'chaise_longue_sofa', \n                         'lazy_sofa', 'coffee_table', 'tv_stand', 'lounge_chair']\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        total_living_room_items = 0\n        for lr_type in living_room_types:\n            count = get_object_count(one_hot[b:b+1], lr_type, idx_to_labels)\n            total_living_room_items += count\n        \n        rewards[b] = -total_living_room_items * 2.0\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the minimal_living_room_furniture reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    table_idx = labels_to_idx.get('dining_table', 11)\n    sofa_idx = labels_to_idx.get('multi_seat_sofa', 16)\n    coffee_idx = labels_to_idx.get('coffee_table', 6)\n    tv_idx = labels_to_idx.get('tv_stand', 21)\n    \n    # Scene 1: No living room furniture (good)\n    num_objects_1 = 3\n    class_label_indices_1 = [chair_idx, chair_idx, table_idx]\n    translations_1 = [(0, 0.4, 
0), (1, 0.4, 0), (0.5, 0.4, 0.5)]\n    sizes_1 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3), (0.8, 0.4, 0.5)]\n    orientations_1 = [(1, 0), (1, 0), (1, 0)]\n    scene_1 = create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has 1 sofa (bad)\n    num_objects_2 = 3\n    class_label_indices_2 = [chair_idx, sofa_idx, table_idx]\n    translations_2 = [(0, 0.4, 0), (2, 0.5, 0), (0.5, 0.4, 0.5)]\n    sizes_2 = [(0.3, 0.4, 0.3), (1.0, 0.5, 0.8), (0.8, 0.4, 0.5)]\n    orientations_2 = [(1, 0), (1, 0), (1, 0)]\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: Has multiple living room items (worse)\n    num_objects_3 = 4\n    class_label_indices_3 = [sofa_idx, coffee_idx, tv_idx, chair_idx]\n    translations_3 = [(0, 0.5, 0), (1, 0.3, 0), (2, 0.4, 0), (3, 0.4, 0)]\n    sizes_3 = [(1.0, 0.5, 0.8), (0.5, 0.3, 0.5), (0.8, 0.5, 0.4), (0.3, 0.4, 0.3)]\n    orientations_3 = [(1, 0), (1, 0), (1, 0), (1, 0)]\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] >= -0.1, \"Scene 1 should have no penalty (no living room furniture)\"\n    assert rewards[1] <= -1.9, \"Scene 2 should have penalty (1 sofa)\"\n    assert rewards[2] <= -5.9, \"Scene 3 should have higher penalty (3 living room items)\"\n    print(\"All tests passed for minimal_living_room_furniture!\")",
  "success_threshold": -0.5
},
{
  "id": "R8",
  "constraint_id": "C8",
  "name": "storage_or_display_availability",
  "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Reward for having at least one storage/display unit for classroom materials.\n    \n    Returns:\n        reward: (B,) tensor - 0 if has storage, negative penalty otherwise\n    '''\n    utility_functions = get_all_utility_functions()\n    get_object_count = utility_functions[\"get_object_count_in_a_scene\"][\"function\"]\n    \n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    device = parsed_scenes['device']\n    \n    storage_types = ['bookshelf', 'cabinet', 'shelf']\n    \n    rewards = torch.zeros(B, device=device)\n    \n    for b in range(B):\n        has_storage = False\n        for storage_type in storage_types:\n            count = get_object_count(one_hot[b:b+1], storage_type, idx_to_labels)\n            if count > 0:\n                has_storage = True\n                break\n        \n        if has_storage:\n            rewards[b] = 0.0\n        else:\n            rewards[b] = -3.0\n    \n    return rewards\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Test the storage_or_display_availability reward function.\n    '''\n    utility_functions = get_all_utility_functions()\n    create_scene = utility_functions[\"create_scene_for_testing\"][\"function\"]\n    \n    labels_to_idx = {v: int(k) for k, v in idx_to_labels.items()}\n    bookshelf_idx = labels_to_idx.get('bookshelf', 1)\n    cabinet_idx = labels_to_idx.get('cabinet', 2)\n    chair_idx = labels_to_idx.get('dining_chair', 10)\n    shelf_idx = labels_to_idx.get('shelf', 19)\n    \n    # Scene 1: Has bookshelf\n    num_objects_1 = 2\n    class_label_indices_1 = [bookshelf_idx, chair_idx]\n    translations_1 = [(0, 0.8, 0), (2, 0.4, 0)]\n    sizes_1 = [(0.4, 0.8, 0.3), (0.3, 0.4, 0.3)]\n    orientations_1 = [(1, 0), (1, 0)]\n    scene_1 = 
create_scene(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has cabinet\n    num_objects_2 = 2\n    class_label_indices_2 = [cabinet_idx, chair_idx]\n    translations_2 = [(0, 0.6, 0), (2, 0.4, 0)]\n    sizes_2 = [(0.5, 0.6, 0.4), (0.3, 0.4, 0.3)]\n    orientations_2 = [(1, 0), (1, 0)]\n    scene_2 = create_scene(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: No storage\n    num_objects_3 = 2\n    class_label_indices_3 = [chair_idx, chair_idx]\n    translations_3 = [(0, 0.4, 0), (1, 0.4, 0)]\n    sizes_3 = [(0.3, 0.4, 0.3), (0.3, 0.4, 0.3)]\n    orientations_3 = [(1, 0), (1, 0)]\n    scene_3 = create_scene(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0)\n        for k in tensor_keys\n    }\n    parsed_scenes['room_type'] = room_type\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Rewards:\", rewards)\n    assert rewards.shape[0] == 3\n    \n    assert rewards[0] >= -0.1, \"Scene 1 should have no penalty (has bookshelf)\"\n    assert rewards[1] >= -0.1, \"Scene 2 should have no penalty (has cabinet)\"\n    assert rewards[2] <= -2.9, \"Scene 3 should have penalty (no storage)\"\n    print(\"All tests passed for storage_or_display_availability!\")",
  "success_threshold": -0.5
}
],
"inpaint": {
  "dining_chair": 10
}
}

In [63]:
# import json
# try:
#     rewards_dict = json.loads(reward_functions.split("```json")[1].split("```")[0])
# except Exception as e:
#     try:
#         rewards_dict = json.loads(reward_functions)
#     except Exception as e:
#         rewards_dict = rewards_dict
    
# print(type(rewards_dict))
# print(rewards_dict.keys())
    
# rewards = rewards_dict["rewards"]
# import os
# output_dir = "dynamic_reward_functions"
# if os.path.exists(output_dir):
#     import shutil
#     shutil.rmtree(os.path.abspath(output_dir))

# for reward in rewards:
#     print(reward["id"], reward["constraint_id"], reward["name"])
#     os.makedirs(output_dir, exist_ok=True)
#     # Write the code to a file named after the reward's snake_case name
#     file_path = os.path.join(output_dir, f"{reward['id']}_{reward['name']}.py")
#     with open(file_path, "w") as f:
#         f.write(reward["code"])

#     print(f"Saved code to {file_path}")

In [64]:
# import hydra
# import os

# # Convert relative config dir to absolute path to avoid HydraException
# config_dir = os.path.abspath("../configurations")

# # To override OmegaConf config values (e.g. set dataset="custom_scene"), 
# # you can pass overrides as a list to hydra.compose:
# with hydra.initialize_config_dir(config_dir):
#     overrides = ["dataset=custom_scene", "algorithm=scene_diffuser_midiffusion", "algorithm.custom.old=True"]
#     cfg = hydra.compose(config_name="config", overrides=overrides)

In [65]:
# from commons import verify_tests_for_reward_function, get_reward_stats_from_dataset_helper, get_reward_stats_from_baseline_helper

# test_passed = verify_tests_for_reward_function(ROOM_TYPE)
# if not test_passed:
#     print("Test verification failed!")
#     raise RuntimeError("Test verification failed!")
# print("Test verification passed!")

In [66]:
# inpaint_masks = rewards_dict["inpaint"]
# inpaint_masks = str(inpaint_masks).replace("'", '')
# print(inpaint_masks)
# print(type(inpaint_masks))
# threshold_dict = {}
# for reward in rewards_dict["rewards"]:
#     threshold_dict[reward["name"]] = reward["success_threshold"]
# print(threshold_dict)

In [67]:
# # dataset_stats = get_reward_stats_from_dataset_helper(cfg)
# baseline_stats = get_reward_stats_from_baseline_helper(cfg, inpaint_masks=inpaint_masks, threshold_dict=threshold_dict)  
# # print(f"Dataset stats: {dataset_stats}")
# print(f"Baseline stats: {baseline_stats}")

In [None]:
# import os
# stats = ""

# for txt_file in os.listdir("/media/ajad/YourBook/AshokSaugatResearchBackup/AshokSaugatResearch/steerable-scene-generation/dynamic_constraint_rewards/reward_analysis_txt"):
#     if txt_file.endswith(".txt"):
#         with open(os.path.join("/media/ajad/YourBook/AshokSaugatResearchBackup/AshokSaugatResearch/steerable-scene-generation/dynamic_constraint_rewards/reward_analysis_txt", txt_file), "r") as f:
#             content = f.read()
#             stats += f"\n\n--- Stats from {txt_file} ---\n"
#             stats += content



# llm_user_prompt_3 = f"""
#     Original User Prompt: {USER_PROMPT}
#     Constraints: {constraints}
#     reward_dict: {rewards_dict}
#     Reward Statistics = {stats}
#     """

# console.print(Markdown(llm_user_prompt_3))

In [None]:
# llm_response_2 = 

# Stage 3 system prompt: reward-function finetuning.
#   - curriculum development based on reward statistics and constraints
#   - constraints cleaning to keep only the most relevant constraints which are not already covered by universal rewards
#   - return rewards for first stage training
#
# The f-string interpolates dataset_facts, ROOM_TYPE, dataset_context, scene_representation,
# utility_functions, universal_rewards_info_with_docstrings and reward_function_template, so the
# cells defining those names must have been run first. Literal JSON braces are written as {{ }}
# so the f-string does not treat them as replacement fields.
#
# The response template must itself be valid JSON (apart from the "..." placeholders), because the
# model is told to follow it strictly: arrays are comma-separated and there is no trailing comma.
# Its success_threshold placeholder is a quoted description, so the closing NOTE spells out that
# the real value has to be a bare JSON number.
# NOTE(review): the response template has no "inpaint" key although the prompt discusses
# inpainting — confirm whether the stage-3 response is expected to carry one.

llm_instruction_3 = f"""
# TASK: Reward Functions Finetuning based on reward statistics

You are an expert in 3D scene generation, interior design, and reinforcement learning.
Your task is to analyze the user prompt, initial constraints, initial reward functions and the statistics of those reward functions on entire dataset as well as on synthetic scenes generated from baseline model (1000 scenes). Then, return the new constraints, new reward functions based on the analysis.

Now, as an rl expert in reward shaping if the original reward functions is reasonably learnable then keep them else create easier sub-constraints and make new rewards which are easier for the current baseline model to learn.

## CONTEXT

### Dataset: 3D-FRONT
{dataset_facts}

Here is the dataset information in JSON format about the specific room type: {ROOM_TYPE} you will be working on:
```json
{dataset_context}
```

### Scene Representation
{scene_representation}

### You also have the following utility functions at your disposal which you can use according to the given docstrings.
```json
{utility_functions}
```

### The baseline model is already trained on some universal constraints, so you do not need to consider these constraints while generating new ones. The universal constraints are:
```json
{universal_rewards_info_with_docstrings}
```

## YOUR TASK
Analyze the user prompt, initial constraints, initial reward functions, the statistics of those reward functions on entire dataset as well as on synthetic scenes generated from baseline model (1000 scenes) and all other context i have provided, then provide a comprehensive JSON response with the following structure:

The template for reward function to quantify each constraint satisfaction with python code is as follows:
{reward_function_template}

Note: While using the utility functions, you can use the following code snippet:

```python
from dynamic_constraint_rewards.utilities import get_all_utility_functions

utility_functions = get_all_utility_functions()
return_values = utility_functions["function_name"]["function"](required_arguments(from docstring), **kwargs)
```

Also, passing all required parameters to the utility functions is a must
(Example: don't miss room_type for create_scene_for_testing)

Also, Given the reward constraints, analyze and if there are constraints like: a scene must have n number of objects of a particular class, then inpaint those objects. To inpaint, pass the class labels and counts in the json format as specified below.

Example: If R1 = "a scene must have exactly 4 ceiling lamps", R2 = "a scene must have exactly 2 nightstands", then inpaint the objects with:

```
"inpaint": {{
  "ceiling_lamp": 4,
  "nightstand": 2
}}
```

Also, success_threshold is a float type, that indicates the constraint is satisfied (if unnormalized_raw_reward_value >= success_threshold)
---

Note: Here is the example of the reward statistics format after evaluating the initial reward functions on the baseline model generated scenes (1000 scenes) as well as on the entire dataset for the given room type:

--- Stats from R1_ceiling_lamps_count_llm_summary_baseline.txt ---                                                 

=== REWARD ANALYSIS: R1_ceiling_lamps_count ===                                                                    

PERFORMANCE METRICS: • Success Rate: 100.0% (1000/1000 scenes) • Mean Reward: 0.0000 • Median Reward: 0.0000 •     
Range: [0.0000, 0.0000] • Std Dev: 0.0000 • Percentiles: - P1: 0.0000 - P5: 0.0000 - P25: 0.0000 - P75: 0.0000 -   
P95: 0.0000 - P99: 0.0000                                                                                          

DISTRIBUTION CHARACTERISTICS:                                                                                      

• Skewness: nan • Kurtosis: nan • Min Rate: 100.0% • Near Min Rate: 100.0% • Max Rate: 100.0% • Near Max Rate:     
100.0%                                                                                                             

============================================================                                                       

--- Stats from R1_ceiling_lamps_count_llm_summary_dataset.txt ---                                                  

=== REWARD ANALYSIS: R1_ceiling_lamps_count ===                                                                    

PERFORMANCE METRICS: • Success Rate: 0.0% (0/4041 scenes) • Mean Reward: -3.6847 • Median Reward: -4.0000 • Range: 
[-4.0000, -1.0000] • Std Dev: 0.4746 • Percentiles: - P1: -4.0000 - P5: -4.0000 - P25: -4.0000 - P75: -3.0000 -    
P95: -3.0000 - P99: -3.0000                                                                                        

DISTRIBUTION CHARACTERISTICS:                                                                                      

• Skewness: 0.94 • Kurtosis: -0.66 • Min Rate: 68.9% • Near Min Rate: 68.9% • Max Rate: 0.0% • Near Max Rate: 0.0% 

Where, stats from txt file with _baseline.txt eg R1_ceiling_lamps_count_llm_summary_baseline.txt indicates the reward statistics on the baseline model generated scenes (1000 scenes) and
stats from txt file with _dataset.txt eg R1_ceiling_lamps_count_llm_summary_dataset.txt indicates the reward statistics on the entire dataset for the given room type.

Success Rate = number of scenes satisfying the constraint / total number of scenes * 100%
Mean Reward = average reward value across all scenes
Median Reward = median reward value across all scenes
Range = [minimum reward value, maximum reward value]
Std Dev = standard deviation of reward values
Percentiles = reward values at different percentiles (P1, P5, P25, P75, P95, P99)
Skewness = measure of asymmetry of the reward
Kurtosis = measure of "tailedness" of the reward distribution
Near Max Rate = percentage of scenes with near-maximum reward
Near Min Rate = percentage of scenes with near-minimum reward
Max Rate = percentage of scenes with maximum reward
Min Rate = percentage of scenes with minimum reward

Ignore the nan values in the statistics(if any).

---
If rewards given in input has inpaint info as "inpaint": {{
  "class_label1": count1,
  "class_label2": count2,
  ...,
  "class_labeln": countn
  }}
  
this means that we got the statistics from baseline model generated scenes by hardcoding those objects and inpainted rest of the scene. it was not the distribution learned by the pretrained model from the dataset. But still we need to keep the reward functions for those constraints in the output rewards list because model may generate more objects of that class than the inpainted count and we need to penalize that.

---
Only return the following JSON response (nothing else), follow this structure strictly:

```json
{{
  "constraints": [
    {{
      "id": "SC1",
      "name": "descriptive_snake_case_name",
      "description": "Detailed description of the constraint in natural language explaining what needs to be satisfied in the scene."
    }},
    {{
      "id": "SC2",
      "name": "descriptive_snake_case_name",
      "description": "Detailed description of the constraint in natural language explaining what needs to be satisfied in the scene."
    }},
    ...
    {{
      "id": "SCn",
      "name": "descriptive_snake_case_name",
      "description": "Detailed description of the constraint in natural language explaining what needs to be satisfied in the scene."
    }}
  ],
  "rewards": [
    {{
      "id": "SR1",
      "constraint_id": "SC1",
      "name": "descriptive_snake_case_name",
      "code": "Python Code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "Value in terms of raw reward units as implemented in Python code indicating satisfactory fulfillment of the constraint. This will be used to calculate success rate."
    }},
    {{
      "id": "SR2",
      "constraint_id": "SC2",
      "name": "descriptive_snake_case_name",
      "code": "Python Code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "Value in terms of raw reward units as implemented in Python code indicating satisfactory fulfillment of the constraint. This will be used to calculate success rate."
    }},
    ...
    {{
      "id": "SRn",
      "constraint_id": "SCn",
      "name": "descriptive_snake_case_name",
      "code": "Python Code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "Value in terms of raw reward units as implemented in Python code indicating satisfactory fulfillment of the constraint. This will be used to calculate success rate."
    }}
  ]
}}
```

NOTE: Even if you have inpainted objects due to some constraints, keep those constraints in the rewards list.
NOTE: If you are passing any other arguments other than specified in the function descriptions, make sure to get it from the kwargs dictionary. (kwargs.get("argument_name"))
NOTE: You should use the utility functions exactly as the docstrings provided, all arguments should be passed in the same order as in the docstrings. (followed by kwargs if required)
NOTE: "success_threshold" must be written as a bare JSON number (for example -0.5 or 0.0), never as a quoted string.
"""

# Render the assembled prompt so it can be reviewed before it is sent to the model.
console.print(Markdown(llm_instruction_3))


In [70]:
# Placeholder for the stage-3 model response; nothing in this cell fills it in.
llm_response_3 = None

# Stage 4 system prompt: reward weights for dynamic and universal rewards.
#
# The f-string interpolates dataset_facts, ROOM_TYPE, dataset_context and scene_representation,
# so the cells defining those names must have been run first. Literal JSON braces are written
# as {{ }} so the f-string does not treat them as replacement fields.
#
# The response template must itself be valid JSON (apart from the "..." and weight(float)
# placeholders), because the model is told to follow it strictly: one "importance_weights"
# array, closed exactly once, with no trailing commas.
# NOTE(review): unlike llm_instruction_3, this prompt does not interpolate the universal reward
# descriptions — confirm they are supplied to the model elsewhere (e.g. in the user prompt).

llm_instruction_4 = f"""
# TASK: Assigning importance weights to each of dynamic and universal reward components.

You are an expert in 3D scene generation, interior design, and reinforcement learning.
Your task is to analyze the user prompt, final constraints, final dynamic and universal reward functions. Then, return the weights to be applied to each of the rewards while training the reinforcement learning model.

Now, as an reinforcement learning expert in reward shaping if any of the reward functions conflict then according to the desired behaviour as specified in the user prompt, return the weights to be applied to each of the rewards such that the final reward value will be the most suitable while training the reinforcement learning model.

## CONTEXT

### Dataset: 3D-FRONT
{dataset_facts}

Here is the dataset information in JSON format about the specific room type: {ROOM_TYPE} you will be working on:
```json
{dataset_context}
```

### Scene Representation
{scene_representation}


### The baseline model is already trained on some universal constraints and they are used as part of reward functions as well for regularization.


## YOUR TASK
Analyze the original user prompt, final constraints, final dynamic reward functions and universal reward functions, then provide a comprehensive JSON response with the following structure.

It should be noted that each reward components is converted to the range [-1, 1] before applying the weighted sum so the weights should purely be based on the importance of the rewards. This task is aimed to reduce conflicting rewards because some dynamic reward may try to conflict with these universal ones.

---
Only return the following JSON response (nothing else), follow this structure strictly:

```json
{{
  "importance_weights": [
    {{
      "reward_name": weight(float)
    }},
    {{
      "reward_name": weight(float)
    }},
    ...
    {{
      "reward_name": weight(float)
    }}
  ]
}}
```

"""

In [71]:
# Render the stage-4 system prompt as Markdown in the rich console for review.
console.print(Markdown(markup=llm_instruction_4))

In [72]:
final_constraints_and_dynamic_rewards = {
  "constraints": [
    {
      "id": "SC1",
      "name": "minimum_seating_capacity",
      "description": "Ensure the scene contains seating for at least 10 students. Count all seating furniture (dining_chair, stool, armchair, lounge_chair, chinese_chair) and verify total capacity ≥ 10."
    },
    {
      "id": "SC2",
      "name": "has_teaching_workspace",
      "description": "Verify the scene contains at least one desk or console_table that can serve as a teacher's workspace/teaching station."
    },
    {
      "id": "SC3",
      "name": "sufficient_student_workspace",
      "description": "Ensure there are adequate work surfaces for students. Count dining_table, desk, or console_table instances and verify at least 2 such surfaces exist for a 10-student classroom."
    },
    {
      "id": "SC4",
      "name": "chair_to_table_proximity",
      "description": "Verify that seating positions (chairs/stools) have reasonable proximity to table surfaces within 1.5m, ensuring students can access workspace."
    },
    {
      "id": "SC5",
      "name": "adequate_lighting",
      "description": "Check for presence of sufficient lighting fixtures (pendant_lamp, ceiling_lamp) appropriate for a learning environment. Minimum 1-2 light sources for a classroom."
    },
    {
      "id": "SC6",
      "name": "reduced_living_room_furniture",
      "description": "Penalize presence of typical living room furniture that doesn't belong in a classroom. Focus on most common living room items: sofas (multi_seat_sofa, loveseat_sofa, l_shaped_sofa), coffee_tables, and tv_stands."
    },
    {
      "id": "SC7",
      "name": "storage_availability",
      "description": "Verify presence of at least one storage/display furniture unit (bookshelf, cabinet, shelf) for classroom materials and student belongings."
    }
  ],
  "rewards": [
    {
      "id": "SR1",
      "constraint_id": "SC1",
      "name": "minimum_seating_capacity",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Ensure the scene contains seating for at least 10 students.\n    Count all seating furniture: dining_chair, stool, armchair, lounge_chair, chinese_chair\n    Reward: 0 if count >= 10, otherwise penalty of -(10 - count)\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']  # shape (B, N, num_classes)\n    B = one_hot.shape[0]\n    \n    seating_types = ['dining_chair', 'stool', 'armchair', 'lounge_chair', 'chinese_chair']\n    rewards = []\n    \n    for i in range(B):\n        scene_one_hot = one_hot[i].unsqueeze(0)  # (1, N, num_classes)\n        total_seats = 0\n        \n        for seat_type in seating_types:\n            count = utility_functions['get_object_count_in_a_scene']['function'](\n                scene_one_hot, seat_type, idx_to_labels\n            )\n            total_seats += count\n        \n        if total_seats >= 10:\n            reward_val = 0.0\n        else:\n            reward_val = -(10 - total_seats)\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: Exactly 10 chairs (satisfies constraint)\n    num_objects_1 = 10\n    class_label_indices_1 = [10] * 10  # dining_chair\n    translations_1 = [(i*0.8, 0.5, 0) for i in range(10)]\n    sizes_1 = [(0.3, 0.5, 0.3)] * 10\n    orientations_1 = [(1, 0)] * 10\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Only 7 chairs (does not satisfy)\n    num_objects_2 = 7\n    class_label_indices_2 = [10] * 7\n    
translations_2 = [(i*0.8, 0.5, 0) for i in range(7)]\n    sizes_2 = [(0.3, 0.5, 0.3)] * 7\n    orientations_2 = [(1, 0)] * 7\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: 12 chairs (exceeds requirement)\n    num_objects_3 = 12\n    class_label_indices_3 = [10] * 12\n    translations_3 = [(i*0.8, 0.5, 0) for i in range(12)]\n    sizes_3 = [(0.3, 0.5, 0.3)] * 12\n    orientations_3 = [(1, 0)] * 12\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Minimum Seating Capacity Rewards:\", rewards)\n    \n    # Test cases\n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(-3.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(0.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    },
    {
      "id": "SR2",
      "constraint_id": "SC2",
      "name": "has_teaching_workspace",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Verify the scene contains at least one desk or console_table for teacher workspace.\n    Reward: 0 if at least 1 exists, otherwise -1\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    \n    workspace_types = ['desk', 'console_table']\n    rewards = []\n    \n    for i in range(B):\n        scene_one_hot = one_hot[i].unsqueeze(0)\n        total_workspace = 0\n        \n        for ws_type in workspace_types:\n            count = utility_functions['get_object_count_in_a_scene']['function'](\n                scene_one_hot, ws_type, idx_to_labels\n            )\n            total_workspace += count\n        \n        if total_workspace >= 1:\n            reward_val = 0.0\n        else:\n            reward_val = -1.0\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: Has desk (satisfies)\n    num_objects_1 = 1\n    class_label_indices_1 = [9]  # desk\n    translations_1 = [(2, 0.4, 2)]\n    sizes_1 = [(0.6, 0.4, 0.4)]\n    orientations_1 = [(1, 0)]\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has console_table (satisfies)\n    num_objects_2 = 1\n    class_label_indices_2 = [7]  # console_table\n    translations_2 = [(2, 0.4, 2)]\n    sizes_2 = [(0.6, 0.4, 0.4)]\n    orientations_2 = [(1, 0)]\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 
3: No workspace furniture\n    num_objects_3 = 2\n    class_label_indices_3 = [10, 10]  # only chairs\n    translations_3 = [(0, 0.5, 0), (1, 0.5, 0)]\n    sizes_3 = [(0.3, 0.5, 0.3), (0.3, 0.5, 0.3)]\n    orientations_3 = [(1, 0), (1, 0)]\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Teaching Workspace Rewards:\", rewards)\n    \n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(-1.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    },
    {
      "id": "SR3",
      "constraint_id": "SC3",
      "name": "sufficient_student_workspace",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Ensure there are at least 2 work surfaces (dining_table, desk, console_table) for students.\n    Reward: 0 if count >= 2, otherwise -(2 - count)\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    \n    workspace_types = ['dining_table', 'desk', 'console_table']\n    rewards = []\n    \n    for i in range(B):\n        scene_one_hot = one_hot[i].unsqueeze(0)\n        total_tables = 0\n        \n        for table_type in workspace_types:\n            count = utility_functions['get_object_count_in_a_scene']['function'](\n                scene_one_hot, table_type, idx_to_labels\n            )\n            total_tables += count\n        \n        if total_tables >= 2:\n            reward_val = 0.0\n        else:\n            reward_val = -(2 - total_tables)\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: Has 2 dining tables (satisfies)\n    num_objects_1 = 2\n    class_label_indices_1 = [11, 11]  # dining_table\n    translations_1 = [(1, 0.4, 1), (3, 0.4, 1)]\n    sizes_1 = [(0.8, 0.4, 0.6), (0.8, 0.4, 0.6)]\n    orientations_1 = [(1, 0), (1, 0)]\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has only 1 table\n    num_objects_2 = 1\n    class_label_indices_2 = [11]\n    translations_2 = [(2, 0.4, 2)]\n    sizes_2 = [(0.8, 0.4, 0.6)]\n    orientations_2 = [(1, 0)]\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, 
class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: Has 3 tables (exceeds)\n    num_objects_3 = 3\n    class_label_indices_3 = [11, 11, 9]  # 2 dining_table + 1 desk\n    translations_3 = [(1, 0.4, 1), (3, 0.4, 1), (5, 0.4, 1)]\n    sizes_3 = [(0.8, 0.4, 0.6), (0.8, 0.4, 0.6), (0.6, 0.4, 0.4)]\n    orientations_3 = [(1, 0), (1, 0), (1, 0)]\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Sufficient Student Workspace Rewards:\", rewards)\n    \n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(-1.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(0.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    },
    {
      "id": "SR4",
      "constraint_id": "SC4",
      "name": "chair_to_table_proximity",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Verify that seating has reasonable proximity to tables (within 1.5m).\n    For each chair/stool, check if there's a table within 1.5m.\n    Reward: 0 if all chairs have nearby table, otherwise -1 per unpaired chair\n    '''\n    utility_functions = get_all_utility_functions()\n    positions = parsed_scenes['positions']  # (B, N, 3)\n    object_indices = parsed_scenes['object_indices']  # (B, N)\n    is_empty = parsed_scenes['is_empty']  # (B, N)\n    B = positions.shape[0]\n    N = positions.shape[1]\n    \n    seating_types = ['dining_chair', 'stool', 'armchair', 'lounge_chair', 'chinese_chair']\n    table_types = ['dining_table', 'desk', 'console_table']\n    max_distance = 1.5\n    \n    rewards = []\n    \n    for i in range(B):\n        # Find all chairs and tables in this scene\n        chair_positions = []\n        table_positions = []\n        \n        for j in range(N):\n            if is_empty[i, j]:\n                continue\n            \n            obj_idx = int(object_indices[i, j].item())\n            obj_label = idx_to_labels[str(obj_idx)]\n            pos = positions[i, j]\n            \n            if obj_label in seating_types:\n                chair_positions.append(pos)\n            elif obj_label in table_types:\n                table_positions.append(pos)\n        \n        if len(chair_positions) == 0:\n            rewards.append(0.0)\n            continue\n        \n        if len(table_positions) == 0:\n            # No tables but have chairs - penalize all chairs\n            rewards.append(-len(chair_positions))\n            continue\n        \n        # Count chairs without nearby table\n        unpaired_chairs = 0\n        for chair_pos in chair_positions:\n            has_nearby_table = False\n            
for table_pos in table_positions:\n                dist = utility_functions['distance_2d']['function'](chair_pos.unsqueeze(0), table_pos.unsqueeze(0))\n                if dist.item() <= max_distance:\n                    has_nearby_table = True\n                    break\n            \n            if not has_nearby_table:\n                unpaired_chairs += 1\n        \n        if unpaired_chairs == 0:\n            reward_val = 0.0\n        else:\n            reward_val = -unpaired_chairs\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: 2 chairs near a table (within 1.5m)\n    num_objects_1 = 3\n    class_label_indices_1 = [10, 10, 11]  # 2 chairs + 1 table\n    translations_1 = [(1, 0.5, 1), (2, 0.5, 1), (1.5, 0.4, 1.8)]\n    sizes_1 = [(0.3, 0.5, 0.3), (0.3, 0.5, 0.3), (0.8, 0.4, 0.6)]\n    orientations_1 = [(1, 0), (1, 0), (1, 0)]\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: 2 chairs but one is far from table (>1.5m)\n    num_objects_2 = 3\n    class_label_indices_2 = [10, 10, 11]\n    translations_2 = [(1, 0.5, 1), (5, 0.5, 1), (1.5, 0.4, 1.8)]  # second chair far\n    sizes_2 = [(0.3, 0.5, 0.3), (0.3, 0.5, 0.3), (0.8, 0.4, 0.6)]\n    orientations_2 = [(1, 0), (1, 0), (1, 0)]\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: Chairs but no table\n    num_objects_3 = 2\n    class_label_indices_3 = [10, 10]\n    translations_3 = [(1, 0.5, 1), (2, 0.5, 1)]\n    sizes_3 = [(0.3, 0.5, 0.3), (0.3, 0.5, 0.3)]\n    orientations_3 = [(1, 0), (1, 0)]\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, 
translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Chair to Table Proximity Rewards:\", rewards)\n    \n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(-1.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(-2.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    },
    {
      "id": "SR5",
      "constraint_id": "SC5",
      "name": "adequate_lighting",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Check for adequate lighting: at least 1 lighting fixture (pendant_lamp or ceiling_lamp).\n    Reward: 0 if at least 1 light, otherwise -1\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    \n    lighting_types = ['pendant_lamp', 'ceiling_lamp']\n    rewards = []\n    \n    for i in range(B):\n        scene_one_hot = one_hot[i].unsqueeze(0)\n        total_lights = 0\n        \n        for light_type in lighting_types:\n            count = utility_functions['get_object_count_in_a_scene']['function'](\n                scene_one_hot, light_type, idx_to_labels\n            )\n            total_lights += count\n        \n        if total_lights >= 1:\n            reward_val = 0.0\n        else:\n            reward_val = -1.0\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: Has ceiling lamp\n    num_objects_1 = 1\n    class_label_indices_1 = [3]  # ceiling_lamp\n    translations_1 = [(2, 2.8, 2)]\n    sizes_1 = [(0.3, 0.3, 0.3)]\n    orientations_1 = [(1, 0)]\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has pendant lamp\n    num_objects_2 = 1\n    class_label_indices_2 = [17]  # pendant_lamp\n    translations_2 = [(2, 2.5, 2)]\n    sizes_2 = [(0.2, 0.4, 0.2)]\n    orientations_2 = [(1, 0)]\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: 
No lighting\n    num_objects_3 = 1\n    class_label_indices_3 = [10]  # just a chair\n    translations_3 = [(2, 0.5, 2)]\n    sizes_3 = [(0.3, 0.5, 0.3)]\n    orientations_3 = [(1, 0)]\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Adequate Lighting Rewards:\", rewards)\n    \n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(-1.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    },
    {
      "id": "SR6",
      "constraint_id": "SC6",
      "name": "reduced_living_room_furniture",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Penalize presence of living room furniture: sofas, coffee_table, tv_stand.\n    Reward: 0 if none present, otherwise -1 per item (max penalty -5)\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    \n    unwanted_types = ['multi_seat_sofa', 'loveseat_sofa', 'l_shaped_sofa', 'coffee_table', 'tv_stand']\n    rewards = []\n    \n    for i in range(B):\n        scene_one_hot = one_hot[i].unsqueeze(0)\n        total_unwanted = 0\n        \n        for unwanted_type in unwanted_types:\n            count = utility_functions['get_object_count_in_a_scene']['function'](\n                scene_one_hot, unwanted_type, idx_to_labels\n            )\n            total_unwanted += count\n        \n        if total_unwanted == 0:\n            reward_val = 0.0\n        else:\n            reward_val = -min(total_unwanted, 5)\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: No living room furniture\n    num_objects_1 = 2\n    class_label_indices_1 = [10, 11]  # chair + table\n    translations_1 = [(1, 0.5, 1), (2, 0.4, 1)]\n    sizes_1 = [(0.3, 0.5, 0.3), (0.8, 0.4, 0.6)]\n    orientations_1 = [(1, 0), (1, 0)]\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has 1 sofa and 1 coffee table\n    num_objects_2 = 2\n    class_label_indices_2 = [16, 6]  # multi_seat_sofa + coffee_table\n    translations_2 = [(2, 0.4, 2), (3, 0.3, 3)]\n    sizes_2 = [(1.0, 0.4, 
0.6), (0.5, 0.3, 0.5)]\n    orientations_2 = [(1, 0), (1, 0)]\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: Has 3 unwanted items\n    num_objects_3 = 3\n    class_label_indices_3 = [16, 6, 21]  # sofa + coffee_table + tv_stand\n    translations_3 = [(2, 0.4, 2), (3, 0.3, 3), (4, 0.5, 4)]\n    sizes_3 = [(1.0, 0.4, 0.6), (0.5, 0.3, 0.5), (0.8, 0.5, 0.4)]\n    orientations_3 = [(1, 0), (1, 0), (1, 0)]\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Reduced Living Room Furniture Rewards:\", rewards)\n    \n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(-2.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(-3.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    },
    {
      "id": "SR7",
      "constraint_id": "SC7",
      "name": "storage_availability",
      "code": "import torch\nfrom dynamic_constraint_rewards.utilities import get_all_utility_functions, create_scene_for_testing\n\n\ndef get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs):\n    '''\n    Verify presence of at least one storage unit (bookshelf, cabinet, shelf).\n    Reward: 0 if at least 1 exists, otherwise -1\n    '''\n    utility_functions = get_all_utility_functions()\n    one_hot = parsed_scenes['one_hot']\n    B = one_hot.shape[0]\n    \n    storage_types = ['bookshelf', 'cabinet', 'shelf']\n    rewards = []\n    \n    for i in range(B):\n        scene_one_hot = one_hot[i].unsqueeze(0)\n        total_storage = 0\n        \n        for storage_type in storage_types:\n            count = utility_functions['get_object_count_in_a_scene']['function'](\n                scene_one_hot, storage_type, idx_to_labels\n            )\n            total_storage += count\n        \n        if total_storage >= 1:\n            reward_val = 0.0\n        else:\n            reward_val = -1.0\n        \n        rewards.append(reward_val)\n    \n    reward = torch.tensor(rewards, device=parsed_scenes['device'], dtype=torch.float)\n    return reward\n\n\ndef test_reward(idx_to_labels, room_type, floor_polygons, **kwargs):\n    import torch\n    \n    # Scene 1: Has bookshelf\n    num_objects_1 = 1\n    class_label_indices_1 = [1]  # bookshelf\n    translations_1 = [(1, 1.0, 1)]\n    sizes_1 = [(0.4, 1.0, 0.3)]\n    orientations_1 = [(1, 0)]\n    scene_1 = create_scene_for_testing(room_type, num_objects_1, class_label_indices_1, translations_1, sizes_1, orientations_1)\n    \n    # Scene 2: Has cabinet\n    num_objects_2 = 1\n    class_label_indices_2 = [2]  # cabinet\n    translations_2 = [(2, 0.5, 2)]\n    sizes_2 = [(0.5, 0.5, 0.4)]\n    orientations_2 = [(1, 0)]\n    scene_2 = create_scene_for_testing(room_type, num_objects_2, class_label_indices_2, translations_2, sizes_2, orientations_2)\n    \n    # Scene 3: No storage furniture\n    
num_objects_3 = 2\n    class_label_indices_3 = [10, 11]  # chair + table\n    translations_3 = [(1, 0.5, 1), (2, 0.4, 2)]\n    sizes_3 = [(0.3, 0.5, 0.3), (0.8, 0.4, 0.6)]\n    orientations_3 = [(1, 0), (1, 0)]\n    scene_3 = create_scene_for_testing(room_type, num_objects_3, class_label_indices_3, translations_3, sizes_3, orientations_3)\n    \n    tensor_keys = [k for k in scene_1 if isinstance(scene_1[k], torch.Tensor)]\n    parsed_scenes = {\n        k: torch.cat([scene_1[k], scene_2[k], scene_3[k]], dim=0) for k in tensor_keys\n    }\n    parsed_scenes['device'] = scene_1['device']\n    \n    rewards = get_reward(parsed_scenes, idx_to_labels, room_type, floor_polygons, **kwargs)\n    print(\"Storage Availability Rewards:\", rewards)\n    \n    assert torch.isclose(rewards[0], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[1], torch.tensor(0.0, device=parsed_scenes['device']))\n    assert torch.isclose(rewards[2], torch.tensor(-1.0, device=parsed_scenes['device']))\n    \n    return rewards\n",
      "success_threshold": "0.0"
    }
  
    ],
    "inpaint": {
        "dining_chair": 10
    }
    }

In [73]:
# Assemble the fourth user prompt one line at a time: the original request, then the
# final constraints, the dynamic reward functions, and the universal reward functions.
# The leading "\n" and the trailing newline are part of the prompt text.
llm_user_prompt_4 = (
    "\n"
    f"Original User Prompt: {USER_PROMPT}\n"
    f"Final Constraints: {final_constraints_and_dynamic_rewards['constraints']}\n"
    f"Final Dynamic Reward Functions: {final_constraints_and_dynamic_rewards['rewards']}\n"
    f"Final Universal Reward Functions: {universal_rewards_info_with_docstrings}\n"
)

In [74]:
# Render llm_user_prompt_4 as Markdown through the rich console so it can be inspected.
console.print(Markdown(llm_user_prompt_4))

In [75]:
# No LLM call is made in this cell; the response variable is only initialised to None.
llm_response_4 = None

## Phase 2: RL Training

In [76]:
# Run the training loop for N steps with the selected rewards and weights,
# using an appropriate lr and ddmp_regularization_weight.
# If test cases fail or syntax errors occur, feed the error messages back to the LLM so it can fix the code automatically.

## Phase 3: Feedback to LLM

In [77]:
# Run get_reward_stats_from_baseline, where the baseline is now the model after RL training,
# to collect the feedback stats that go into this prompt.
#
# TODO: `all_info_from_phase_1` and `feedback_stats` are not defined in the visible part of
# this notebook; both must be assigned before this cell can run.
#
# The JSON schema below is literal text, so its braces are doubled ({{ and }}). With single
# braces the f-string would treat them as replacement fields and try to format "rewards"
# with the rest of the schema as a format spec, failing as soon as the two names above exist.
llm_instruction_5 = f"""
{all_info_from_phase_1}

{feedback_stats}

return {{
  "rewards": [
    {{
      "id": "R1",
      "constraint_id": "C1",
      "name": "descriptive_snake_case_name",
      "code": "python code implementing get_reward and test_reward functions as per the template",
      "success_threshold": "value in raw reward units as in python implementation indicating satisfactory fulfillment of the constraint. this will be used to calculate success rate."
    }}
  ],
  "weights": {{
    "R1": x  
  }}
    

            
}}
"""

NameError: name 'all_info_from_phase_1' is not defined

## Phase 4: Further RL training

In [None]:
# run training

## Repeat if necessary

In [None]:
We trained an RL policy using the provided reward function code and tracked the values of the
individual components in the reward function as well as global policy metrics such as
success rates and episode lengths after every {epoch_freq} epochs and the maximum, mean,
minimum values encountered:
<REWARD REFLECTION HERE>
Please carefully analyze the policy feedback and provide a new, improved reward function that
can better solve the task. Some helpful tips for analyzing the policy feedback:
(1) If the success rates are always near zero, then you must rewrite the entire reward
function
(2) If the values for a certain reward component are near identical throughout, then this
means RL is not able to optimize this component as it is written. You may consider
(a) Changing its scale or the value of its temperature parameter
(b) Re-writing the reward component