In [49]:
from architectural_principles import ArchitecturalConstraints, State
from train import ExperimentManager
from environment import ArchitecturalEnvironment
import numpy as np
import copy
def_rooms = ArchitecturalConstraints.default_rooms()
config = {
    'env': {
        'grid_size': (10, 10),
        'max_steps': 500,
        'required_rooms': {room: def_rooms[room] for room in list(def_rooms.keys())[:1]}
    },
    'algorithms': {
        'value_iteration': {
            'gamma': .95,
            'theta': 0.001
        }
    }
}



In [None]:
env = ArchitecturalEnvironment((10, 10), 50, def_rooms)
init_state = env.reset()
state_value_map = {}
sampled_states = [init_state]
actions = ExperimentManager(config)._get_action_space()
num_iterations = 9
max_states = 1000
gamma = 0.8

def state_to_key(state) -> str:
    """Convert state to a hashable key."""
    return str(state.layout) + str(state.placed_rooms) + str(state.current_step)

def sample_initial_states():
    """Generate initial random states."""
    states = []
    env.reset()
    for room_name in env.required_rooms.keys():
        action = {
            "type": "add_room",
            "params": {
                "name": room_name,
                "room_type": env.required_rooms[room_name].room_type,
                "position": (
                    np.random.randint(0, env.grid_size[0] - 1),
                    np.random.randint(0, env.grid_size[1] - 1)
                ),
                "size": env.required_rooms[room_name].min_size,
            },
        }
        env.step(action)
        states.append(env._get_state())
    return states


def expand_states(sampled_states: list[State], actions: list[dict]):
    # Expand states using valid actions
    expanded_states = []
    for state in sampled_states:
        env.set_state(state)
        for action in actions:
            next_state, reward, _, _ = env.step(action)
            # Add new state if it wasn't visited before
            key = state_to_key(next_state)
            if key not in state_value_map and len(state_value_map) < max_states:
                state_value_map[key] = (action, 0.0)
                expanded_states.append(next_state)
    return expanded_states

# Sample initial states
sampled_states = sample_initial_states()
for state in sampled_states:
    state_value_map[state_to_key(state)] = ('any', 0.0)

for i in range(num_iterations):
    new_state_value_map = copy.deepcopy(state_value_map)

    expanded_states = expand_states(sampled_states, actions)

    # Update value function
    for state in sampled_states:
        env.set_state(state)
        max_next_value = 0
        next_action = 'any'
        for action in actions:
            next_state, reward, _, _ = env.step(action)
            key = state_to_key(next_state)
            _, value = state_value_map.get(key, ('any', 0.0))
            V_t = reward + gamma * value
            max_next_value = max(max_next_value, V_t)
            if max_next_value == V_t:
                next_action = action

        # Update the value of the current state
        new_state_value_map[state_to_key(state)] = (next_action, max_next_value)

    # Replace old value map with the new one
    state_value_map = new_state_value_map

    # Deduplicate states and merge expanded states
    sampled_states.extend(expanded_states)
    sampled_states = list({state_to_key(s): s for s in sampled_states}.values())
    
state_value_map

In [11]:
import pandas as pd

In [82]:
value_df = pd.DataFrame(state_value_map, index=['action', 'value']).T

In [91]:
state_to_key(init_state) == value_df.index

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [92]:
value_df.index[0]

"[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]]{'living': {'id': 1, 'type': <RoomType.LIVING: 'living'>, 'position': (2, 6), 'size': (4, 4)}, 'bedroom': {'id': 2, 'type': <RoomType.BEDROOM: 'bedroom'>, 'position': (6, 7), 'size': (3, 3)}, 'entry': {'id': 3, 'type': <RoomType.ENTRY: 'entry'>, 'position': (3, 2), 'size': (2, 2)}, 'corridor': {'id': 4, 'type': <RoomType.CORRIDOR: 'corridor'>, 'position': (2, 1), 'size': (1, 3)}}1"

In [84]:
value_df.index

Index(['[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]]{'living': {'id': 1, 'type': <RoomType.LIVING: 'living'>, 'position': (2, 6), 'size': (4, 4)}, 'bedroom': {'id': 2, 'type': <RoomType.BEDROOM: 'bedroom'>, 'position': (6, 7), 'size': (3, 3)}, 'entry': {'id': 3, 'type': <RoomType.ENTRY: 'entry'>, 'position': (3, 2), 'size': (2, 2)}, 'corridor': {'id': 4, 'type': <RoomType.CORRIDOR: 'corridor'>, 'position': (2, 1), 'size': (1, 3)}}1',
       '[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 2 2 2]\n [0 0 0 0 0 0 0 2 2 2]\n [0 0 0 0 0 0 0 2 2 2]\n [0 0 0 0 0 0 0 0 0 0]]{'living': {'id': 1, 'type': <RoomType.LIVING: 'living'>, 'position': (2, 6), 'size': (4, 4)}, 'bedroom': {'id': 2, 'typ

In [83]:
value_df.iloc[np.where(value_df.value == value_df.value.max())[0][0]].action

{'type': 'add_room',
 'params': {'name': 'living',
  'room_type': <RoomType.LIVING: 'living'>,
  'position': (0, 0),
  'size': (4, 4)}}