In [49]:
from architectural_principles import ArchitecturalConstraints, State
from train import ExperimentManager
from environment import ArchitecturalEnvironment
import numpy as np
import copy
def_rooms = ArchitecturalConstraints.default_rooms()
config = {
    'env': {
        'grid_size': (10, 10),
        'max_steps': 500,
        'required_rooms': {room: def_rooms[room] for room in list(def_rooms.keys())[:1]}
    },
    'algorithms': {
        'value_iteration': {
            'gamma': .95,
            'theta': 0.001
        }
    }
}



In [52]:
env = ArchitecturalEnvironment((10, 10), 50, def_rooms)
init_state = env.reset()
state_value_map = {}
sampled_states = [init_state]
actions = ExperimentManager(config)._get_action_space()
num_iterations = 5
max_states = 100
gamma = 0.95

def state_to_key(state) -> str:
    """Convert state to a hashable key."""
    return str(state.layout) + str(state.placed_rooms) + str(state.current_step)

def sample_initial_states():
    """Generate initial random states."""
    states = []
    env.reset()
    for room_name in env.required_rooms.keys():
        action = {
            "type": "add_room",
            "params": {
                "name": room_name,
                "room_type": env.required_rooms[room_name].room_type,
                "position": (
                    np.random.randint(0, env.grid_size[0] - 1),
                    np.random.randint(0, env.grid_size[1] - 1)
                ),
                "size": env.required_rooms[room_name].min_size,
            },
        }
        env.step(action)
        states.append(env._get_state())
    return states


def expand_states(sampled_states: list[State], actions: list[dict]):
    # Expand states using valid actions
    expanded_states = []
    for state in sampled_states:
        env.set_state(state)
        for action in actions:
            next_state, reward, _, _ = env.step(action)
            # Add new state if it wasn't visited before
            key = state_to_key(next_state)
            if key not in state_value_map and len(state_value_map) < max_states:
                state_value_map[key] = (action, 0.0)
                expanded_states.append(next_state)
    return expanded_states

# Sample initial states
sampled_states = sample_initial_states()
for state in sampled_states:
    state_value_map[state_to_key(state)] = ('any', 0.0)

for i in range(num_iterations):
    new_state_value_map = copy.deepcopy(state_value_map)

    expanded_states = expand_states(sampled_states, actions)

    # Update value function
    for state in sampled_states:
        env.set_state(state)
        max_next_value = 0
        next_action = 'any'
        for action in actions:
            next_state, reward, _, _ = env.step(action)
            key = state_to_key(next_state)
            V_t = reward + gamma * state_value_map.get(key, 1)
            max_next_value = max(max_next_value, V_t)
            if max_next_value == V_t:
                next_action = action

        # Update the value of the current state
        new_state_value_map[state_to_key(state)] = (next_action, max_next_value)

    # Replace old value map with the new one
    state_value_map = new_state_value_map

    # Deduplicate states and merge expanded states
    sampled_states.extend(expanded_states)
    sampled_states = list({state_to_key(s): s for s in sampled_states}.values())
    
state_value_map

{"[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]]{'living': {'id': 1, 'type': <RoomType.LIVING: 'living'>, 'position': (4, 4), 'size': (4, 4)}, 'bedroom': {'id': 2, 'type': <RoomType.BEDROOM: 'bedroom'>, 'position': (3, 0), 'size': (3, 3)}, 'bathroom': {'id': 3, 'type': <RoomType.BATHROOM: 'bathroom'>, 'position': (7, 0), 'size': (2, 2)}, 'entry': {'id': 4, 'type': <RoomType.ENTRY: 'entry'>, 'position': (1, 4), 'size': (2, 2)}, 'corridor': {'id': 5, 'type': <RoomType.CORRIDOR: 'corridor'>, 'position': (1, 7), 'size': (1, 3)}}1": ('any',
  0.0),
 "[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 1 1 1 1 0 0]\n [2 2 2 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]]{'living': {'id'

In [11]:
import pandas as pd

In [57]:
value_df = pd.DataFrame(state_value_map, index=['action', 'value']).T

In [66]:
value_df.value.max()

2.208142857142857

In [65]:
value_df.iloc[np.where(value_df.value == value_df.value.max())[0][0]]

action    {'type': 'add_room', 'params': {'name': 'livin...
value                                              2.208143
Name: [[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [2 2 2 0 0 0 0 0 0 0]\n [2 2 2 0 1 1 1 1 0 0]\n [2 2 2 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 1 1 1 1 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]]{'bedroom': {'id': 2, 'type': <RoomType.BEDROOM: 'bedroom'>, 'position': (3, 0), 'size': (3, 3)}, 'bathroom': {'id': 3, 'type': <RoomType.BATHROOM: 'bathroom'>, 'position': (7, 0), 'size': (2, 2)}, 'entry': {'id': 4, 'type': <RoomType.ENTRY: 'entry'>, 'position': (1, 4), 'size': (2, 2)}, 'corridor': {'id': 5, 'type': <RoomType.CORRIDOR: 'corridor'>, 'position': (1, 7), 'size': (1, 3)}, 'living': {'id': 6, 'type': <RoomType.LIVING: 'living'>, 'position': (6, 6), 'size': (4, 4)}}2, dtype: object