In [1]:
from architectural_principles import ArchitecturalConstraints, State
from train import ExperimentManager
from environment import ArchitecturalEnvironment

In [2]:
# Experiment configuration handed to ExperimentManager: environment settings
# plus per-algorithm hyperparameters.
# NOTE(review): the environment constructed later in this notebook is built
# with 50 where this config says max_steps=500 — confirm which is intended.
config = dict(
    env=dict(
        grid_size=(10, 10),
        max_steps=500,
        required_rooms=ArchitecturalConstraints.default_rooms(),
    ),
    algorithms=dict(
        value_iteration=dict(gamma=.95, theta=0.001),
    ),
)

# Result of a second, separate default_rooms() call; this is the object passed
# to the environment constructor further down.
def_rooms = ArchitecturalConstraints.default_rooms()

In [7]:
import numpy as np
import copy
# Seed NumPy's global generator so the room positions drawn with
# np.random.randint in sample_initial_states are identical on every
# Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Shared environment used by every helper in this cell.
# NOTE(review): the second argument (50) is presumably max_steps, while
# config['env']['max_steps'] is 500 — confirm which value is intended.
env = ArchitecturalEnvironment((10, 10), 50, def_rooms)
init_state = env.reset()

# Value table: state key (see state_to_key) -> current value estimate.
state_value_map = {}

# Placeholder only: replaced further down in this cell by the output of
# sample_initial_states() before it is ever read.
sampled_states = [init_state]

# Candidate actions evaluated from every sampled state.
actions = ExperimentManager(config)._get_action_space()

# Hyperparameters of the sampled value iteration.
num_iterations = 30   # number of synchronous backup sweeps
max_states = 1000     # cap on the number of keys expand_states may register
gamma = 0.95          # discount factor applied to the successor's value
def state_to_key(state) -> str:
    """Build a hashable string key identifying ``state``.

    The key is the ``str()`` of ``state.layout``, ``state.placed_rooms`` and
    ``state.current_step`` joined back to back with no separator, so two
    states get the same key exactly when those three string forms match.

    NOTE(review): if ``layout`` is a NumPy array, ``str()`` abbreviates arrays
    above NumPy's print threshold, which could make distinct layouts collide —
    confirm the layout type and size.
    """
    parts = (state.layout, state.placed_rooms, state.current_step)
    return "".join(map(str, parts))

def sample_initial_states():
    """Collect starting states by stepping one ``add_room`` action per required room.

    The shared ``env`` is reset once, then for every entry of
    ``env.required_rooms`` an ``add_room`` action is stepped using that room's
    ``room_type`` and ``min_size`` and a randomly drawn position. The
    environment is not reset between rooms, so each step acts on whatever the
    previous step left behind. The value of ``env._get_state()`` after each
    step is recorded.

    Returns:
        list: one recorded state per required room, in the iteration order of
        ``env.required_rooms``.
    """
    env.reset()

    # NOTE(review): np.random.randint excludes its upper bound, so with
    # ``grid_size - 1`` the drawn coordinates lie in [0, grid_size - 2].
    # Confirm that leaving out the last index is intended.
    high_0 = env.grid_size[0] - 1
    high_1 = env.grid_size[1] - 1

    snapshots = []
    for room_name, spec in env.required_rooms.items():
        origin = (np.random.randint(0, high_0), np.random.randint(0, high_1))
        # The step's return value is ignored; the state is read back from the
        # environment instead.
        env.step({
            "type": "add_room",
            "params": {
                "name": room_name,
                "room_type": spec.room_type,
                "position": origin,
                "size": spec.min_size,
            },
        })
        snapshots.append(env._get_state())
    return snapshots


def expand_states(sampled_states: list[State], actions: list[dict]):
    """Return the not-yet-seen one-step successors of ``sampled_states``.

    For every state and every action, the shared ``env`` is put back into that
    state and stepped once. A successor whose key (``state_to_key``) is not
    yet in the module-level ``state_value_map`` is registered there with value
    0.0 and appended to the result.

    The environment is reset to ``state`` before *each* action. ``env.step``
    advances the environment, so resetting only once per state would apply the
    second and later actions to the result of the previous action rather than
    to ``state`` itself.

    Args:
        sampled_states: states to expand.
        actions: candidate actions to try from each state.

    Returns:
        list: newly discovered successor states, in discovery order.

    Side effects:
        Adds keys to ``state_value_map`` and leaves ``env`` in whatever state
        the last step produced. Expansion stops as soon as
        ``state_value_map`` holds ``max_states`` keys.
    """
    expanded_states = []
    for state in sampled_states:
        for action in actions:
            # The map only grows inside this call, so once the cap is hit no
            # later step could add anything — stop instead of stepping on.
            if len(state_value_map) >= max_states:
                return expanded_states

            # NOTE(review): assumes set_state/step do not mutate ``state`` in
            # place; if they do, pass a copy here — confirm against the env.
            env.set_state(state)
            next_state, _, _, _ = env.step(action)

            # Add new state if it wasn't visited before
            key = state_to_key(next_state)
            if key not in state_value_map:
                state_value_map[key] = 0.0
                expanded_states.append(next_state)
    return expanded_states

# Sample initial states and register each one in the value table at 0.
sampled_states = sample_initial_states()
for state in sampled_states:
    state_value_map[state_to_key(state)] = 0.0

for _ in range(num_iterations):
    # Discover successors BEFORE snapshotting the table. expand_states
    # registers new keys in state_value_map; snapshotting first would drop
    # those keys when the table is rebound below, so the max_states cap would
    # be checked against a stale map and the same states rediscovered on
    # every sweep.
    expanded_states = expand_states(sampled_states, actions)

    # Synchronous backup: read from the old table, write into the copy.
    # Keys are strings and values floats, so a shallow copy is sufficient.
    new_state_value_map = dict(state_value_map)

    # Update value function
    for state in sampled_states:
        action_values = []
        for action in actions:
            # Reset before every action: env.step advances the environment,
            # so without this each action would start from the previous
            # action's successor instead of from ``state``.
            env.set_state(state)
            # NOTE(review): the third element returned by step is presumably
            # a "done" flag; if so, terminal successors should not be
            # bootstrapped from — confirm against the environment.
            next_state, reward, _, _ = env.step(action)
            key = state_to_key(next_state)
            action_values.append(reward + gamma * state_value_map.get(key, 0))

        # Bellman optimality backup: the best action value, even when every
        # action value is negative (no floor at 0). Falls back to 0 only when
        # there are no actions at all.
        max_next_value = max(action_values, default=0)

        # Update the value of the current state
        new_state_value_map[state_to_key(state)] = max_next_value

    # Replace old value map with the new one
    state_value_map = new_state_value_map

    # Deduplicate states and merge expanded states
    sampled_states.extend(expanded_states)
    sampled_states = list({state_to_key(s): s for s in sampled_states}.values())

state_value_map

KeyError: 7