In [1]:
import json
import random
import numpy as np

def generate_random_grid(
        grid_size,
        num_obstacles,
        num_lightnings,
        alpha=3.0,
        beta=1.0,
        num_actions=4,
        gamma=0.95,
        pi_iters=100,
        p_correct_action=0.8,
        bp_iters=10,
        run_bp=False,
        seed=None,
        episodes=1000
    ):
    """
    Generates a random grid configuration and returns it as a JSON-formatted string.

    Cells are numbered 0 .. grid_size * grid_size - 1. Cell 0 is always the
    start state and is never chosen for anything else. One treasure cell
    (reward +1.0), `num_lightnings` lightning cells (reward -1.0) and
    `num_obstacles` obstacle cells are drawn without overlap from the other
    cells. The treasure and the lightnings together form the terminal states.

    Args:
        grid_size: Side length of the square grid; must be at least 2.
        num_obstacles: Number of obstacle cells to place (>= 0).
        num_lightnings: Number of lightning cells to place (>= 0).
        alpha, beta, num_actions, gamma, pi_iters, p_correct_action,
        bp_iters, run_bp: Copied into the config unchanged under the key of
            the same name.
        seed: If not None, seeds both the global `random` generator and
            NumPy's global generator before drawing, which makes the layout
            reproducible. Note that this reseeds process-wide state.
        episodes: Value stored under the "episodes" key (default 1000).

    Returns:
        The configuration serialized with `json.dumps(..., indent=2)`.
        The same string is also printed to stdout.

    Raises:
        ValueError: If `grid_size` is smaller than 2, if a count is negative,
            or if the treasure, lightnings and obstacles do not all fit in
            the cells other than the start cell.
    """

    if grid_size < 2:
        raise ValueError(
            f"grid_size must be at least 2 to hold a start and a treasure cell, got {grid_size}"
        )
    if num_obstacles < 0 or num_lightnings < 0:
        raise ValueError(
            "num_obstacles and num_lightnings must be non-negative, "
            f"got {num_obstacles} and {num_lightnings}"
        )

    n_states = grid_size * grid_size
    start_state = 0

    # The start cell and the treasure each take one cell; everything else
    # must fit in what is left.
    n_free = n_states - 2
    if num_obstacles + num_lightnings > n_free:
        raise ValueError(
            f"cannot place {num_lightnings} lightnings and {num_obstacles} obstacles: "
            f"only {n_free} cells are available on a {grid_size}x{grid_size} grid"
        )

    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # Candidate cells are kept as ascending lists so that a seeded draw does
    # not depend on the interpreter's set iteration order.
    candidates = [cell for cell in range(n_states) if cell != start_state]

    # 1. Treasure (reward +1)
    treasure = random.choice(candidates)

    # 2. Lightnings (terminal -1)
    remaining = [cell for cell in candidates if cell != treasure]
    lightning_cells = random.sample(remaining, num_lightnings)

    # 3. Obstacles
    struck = set(lightning_cells)
    remaining = [cell for cell in remaining if cell not in struck]
    obstacle_cells = random.sample(remaining, num_obstacles)

    # 4. Reward states dictionary (keys are strings, as JSON object keys must be)
    reward_states = {str(treasure): 1.0}
    for cell in lightning_cells:
        reward_states[str(cell)] = -1.0

    # 5. Terminal states
    terminal_states = [treasure] + lightning_cells

    # 6. Evidence (empty by default)
    evidence = {}

    # Final config dict
    config = {
        "grid_size": grid_size,
        "reward_states": reward_states,
        "obstacle_states": obstacle_cells,
        "terminal_states": terminal_states,
        "alpha": alpha,
        "beta": beta,
        "num_actions": num_actions,
        "start_state": start_state,
        "evidence": evidence,
        "bp_iters": bp_iters,
        "run_bp": run_bp,
        "gamma": gamma,
        "pi_iters": pi_iters,
        "episodes": episodes,
        "p_correct_action": p_correct_action
    }

    # Convert to JSON string with correct formatting
    json_string = json.dumps(config, indent=2)
    print(json_string)

    return json_string


In [2]:

# Square board with `size` cells per side.
size = 10
n_cells = size ** 2

# Share of the board given to each hazard type: 10% obstacles, 5% lightnings.
num_obstacles = int(0.10 * n_cells)
num_lightnings = int(0.05 * n_cells)

# No seed is passed, so every run produces a different layout.
generate_random_grid(
    grid_size=size,
    num_obstacles=num_obstacles,
    num_lightnings=num_lightnings,
)

{
  "grid_size": 10,
  "reward_states": {
    "9": 1.0,
    "55": -1.0,
    "7": -1.0,
    "48": -1.0,
    "34": -1.0,
    "52": -1.0
  },
  "obstacle_states": [
    57,
    36,
    86,
    69,
    63,
    76,
    18,
    11,
    70,
    90
  ],
  "terminal_states": [
    9,
    55,
    7,
    48,
    34,
    52
  ],
  "alpha": 3.0,
  "beta": 1.0,
  "num_actions": 4,
  "start_state": 0,
  "evidence": {},
  "bp_iters": 10,
  "run_bp": false,
  "gamma": 0.95,
  "pi_iters": 100,
  "episodes": 1000,
  "p_correct_action": 0.8
}


'{\n  "grid_size": 10,\n  "reward_states": {\n    "9": 1.0,\n    "55": -1.0,\n    "7": -1.0,\n    "48": -1.0,\n    "34": -1.0,\n    "52": -1.0\n  },\n  "obstacle_states": [\n    57,\n    36,\n    86,\n    69,\n    63,\n    76,\n    18,\n    11,\n    70,\n    90\n  ],\n  "terminal_states": [\n    9,\n    55,\n    7,\n    48,\n    34,\n    52\n  ],\n  "alpha": 3.0,\n  "beta": 1.0,\n  "num_actions": 4,\n  "start_state": 0,\n  "evidence": {},\n  "bp_iters": 10,\n  "run_bp": false,\n  "gamma": 0.95,\n  "pi_iters": 100,\n  "episodes": 1000,\n  "p_correct_action": 0.8\n}'