In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np
import json
from simulation_utils import *
from behavior_model_utils import * 

## Human Behavior Model in Single-Player Grid 6x6 Environments

In [2]:
grid_size = 6
search_depth = 10

# Load pre-trained human behavior model for 6x6 grid.
# The checkpoint is a whole pickled module (the cell output shows a HumanNN instance),
# not a state_dict. PyTorch >= 2.6 defaults torch.load to weights_only=True, which
# refuses to unpickle arbitrary classes, so weights_only=False is passed explicitly
# to keep the notebook runnable on a fresh, up-to-date environment.
# NOTE(review): full unpickling can execute arbitrary code -- only load checkpoints
# you trust. Presumably the HumanNN class comes from behavior_model_utils; confirm.
model = torch.load(
    '../behavior-models/human_model_grid6.pt',
    map_location=torch.device('cpu'),
    weights_only=False,
)
# Put the module in evaluation mode; as the last expression of the cell this also
# displays the module's layer summary.
model.eval()

HumanNN(
  (fc1): Linear(in_features=144, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=4, bias=True)
  (sm): Softmax(dim=1)
)

In [3]:
# Set random seed for reproducibility (Python, NumPy and PyTorch generators)
seed_number = 42
random.seed(seed_number)
np.random.seed(seed_number)
torch.manual_seed(seed_number)

# Randomly choose the number of blocked positions based on the grid size
num_blocked_positions = random.choice(range(grid_size * 2))

# A distance to the goal at or above this value marks the environment as invalid.
invalid_distance = 50
# Upper bound on resampling so a pathological configuration cannot loop forever.
max_sampling_attempts = 100

# Draw environments until one is valid. Previously an invalid draw was only reported
# and the search below still ran on it; now the draw is rejected and repeated.
# The first draw is identical to the original single draw.
for _attempt in range(max_sampling_attempts):
    # Randomize positions for goal, special reward, blocked positions, and start position
    # (presumably the 1 and 0 are the goal and special-reward counts -- TODO confirm)
    new_goal_positions, new_special_reward_positions, new_blocked_positions, start_pos = randomize_pos(
        grid_size, 1, 0, num_blocked_positions
    )

    # Encode the grid design into a numpy array
    layout0 = encode_grid_design_numpy(
        n=grid_size,
        goal_positions=new_goal_positions,
        blocked_positions=new_blocked_positions,
        start_pos=start_pos
    )

    # Build the map with the given layout and start position
    dis = build_map(layout0, start_pos=start_pos)

    # Accept the draw only if the distance to the goal is below the invalid threshold
    if dis[new_goal_positions[0][0], new_goal_positions[0][1]] < invalid_distance:
        break
    print('Invalid environment')
else:
    # for/else: reached only when no attempt hit `break`, i.e. every draw was invalid
    raise RuntimeError(
        f'No valid environment found after {max_sampling_attempts} attempts '
        f'(grid_size={grid_size}, num_blocked_positions={num_blocked_positions})'
    )

# Generate all possible layouts
all_layout = generate_all_layout(layout0, grid_size=grid_size)

# Predict valid moves and human behavior
valids, human_pred = move_pred(all_layout, model, grid_size=grid_size)

# Get the goal position from the layout: first nonzero cell of channel 3
goal_pos1 = tuple(np.argwhere(layout0[3, :, :])[0])

# Compute the human path using search with preset look ahead depth
move, wcd = compute_human_path_searchk_preset(
    layout0, all_layout, valids, model, look_ahead=search_depth, grid_size=grid_size, goal_pos=goal_pos1
)

# Print the action history
print('Action history is:', move)


Action history is: [2, 2, 2, 1, 2, 2]


## Human Behavior Model in Two-Player Grid 8x8 Environments

In [4]:
grid_size = 8
search_depth = 10

# Load pre-trained human behavior model for the 8x8 grid.
# The checkpoint is a whole pickled module (the cell output shows a HumanNN instance),
# not a state_dict. PyTorch >= 2.6 defaults torch.load to weights_only=True, which
# refuses to unpickle arbitrary classes, so weights_only=False is passed explicitly
# to keep the notebook runnable on a fresh, up-to-date environment.
# NOTE(review): full unpickling can execute arbitrary code -- only load checkpoints
# you trust.
model = torch.load(
    '../behavior-models/human_model_grid8.pt',
    map_location=torch.device('cpu'),
    weights_only=False,
)
# Put the module in evaluation mode; as the last expression of the cell this also
# displays the module's layer summary.
model.eval()

HumanNN(
  (fc1): Linear(in_features=160, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=4096, bias=True)
  (fc4): Linear(in_features=4096, out_features=5, bias=True)
  (sm): Softmax(dim=1)
)

In [5]:
# Read the experiment-3 map layouts (two-player 8x8 grid environments) from JSON.
# The result is indexed by layout number in the next cell.
layout_json_path = '../experiment3/map_layout_experiment3.json'
with open(layout_json_path, 'r') as json_file:
    experiment3_layouts = json.loads(json_file.read())

In [6]:
# Select a specific map layout for the experiment
idx = 26
seq = experiment3_layouts[idx]
# Each entry unpacks into four fields; the fourth is not used here. It is bound to a
# named throwaway instead of `_` so IPython's last-output variable is not shadowed.
block_pos, start_pos, goal_pos, _unused_field = seq

# Convert positions to tuples
start_pos = [tuple(pos) for pos in start_pos]
goal_pos = [tuple(pos) for pos in goal_pos]

# Initialize the environment with the given parameters
env = navigation_share_env(grid_size, start_pos, goal_pos, block_pos)
env.reset()

# Behavior sampling flag.
# NOTE(review): this flag is never read in this cell -- actions below are always the
# argmax of the model output, i.e. greedy rather than sampled.
random_behavior = False

# Value passed as `beta` to model.forward_beta for both players
beta = 0.03

# Initialize rewards and hit tracking
rewards = 0
hits = [False, False]

# Initialize done flag and step counter
done = False
counts = 0
max_step = 30  # Maximum steps allowed in the experiment

# Run the experiment loop.
# `counts < max_step` caps the rollout at exactly max_step environment steps; the
# previous `<=` comparison allowed one extra (31st) step.
while not done and counts < max_step:
    # Get the current layout for both players
    s = get_both_layout(env)
    counts += 1

    # Get action probabilities from the model for both players
    # (each layout is flattened into a single-row batch; [0] takes that row back out)
    prob1 = model.forward_beta(torch.tensor(s[0]).reshape(1, -1), beta=beta).detach().numpy()[0]
    prob2 = model.forward_beta(torch.tensor(s[1]).reshape(1, -1), beta=beta).detach().numpy()[0]

    # Select actions with the highest probabilities (as plain Python ints)
    a1, a2 = int(np.argmax(prob1)), int(np.argmax(prob2))

    # Take a step in the environment with the selected actions
    rewards, done = env.step([a1, a2])

    # Print the actions and current positions
    print(a1, a2, counts, env.cur_pos)

# Output final rewards, done flag, and step count
rewards, done, counts

0 0 1 [(3, 3), (1, 0)]
3 2 2 [(3, 2), (1, 1)]
3 2 3 [(3, 1), (1, 2)]
3 2 4 [(3, 0), (1, 3)]
0 2 5 [(3, 0), (1, 4)]
0 2 6 [(3, 0), (1, 5)]
0 2 7 [(3, 0), (1, 6)]


(10, True, 7)