In [1]:
import yaml
import os
import heapq
import random
from collections import defaultdict

In [None]:
# Directory that holds the raw MAPF benchmark .map files.
# The default is the original author's local Windows path; set the MAPF_BENCHMARK_MAPS_DIR
# environment variable to point the notebook at the maps on another machine without editing code.
benchmark_maps_directory = os.environ.get(
    'MAPF_BENCHMARK_MAPS_DIR',
    r'C:\Users\owner\Documents\PhD\TierLab\VBTA - Original Commented\MAPF_benchmark_maps',
)

In [None]:
"""COMPLETED - DONT RUN AGAIN"""
def load_map_yaml(map_filename):
    """Parse a MAPF benchmark ``.map`` file into a 0/1 grid plus a YAML-ready dict.

    The file is read as four header lines (map type, height line, width line,
    "map" delimiter) followed by ``height`` rows of ``width`` characters. The
    dimensions are taken from the second whitespace-separated token of the
    height and width lines.

    Parameters
    - map_filename: path of the ``.map`` file to read

    Returns: ``(grid, map_dict)`` where
    - grid is a list of rows, each a list of ints (0 = free, 1 = blocked)
    - map_dict is ``{"map": {"dimensions": [height, width], "obstacles": [...]}}``
      with one ``(row, col)`` tuple per blocked cell.
      NOTE: the obstacle coordinates are 1-based (the first cell of the first
      row is (1, 1)), whereas indices into ``grid`` are 0-based.

    Raises: ValueError if a map row does not contain exactly ``width`` characters.
    """
    # '.' and 'G' are traversable terrain; every other symbol (water included) is blocked
    passable = (".", "G")
    grid = []
    obstacles = []

    with open(map_filename, 'r') as map_file:
        header = [map_file.readline().strip() for _ in range(4)]
        # header[0] (map type) and header[3] (the "map" delimiter) are not used
        height = int(header[1].split()[1])
        width = int(header[2].split()[1])

        for row_number in range(1, height + 1):
            row_data = map_file.readline().strip()
            if len(row_data) != width:
                # sanity check for malformed map files
                raise ValueError(f"Map file error: row length {len(row_data)} != width {width}")

            row = [0 if symbol in passable else 1 for symbol in row_data]
            # record every blocked cell of this row using 1-based (row, col) coordinates
            obstacles.extend(
                (row_number, col_number)
                for col_number, blocked in enumerate(row, start=1)
                if blocked
            )
            grid.append(row)

    map_dict = {"map": {"dimensions": [height, width], "obstacles": obstacles}}
    return grid, map_dict

In [None]:
"""TEST"""
# map_file = r'C:\Users\owner\Documents\PhD\TierLab\VBTA - Original Commented\MAPF_benchmark_maps\arena.map'

In [None]:
"""TEST"""
# grid, map_dict = load_map_yaml(map_file)

In [None]:
"""TEST"""
# with open('data.yaml', 'w') as f:
#     yaml.dump(map_dict, f)

In [None]:
"""COMPLETED"""
# for filename in os.listdir(benchmark_maps_directory):
#     f = os.path.join(benchmark_maps_directory, filename)
#     if os.path.isfile(f):
#         grid, map_dict = load_map_yaml(f)
#         with open(filename + ".yaml", 'w') as out:
#             yaml.dump(map_dict, out)

In [3]:
"""same as above without the dictionary because we need the grid representation"""
def load_map(map_filename):
    """Load an ASCII MAPF benchmark ``.map`` file as a grid of 0/1 cells.

    Parameters
    - map_filename: path of the ``.map`` file to read

    Returns: a list of ``height`` rows, each a list of ``width`` ints, where
             0 marks a free cell ('.' or 'G') and 1 marks a blocked cell
             (any other character).

    Raises: ValueError if a map row does not contain exactly ``width`` characters.
    """
    free_symbols = {".", "G"}

    with open(map_filename, 'r') as handle:
        handle.readline()                              # map type, e.g. "type octile" (unused)
        height_tokens = handle.readline().strip().split()  # e.g. ["height", "45"]
        width_tokens = handle.readline().strip().split()   # e.g. ["width", "52"]
        handle.readline()                              # "map" line marking the start of the rows

        height = int(height_tokens[1])
        width = int(width_tokens[1])

        grid = []
        for _ in range(height):
            line = handle.readline().strip()
            if len(line) != width:
                # sanity check for malformed map files
                raise ValueError(f"Map file error: row length {len(line)} != width {width}")
            # 1 means blocked, 0 means free
            grid.append([0 if symbol in free_symbols else 1 for symbol in line])

    return grid

In [4]:
def get_random_free_position(grid, occupied_positions, agents_to_add):
    """
    Randomly pick start/goal cells for a batch of new agents.

    Parameters
    - grid: 2D list of 0/1 cells representing the map (0 = free, 1 = obstacle)
    - occupied_positions: iterable of existing (row, col) agent positions that must be
      treated as blocked when choosing new positions; pairs may be tuples or lists,
      and None or an empty iterable means nothing is occupied
    - agents_to_add: number of agents to place; two cells are drawn per agent so a
      start and a goal can be assigned to each one

    Returns: a list of 2 * agents_to_add distinct (row, col) tuples, sampled without
             replacement from the free cells (value 0) that are not occupied.
             How the cells are split into starts and goals is left to the caller.

    Raises: ValueError if there are no free cells at all, or fewer free cells than
            the 2 * agents_to_add that are required.
    """
    # Normalise to a set of tuples: O(1) membership tests, and list pairs
    # (e.g. positions read back from YAML) compare equal to (r, c) tuples.
    occupied = {tuple(position) for position in (occupied_positions or ())}

    # The occupancy test must use the coordinate (r, c), not the cell value.
    free_cells = [
        (r, c)
        for r, row in enumerate(grid)
        for c, cell in enumerate(row)
        if cell == 0 and (r, c) not in occupied
    ]

    if not free_cells:
        raise ValueError("No free cells available to place a robot!")

    # 2 * agents to add so we pick a start and end position for each agent at the same time
    cells_needed = 2 * agents_to_add
    if cells_needed > len(free_cells):
        raise ValueError(
            f"Not enough free cells: need {cells_needed} for {agents_to_add} agents "
            f"but only {len(free_cells)} are available"
        )

    # randomly sample without replacement
    return random.sample(free_cells, cells_needed)

In [5]:
def pick_random_map(directory, number_of_maps):
    """
    Randomly choose map file names from a directory.

    Parameters
    - directory: path of the folder to list
    - number_of_maps: how many distinct entries to sample

    Returns: a list of number_of_maps entry names (not full paths) sampled without
             replacement, or None if directory is not an existing directory or
             contains no entries.

    Raises: ValueError (from random.sample) if number_of_maps is negative or larger
            than the number of entries in the directory.
    """
    # os.path.isdir has to be called; the bare function object is always truthy,
    # so the guard would otherwise never trigger.
    if not os.path.isdir(directory):
        return None

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # reproducible when the random seed is fixed.
    maps = sorted(os.listdir(directory))
    if not maps:
        return None

    return random.sample(maps, number_of_maps)
# use os.path.join(directory, map) for each map in random_maps to get full path

In [None]:
"""DONE"""
# random_maps = pick_random_map(benchmark_maps_directory, 40)

In [None]:
"""DONE"""
# random_maps

['den203d.map',
 'den308d.map',
 'lak505d.map',
 'lak405d.map',
 'orz103d.map',
 'den504d.map',
 'den011d.map',
 'lak308d.map',
 'orz000d.map',
 'lak201d.map',
 'lak515d.map',
 'oth000d.map',
 'brc501d.map',
 'lak109d.map',
 'brc203d.map',
 'den200n.map',
 'den001d.map',
 'den400d.map',
 'den407d.map',
 'den998d.map',
 'den200d.map',
 'den500d.map',
 'orz800d.map',
 'lak401d.map',
 'orz107d.map',
 'lak519d.map',
 'den000d.map',
 'orz700d.map',
 'den206d.map',
 'den602d.map',
 'ost000t.map',
 'lak513d.map',
 'den020d.map',
 'orz100d.map',
 'den600d.map',
 'orz303d.map',
 'orz703d.map',
 'lak404d.map',
 'brc505d.map',
 'orz102d.map']

In [9]:
# Presumably the project root folder; hardcoded to the original author's machine.
# NOTE(review): the maps were sampled from benchmark_maps_directory, which is the
# MAPF_benchmark_maps subfolder of this path, yet this parent folder is what the export
# cell stores as "map_base_directory" — confirm the .map files can actually be found here.
base_directory = r'C:\Users\owner\Documents\PhD\TierLab\VBTA - Original Commented'

In [None]:
"""DONE"""
# random_map_dict = {
#     "map_base_directory" : base_directory,
#     "map_files" : random_maps,
# }

In [None]:
"""DONE"""
# random_map_dict

{'map_base_directory': 'C:\\Users\\owner\\Documents\\PhD\\TierLab\\VBTA - Original Commented',
 'map_files': ['den203d.map',
  'den308d.map',
  'lak505d.map',
  'lak405d.map',
  'orz103d.map',
  'den504d.map',
  'den011d.map',
  'lak308d.map',
  'orz000d.map',
  'lak201d.map',
  'lak515d.map',
  'oth000d.map',
  'brc501d.map',
  'lak109d.map',
  'brc203d.map',
  'den200n.map',
  'den001d.map',
  'den400d.map',
  'den407d.map',
  'den998d.map',
  'den200d.map',
  'den500d.map',
  'orz800d.map',
  'lak401d.map',
  'orz107d.map',
  'lak519d.map',
  'den000d.map',
  'orz700d.map',
  'den206d.map',
  'den602d.map',
  'ost000t.map',
  'lak513d.map',
  'den020d.map',
  'orz100d.map',
  'den600d.map',
  'orz303d.map',
  'orz703d.map',
  'lak404d.map',
  'brc505d.map',
  'orz102d.map']}

In [None]:
"""DONE"""
# with open('random_maps.yaml', 'w') as f:
#     yaml.dump(random_map_dict, f)

In [None]:
# Reload the persisted map selection instead of re-sampling, so this cell works
# on a fresh kernel without running the (commented-out) sampling cells.
with open('random_maps.yaml', 'r') as f:
    config = yaml.safe_load(f)

base_dir = config['map_base_directory']  # folder the map file names are resolved against
map_filenames = config['map_files']      # bare file names of the selected maps

# One full path per selected map, in the order stored in the YAML file.
full_map_paths = [os.path.join(base_dir, map_name) for map_name in map_filenames]

In [16]:
# Sanity check: show the first resolved map path and its Python type, one per line.
print(full_map_paths[0], type(full_map_paths[0]), sep="\n")

C:\Users\owner\Documents\PhD\TierLab\VBTA - Original Commented\den203d.map
<class 'str'>


In [None]:
# load each map
# get the random free positions for 10 - 100 agents by 10's 
    # this involves getting a start and goal position for each agent
# then store these locations with the agent name (such as agent0, agent1, etc.) in a yaml file
# THEN I'll need to add a copy of the map information to each of those yaml files from the Processed_Benchmarks directory


"""I THINK it might be the case we load each of the 40 maps into a pytorch geometric data object directly using either the 
.map file or the list of obstacles that I have for each map. Since these graphs are all fully connected, I just need to create
a data object with all the nodes, making sure all obstacles are represented."""

# PLAN

### MISC
- [X] migrate working CBS and visualization over
- [X] check for functionality
- [ ] decide on how to calculate difficulty metric (multi-output regression, use each predicted metric for its own statistic, like cost means this, makespan means that, etc, with one overall equally weighted normalized linear combination of the metrics (not predicted, just computed after))

### DATA STRUCTURE
- [X] all that is in the output is a cost for the solution and a schedule for each agent saying where they should be at each timestep
- [X] add to output makespan, high level nodes expanded (constraint tree size), total conflicts identified
- [X] graph data must be represented somehow (either adjacency matrix, or list of edges)
    - [X] after researching ways to represent graphs it appears that the most computationally inexpensive way is using a list of edges (O(E)) instead of an adjacency matrix (up to O(V<sup>2</sup>))

### PROCESSING MAPS
- [X] pick the 40 maps we will be using randomly (pick_random_map)
- [ ] need to process the maps further and create yaml files with an edge index, node positions (coordinates?), and node features (x_base = is_free, is_obstacle, is_start_node, is_goal_node) (maps are fully connected in 4 DIRECTIONS ONLY, NOT 8 — 8-direction connectivity breaks CBS)
- [ ] optional instance specific features (number of agents, average manhattan distance between every start/goal pair, KEEP IT SIMPLE TO START IF IT WORKS TRY ADDING MORE, store as separate tensor like data.u)
    - [ ] save the static maps for re-use
- [ ] generate CBS data by:
    - [ ] get random start and goal positions for the chosen maps for a set of agents [10, 20, 30, ..., etc.] (load_map) (get_random_free_position)
    - [ ] record each of the agent start and goal positions and agent name [agent0, agent1, ..., etc] in a dictionary called agents (loop get_random_free_position and make a dict for each map then write it in yaml)
    - [ ] create new yaml files for each of the maps, for each count of agents [map1<sub>10</sub>agent.yaml, map1<sub>20</sub>agent.yaml, map1<sub>30</sub>agent.yaml, ..., etc] (for each of the chosen processed benchmark maps, repeat the two steps above 3 times for every amount from 10 - 100 agents going by 10's. we will take 3 of each example to ensure an average performance)
    - [ ] now we have completed yaml files that should be solvable by the new CBS code, run CBS on each of the maps
- [ ] take the input, output, and further processed yaml files with edge lists, and create the torch_geometric.data.Data object
    - [ ] data.x is input, data.edge_index, data.pos, data.u, data.y is the targets (all 4) 
- [ ] predict all metrics with multi-output regression GNN, need to normalize target metrics first 

### LEARNING
- [ ] Need 3 models, 
    - [ ] one for a graph level regression task to give us a difficulty metric to compare graphs against each other
    - [ ] one for a node level prediction, maybe classification, maybe regression, some sort of difficulty metric per node? perhaps regression all the way to get continuous results instead of binary
    - [ ] one for edge level prediction, again regression, again difficulty of edges (optional depending on the amount of work)
- [ ] the node and edge level predictions should give us the potential to make a heatmap overlay (optional depending on the amount of work)


- [XXX] LLM based suitability rating
    - one function call that takes in both profile and task description and gives us a suitability number to vote on
        - prompt engineering
- [X] Natural language description of task and robot (unstructured, natural language) / replace random robot and task generator (still structured, dictionary)
    - LLM takes in natural language prompt (or character sheet), and make the dictionary from that, tasks too
- [X] LLM directly assign robots to tasks : feed in descriptions and tasks and see what comes out (replace voting completely with LLM)
    - compare to hybrid voting approach
    - great comparison works or not
- Replace CBS with LLM
    - give LLM map, start and goal, and tell it to plan
    - how to do conflict resolution when LLM gets it wrong
    - 

# HIGH LEVEL PLAN
- [ ] Full debug
- [ ] LLM integration
- [ ] run data

# Empirical Hardness spin off
    - re-process maps in a way that they are solvable by an LLM
    - change the way they are represented
    - get a plan from the LLM (an actual path for each agent)
## HARD PART
    - check for the plan correctness (brute force (fastest))
    - step through each path and check for edge or node conflicts
    - basically make CBS with an LLM
    - ask the LLM to resolve the conflict
    - I'll find the conflicts and the LLM will be asked to resolve and output a new plan


- present both project plans and see what she accepts