# Simulator with A* agents to create a dataset for learning models

In [34]:
from view.tkView import TkView
from control.astarAgent import AStarAgent
from model.model import Model
import random
import numpy as np
from time import sleep

NUM_OF_MAPS = 30   # number of randomly sized maps to simulate
UNIT_TIME = 1200   # upper bound on simulated time steps per map
ORDER_SIZE = 100   # passed to Model; used as the order total in the report

# Value of model.grid at a charging station (per the original author's note).
CHARGING_STATION_CELL = 2
# Label recorded when the agent stays where it is.
STAY_STILL_LABEL = 4

all_states = []
all_labels = []


def _encode_state(model, agent):
    """Return the 11-feature state vector recorded for ``agent``.

    Feature order: agent_x, agent_y, target_x, target_y, the first four
    entries of ``model.get_neighbour_encoded(agent)`` (described by the
    original author as at_up, at_down, at_left, at_right), hasItem,
    energy, charge.

    The caller must ensure ``agent.target_pos`` is not None.
    """
    neighbours = model.get_neighbour_encoded(agent)
    return [
        agent.pos[0], agent.pos[1],
        agent.target_pos[0], agent.target_pos[1],
        neighbours[0], neighbours[1], neighbours[2], neighbours[3],
        agent.hasItem, agent.energy, agent.charge,
    ]


# simulate different maps
for map_num in range(NUM_OF_MAPS):
    # generate model for training data
    agents = 3
    model = Model((random.randint(10, 15), random.randint(10, 15)),
                  agents, ORDER_SIZE)
    view = TkView(model)

    try:
        # agent to create data
        labeller = AStarAgent()
        labeller.initialise(model)

        # simulate training data (state) for each agent
        for _ in range(UNIT_TIME):
            for agent in model.agents:
                # don't move if no target
                if agent.target_pos is None:
                    continue
                # check has energy to move
                if agent.energy > 0:
                    bestMove = labeller._choose_action(agent)

                    if bestMove is not None:
                        # save state (x) before the agent is moved
                        all_states.append(_encode_state(model, agent))
                        # save move (y)
                        bestEncoded = labeller.encode_move(agent.pos, bestMove)
                        all_labels.append(bestEncoded)

                        # move agent to find next state to simulate
                        agent.prev_pos = agent.pos
                        agent.pos = bestMove

                    # allow stay-still data at charging station
                    elif model.grid[agent.pos] == CHARGING_STATION_CELL:
                        all_states.append(_encode_state(model, agent))
                        all_labels.append(STAY_STILL_LABEL)

                # return target order back to list, as no longer able to
                # complete (dead)
                elif agent.orderItem is not None:
                    model.order_list.insert(0, agent.orderItem)
                    agent.orderItem = None

            # update model info
            model.update_status()
            model.timeStep += 1

            # view.render()

            # break loop no agent working (all finished or all dead)
            if all(agent.orderItem is None for agent in model.agents):
                break
    finally:
        # always close the Tk window, even if a simulation step raised
        view.root.destroy()
    print(f"Map {map_num+1} simulated in {model.timeStep} moves. Orders collected: {ORDER_SIZE-len(model.order_list)}/{ORDER_SIZE}")

# convert the accumulated samples to arrays: one row per recorded state,
# one label per row
all_states = np.array(all_states)
all_labels = np.array(all_labels)

Map 1 simulated in 1200 moves. Orders collected: 100/100
Map 2 simulated in 1200 moves. Orders collected: 82/100
Map 3 simulated in 1200 moves. Orders collected: 91/100
Map 4 simulated in 1200 moves. Orders collected: 97/100
Map 5 simulated in 1200 moves. Orders collected: 92/100
Map 6 simulated in 1200 moves. Orders collected: 88/100
Map 7 simulated in 1137 moves. Orders collected: 100/100
Map 8 simulated in 1200 moves. Orders collected: 85/100
Map 9 simulated in 1200 moves. Orders collected: 87/100
Map 10 simulated in 1200 moves. Orders collected: 99/100
Map 11 simulated in 1200 moves. Orders collected: 99/100
Map 12 simulated in 1200 moves. Orders collected: 100/100
Map 13 simulated in 1200 moves. Orders collected: 97/100
Map 14 simulated in 1200 moves. Orders collected: 93/100
Map 15 simulated in 1200 moves. Orders collected: 85/100
Map 16 simulated in 1200 moves. Orders collected: 54/100
Map 17 simulated in 1191 moves. Orders collected: 100/100
Map 18 simulated in 1200 moves. Orde

In [35]:
# Sanity check of the collected feature matrix: print its shape, then leave
# the array as the cell's last expression so the notebook displays it.
print(all_states.shape)
all_states
# Disabled debugging aid: find the positions of entries equal to 5.
# np.where(all_states==5)

(103622, 11)


array([[  4,   4,  11, ...,   0, 119,   0],
       [  1,   1,   6, ...,   0, 119,   0],
       [  9,   7,   3, ...,   0, 119,   0],
       ...,
       [  5,  10,  13, ...,   1,  85,   0],
       [ 12,   8,  13, ...,   1, 105,   0],
       [ 11,  10,  13, ...,   1,  63,   0]], shape=(103622, 11))

In [36]:
# Sanity check of the label vector: print its shape, then leave the array as
# the cell's last expression so the notebook displays it.
print(all_labels.shape)
all_labels

(103622,)


array([1, 1, 0, ..., 1, 3, 1], shape=(103622,))

# Save data

In [38]:
# save simulated data!
# Both files use relative paths, so they are written to the notebook's
# current working directory. Existing files with these names are presumably
# overwritten - confirm before re-running if earlier data must be kept.
from joblib import dump
dump(all_states, "simulated_states.joblib")
# Last expression of the cell: the notebook shows dump()'s return value.
dump(all_labels, "simulated_labels.joblib")

['simulated_labels.joblib']

In [25]:
# load simulated data
# Restores the arrays written by the save cell, so the simulation does not
# have to be re-run. Paths are relative to the current working directory.
# NOTE(review): joblib.load is pickle-based; only load files produced by this
# notebook or another trusted source.
from joblib import load
all_states = load("simulated_states.joblib")
all_labels = load("simulated_labels.joblib")