In [1]:
import copy

# Bays listed from left to right; within a bay the last element is the
# container on top (the one PICK removes and DROP stacks onto).
initial_bays = [[1, 2], [], [4], []]
# Index of the bay the crane starts above, and the container it starts out holding.
initial_crane_position, initial_crane_container_held = 0, 3
# Cost accumulated before any action has been performed.
initial_cost = 0

# Helper that prints a clearer visualization of a state.
#
# The visualization is rotated clockwise: each bay is printed on its own
# line, so moving the crane right/left shows up as moving down/up.

def print_state(state_and_time):
    """Print a state, one bay per line, followed by its total cost.

    state_and_time is a pair (layout, cost) where layout is
    [bays, crane_position, container_held]. The line of the bay under the
    crane is suffixed with " <=[held]-", where held is whatever the crane
    currently carries (None when it carries nothing). A blank line is
    printed at the end.
    """
    layout = state_and_time[0]
    for index, bay in enumerate(layout[0]):
        # Only the bay under the crane gets the crane marker.
        marker = f" <=[{layout[2]!s}]-" if index == layout[1] else ""
        print(f"{bay!s}{marker}")
    print(f"Total cost: {state_and_time[1]!s}")
    print()

# Created an object with information about the initial state

# A search state is a pair: ([bays, crane position, container held], accumulated cost).
# The starting cost comes from the initial_cost constant rather than a repeated literal.
initial_state = ([initial_bays, initial_crane_position, initial_crane_container_held], initial_cost)

print_state(initial_state)


[1, 2] <=[3]-
[]
[4]
[]
Total cost: 0



In [2]:
# This function computes the state we end up in when we apply one action to
# a state. It returns None when the action cannot be applied in that state.

def perform_action(state, action):
    """Apply a single crane action to a state and return the resulting state.

    Parameters:
        state: a pair (layout, cost) where layout is
            [bays, crane_position, container_held], or None.
        action: one of "RIGHT", "LEFT", "DROP" or "PICK".

    Returns:
        A pair (new_layout, new_cost), where new_cost is the input cost plus
        the cost of the action (1 for a move, 2 for DROP, 3 for PICK).
        Returns None when the action cannot be applied: moving past either
        end of the bays, dropping with an empty crane, picking with a loaded
        crane or from an empty bay, an unrecognised action name, or a None
        input state.

    The input state is never modified; the result is built on a deep copy.
    """
    # A previous inapplicable action produces None; propagate it rather than
    # failing on state[0].
    if state is None:
        return None

    new_state = copy.deepcopy(state[0])
    bays, position, held = new_state[0], new_state[1], new_state[2]

    if action == "RIGHT":
        if position >= len(bays) - 1:
            return None
        new_state[1] = int(position) + 1
        cost = 1
    elif action == "LEFT":
        if position <= 0:
            return None
        new_state[1] = int(position) - 1
        cost = 1
    elif action == "DROP":
        # Identity check: a held container may legitimately be falsy (e.g. 0).
        if held is None:
            return None
        new_state[2] = None
        bays[position].append(held)
        cost = 2
    elif action == "PICK":
        if held is not None or len(bays[position]) == 0:
            return None
        # The last element of a bay is the container on top.
        new_state[2] = bays[position].pop()
        cost = 3
    else:
        # Unknown action names are not applicable; previously they returned
        # an unchanged copy that looked like a successful zero-cost action.
        return None

    return new_state, state[1] + cost

def perform_action_sequence(state, actions):
    """Apply a list of actions in order and return the final state.

    Parameters:
        state: the starting state, in the form accepted by perform_action.
        actions: an iterable of action names, applied first to last.

    Returns:
        The state reached after the last action, or None as soon as one of
        the actions cannot be applied. With no actions the starting state is
        returned unchanged.
    """
    current = state
    for action in actions:
        current = perform_action(current, action)
        # Stop at the first inapplicable action: feeding None into the next
        # step would otherwise fail instead of reporting "no result".
        if current is None:
            return None
    return current

# Demonstrate a single move; the result is computed once and used for both prints.
after_right = perform_action(initial_state, "RIGHT")
print("The cost of this move is " + str(after_right[1]))
print_state(after_right)

# Demonstrate two longer plans, each replayed from the initial state.
for demo_plan in (["RIGHT", "DROP"], ["RIGHT", "DROP", "RIGHT", "PICK"]):
    demo_result = perform_action_sequence(initial_state, demo_plan)
    print("The cost of this action sequence is " + str(demo_result[1]))
    print_state(demo_result)



The cost of this move is 1
[1, 2]
[] <=[3]-
[4]
[]
Total cost: 1

The cost of this action sequence is 3
[1, 2]
[3] <=[None]-
[4]
[]
Total cost: 3

The cost of this action sequence is 7
[1, 2]
[3]
[] <=[4]-
[]
Total cost: 7



In [3]:
import copy
import time
import random
import numpy as np

# Every action name understood by perform_action. The order matters: it is
# the order in which successors are appended to the search frontier.
all_actions = [
    "DROP",
    "PICK",
    "LEFT",
    "RIGHT",
]

def is_action_valid(state, action):
    """Return True when `action` can be applied in `state`, False otherwise.

    The check is done by actually performing the action with perform_action
    and testing the truthiness of what comes back.
    """
    outcome = perform_action(state, action)
    return bool(outcome)

# In this case, the goal is to have each loading bay hold at least one container
def is_goal_state(state):
    """Return True when no bay is empty.

    `state` is a layout whose first element is the list of bays; the crane
    position and the held container are not looked at. A layout with no bays
    at all counts as a goal.
    """
    return all(len(bay) != 0 for bay in state[0])

# Implements a random tree search. Nodes in the frontier are expanded in
# a random order
def random_search(initial_state, possible_actions = all_actions, timeout = 20):
    """Random tree search: expand frontier nodes in a uniformly random order.

    Parameters:
        initial_state: a pair (layout, cost) as accepted by perform_action.
        possible_actions: the action names to try at each node.
        timeout: maximum number of seconds the search may run.

    Returns:
        A pair (state, plan) for the first goal node selected from the
        frontier, where state is (layout, cost) and plan is the list of
        actions leading to it from initial_state. Returns None if the
        frontier runs empty without reaching a goal.

    Raises:
        TimeoutError: if the search has been running for more than `timeout`
            seconds after expanding a node.

    This is a tree search: there is no duplicate detection, so the same
    layout can be reached and expanded again through different plans.
    """
    # Each frontier node is (state, plan that produced it).
    frontier = [(copy.deepcopy(initial_state), [])]

    # Monotonic clock: elapsed time is unaffected by system clock changes.
    start = time.monotonic()

    while frontier:
        # Take a uniformly random node out of the frontier. Popping by index
        # avoids scanning the list and comparing nodes for equality.
        state, plan = frontier.pop(random.randrange(len(frontier)))

        if is_goal_state(state[0]):
            return state, plan

        for action in possible_actions:
            # perform_action returns None when the action is not applicable,
            # so one call both checks and applies it.
            successor = perform_action(state, action)
            if successor is None:
                continue
            # Successors are always added; repeated layouts are not filtered.
            frontier.append((successor, plan + [action]))

        # While not always necessary, it is a good idea in practice to limit
        # the execution of a potentially non-terminating algorithm, here by
        # bounding the time it has available.
        if time.monotonic() - start > timeout:
            raise TimeoutError("Execution is taking too long to terminate.")

    return None

# Show the starting point, run the search, then report the plan and where it ends.
print("Starting from this initial state")
print_state(initial_state)

print("I use a random search to ensure each loading bay has at least one container")
solution = random_search(initial_state)

# solution is (final state, list of actions)
print("Plan found:")
print(solution[1])
print("State after performing the plan:")
print_state(solution[0])



Starting from this initial state
[1, 2] <=[3]-
[]
[4]
[]
Total cost: 0

I use a random search to ensure each loading bay has at least one container
Plan found:
['RIGHT', 'DROP', 'PICK', 'DROP', 'LEFT', 'PICK', 'DROP', 'PICK', 'RIGHT', 'RIGHT', 'RIGHT', 'DROP']
State after performing the plan:
[1]
[3]
[4]
[2] <=[None]-
Total cost: 22

