In [73]:
import gym
import numpy as np
import pandas as pd
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete
from IPython.display import display
import random
import os
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [74]:
random.seed()
randomN = random.random()
print(randomN)

0.7693379815394654


In [75]:
random.seed(randomN)
randomNumber = random.random()

In [76]:
Box(low=0, high=3, shape=(3, 3, 3), dtype=int).sample()

array([[[3, 3, 3],
        [3, 1, 1],
        [2, 3, 0]],

       [[1, 1, 2],
        [1, 3, 2],
        [2, 2, 3]],

       [[1, 0, 0],
        [0, 2, 0],
        [0, 1, 0]]])

In [77]:
def fillInContainerGridRandom(grid, action_space, upperbound, percentage=0.25):
    filledIn = 0
    amountToFillIn = int((grid.shape[0] * grid.shape[1]) * percentage)
    while (filledIn < amountToFillIn):
        indexToFillIn = action_space.sample()
        rowToFillIn = grid[indexToFillIn[0]][indexToFillIn[1]]
        if (np.all(rowToFillIn)):
            continue
        elif (not np.any(rowToFillIn)):
            checkGridCopy = grid.copy()
            x_values_list = np.array([], dtype=int)
            for x_spot in checkGridCopy[indexToFillIn[0]]:
                if (x_spot[0] == 0):
                    continue
                x_values_list = np.append(x_values_list, x_spot[0])
            uniqueListOfNumbers = list(set(x_values_list))
            num_values = len(uniqueListOfNumbers)
            if (num_values < 2):
                randomShipNumber = random.randint(1, upperbound)
                checkGridCopy[indexToFillIn[0]][indexToFillIn[1]], score = fillContainerInList(rowToFillIn, randomShipNumber)
                #We encountered a boxed in(e.g. [1,0,3])
                if(checkForBoxedInContainer(checkGridCopy[indexToFillIn[0]]) < 0):
                    continue
            else:
                randomShipNumber = uniqueListOfNumbers[-1]
                checkGridCopy[indexToFillIn[0]][indexToFillIn[1]], score = fillContainerInList(rowToFillIn, randomShipNumber)
                #We encountered a boxed in(e.g. [1,3,x]) where x is to be placed and is equal to 1
                if(checkForBoxedInContainer(checkGridCopy[indexToFillIn[0]]) < 0):
                    randomShipNumber = uniqueListOfNumbers[0]
        else:
            #We don't need to check for box in since we add to the current x_row
            randomShipNumber = rowToFillIn[0]
            
        grid[indexToFillIn[0]][indexToFillIn[1]], score = fillContainerInList(rowToFillIn, randomShipNumber)
        filledIn +=1
    return grid

def fillContainerInList(currentList, container):
    newList = currentList.copy()
    indexesOfZeros = np.where(newList == 0)[0]
    # List is full of numbers
    if (len(indexesOfZeros) < 1):
        return newList, -3
    else:
        indexToChange = indexesOfZeros[0]
        newList[indexToChange] = container
        return newList, 0

In [78]:
def fillSingleContainerAtPosition(grid, y, x, containerList):
    bottomValue = grid[y][x][0]
    container = containerList[-1]
    #We get the new row and score of putting a container there based in the list availability
    grid[y][x], score = fillContainerInList(grid[y][x], container)
    if (score == 0):
        #Also give a reward if the same number is used
        if (bottomValue == 0):
            score += 2
            score += checkForBoxedInContainer(grid[y])
        elif (bottomValue == container):
            score += 5
        else:
            score -= 3
        return grid, score, np.delete(containerList, -1)
    else:
        return grid, score, containerList

In [79]:
def checkForBoxedInContainer(listOfContainerRows):
    x_values_list = np.array([], dtype=int)
    for x_spot in listOfContainerRows:
        x_values_list = np.append(x_values_list, x_spot[0])
    numberToCheckFor = np.nan
    numberToVerify = np.nan
    encounteredZero = False
    for item in x_values_list:
        #Skip empty position on grid
        if (item != 0):
            #If we already encountered a number and zero counters is more than 0, we will box in a zero -> penalty
            # e.g. x 0 0 x
            if ((not np.isnan(numberToCheckFor)) & encounteredZero):
                return -4
            if (np.isnan(numberToCheckFor)):
                numberToCheckFor = item
            if (not np.isnan(numberToVerify)):
                if (item != numberToVerify):
                    #The previous item is boxed in since that item is not equal to where we started and the next item is also different
                    return -4
            #Set number to verify if boxed in
            if ((item != numberToCheckFor) & (np.isnan(numberToVerify))):
                numberToVerify = item
        else:
            # Only check once we encountered a number
            # e.g. 0 x ... doesn't count
            if (not np.isnan(numberToCheckFor)):
                encounteredZero = True

    return 0

In [80]:
def getListOfContainers(numberOfShips, containersPerShip, multiplication=1):
    containers = np.array([], dtype=int)
    for i in range(multiplication):
        for i in range(1, numberOfShips + 1):
            containers = np.append(containers, np.full((1, containersPerShip), i, dtype=int))
    random.shuffle(containers)
    return containers

def getContainersCountAsList(containerList, current_count=None):
    numbers, counts = np.unique(containerList, return_counts=True)
    if (type(current_count) == type(None)):
        current_count = np.zeros(len(counts), dtype=int)
    #Iterate through the numbers and assign their count
    if (len(numbers) > 0):
        startIndex = numbers[-1]
        for idx, number in enumerate(numbers):
            current_count[number-1] = counts[idx]
        current_count[numbers[-1]:] = 0
        return current_count
    #Numbers that are not in the list, but still exist in the dict will be set to 0
    current_count[0:] = 0
    return current_count

In [81]:
testContainerList = getListOfContainers(5,10,4)
display(testContainerList)

array([3, 1, 1, 3, 4, 1, 5, 1, 5, 1, 1, 3, 3, 2, 5, 2, 4, 2, 3, 5, 2, 2,
       3, 3, 2, 2, 5, 3, 5, 4, 1, 3, 4, 1, 2, 5, 4, 5, 4, 2, 4, 1, 5, 4,
       4, 3, 3, 4, 1, 3, 1, 3, 2, 2, 5, 5, 1, 5, 1, 2, 3, 3, 4, 1, 4, 1,
       2, 1, 2, 3, 5, 1, 5, 3, 3, 1, 2, 1, 3, 1, 5, 3, 2, 1, 1, 3, 5, 3,
       4, 1, 3, 5, 4, 1, 1, 3, 1, 4, 5, 3, 1, 4, 5, 1, 5, 2, 1, 3, 3, 4,
       4, 2, 4, 2, 2, 2, 5, 4, 1, 4, 5, 2, 2, 2, 4, 1, 3, 5, 1, 3, 5, 2,
       3, 1, 3, 4, 5, 3, 5, 5, 5, 2, 2, 2, 3, 4, 1, 5, 1, 5, 4, 1, 5, 2,
       5, 3, 4, 4, 1, 5, 5, 4, 4, 4, 2, 5, 3, 2, 2, 2, 2, 4, 3, 2, 4, 4,
       4, 2, 5, 1, 3, 4, 4, 4, 4, 3, 1, 2, 4, 2, 3, 5, 4, 3, 2, 5, 5, 1,
       2, 5])

In [82]:
container_count = getContainersCountAsList(testContainerList)
display(container_count)

array([40, 40, 40, 40, 40])

In [83]:
test_dict = getContainersCountAsList(np.array([1, 2, 2, 3]), container_count)
display(test_dict)

array([1, 2, 1, 0, 0])

In [84]:
x = 3
y = 3
z = 3
upperbound = 3
empty3dArray = np.zeros(shape=(y, x, z), dtype=int)
action_space = MultiDiscrete([y, x], dtype=int)
empty3dArray = fillInContainerGridRandom(empty3dArray, action_space, upperbound, 0.9)
containers = getListOfContainers(upperbound, 4, z-1)
display(empty3dArray)
df, score, lists = fillSingleContainerAtPosition(empty3dArray, 0, 0, containers)
display(lists)
display(df)
display(score)

array([[[3, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[2, 0, 0],
        [2, 2, 0],
        [1, 1, 1]],

       [[0, 0, 0],
        [0, 0, 0],
        [2, 0, 0]]])

array([2, 3, 2, 2, 1, 1, 3, 2, 1, 2, 3, 1, 3, 3, 2, 2, 2, 1, 3, 3, 1, 3,
       1])

array([[[3, 1, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[2, 0, 0],
        [2, 2, 0],
        [1, 1, 1]],

       [[0, 0, 0],
        [0, 0, 0],
        [2, 0, 0]]])

-3

In [85]:
containers = getListOfContainers(3, 3)
display(df)
df, score, lists = fillSingleContainerAtPosition(df, 0, 2, containers)
display(lists)
display(df)
display(score)

array([[[3, 1, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[2, 0, 0],
        [2, 2, 0],
        [1, 1, 1]],

       [[0, 0, 0],
        [0, 0, 0],
        [2, 0, 0]]])

array([2, 1, 3, 2, 3, 1, 3, 2])

array([[[3, 1, 0],
        [0, 0, 0],
        [1, 0, 0]],

       [[2, 0, 0],
        [2, 2, 0],
        [1, 1, 1]],

       [[0, 0, 0],
        [0, 0, 0],
        [2, 0, 0]]])

-2

In [86]:
containers = getListOfContainers(3, 3)
df, score, lists = fillSingleContainerAtPosition(df, 0, 1, containers)
display(lists)
display(df)
display(np.all(df.flatten() > 0))
display(score)

array([2, 3, 1, 1, 1, 3, 2, 3])

array([[[3, 1, 0],
        [2, 0, 0],
        [1, 0, 0]],

       [[2, 0, 0],
        [2, 2, 0],
        [1, 1, 1]],

       [[0, 0, 0],
        [0, 0, 0],
        [2, 0, 0]]])

False

-2

In [87]:
action = MultiDiscrete([3, 3], dtype=int).sample()

In [88]:
display(action)

array([1, 2])

In [89]:
display(action[0])

1

In [90]:
Discrete(5, start=1).n

5

In [91]:
Discrete(5, start=1).sample()

3

In [92]:
Dict(spaces={
    '1': Discrete(5),
    '2': Discrete(5),
    '3': Discrete(5),
    '4': Discrete(5),
}).sample()

OrderedDict([('1', 2), ('2', 4), ('3', 2), ('4', 0)])

In [93]:
# observation space
observation_space_test = Dict({
    'grid':
        Box(low=0,
                   high=5,
                   shape=(3, 3, 3), dtype=int),
    'container':
        Discrete(5),
    'containerCount':
        MultiDiscrete(np.full(shape=(5,), fill_value=5,dtype=int))})

In [94]:
print(observation_space_test.sample())

OrderedDict([('container', 3), ('containerCount', array([2, 2, 0, 3, 2], dtype=int64)), ('grid', array([[[3, 2, 0],
        [2, 3, 1],
        [3, 1, 1]],

       [[1, 2, 2],
        [0, 1, 3],
        [2, 4, 5]],

       [[2, 0, 4],
        [4, 1, 1],
        [5, 3, 2]]]))])


# Create environment

In [95]:
class ContainerEnv(Env):
    "Custom Environment that follows gym interface"
    metadata = {'render.modes': ['human']}

    def __init__(self, numberOfShips, containersPerShip, boxWidth, boxLength, boxHeight, percentage=0.0):
        super(ContainerEnv, self).__init__()
        self.numberOfShips = numberOfShips
        self.containersPerShip = containersPerShip
        self.percentage = percentage
        self.maxRows = boxLength
        self.maxColumns = boxWidth
        self.maxHeight = boxHeight
        self.maxStepsCount = ((self.maxRows * self.maxColumns * self.maxHeight) * self.maxHeight)
        self.action_space = MultiDiscrete([self.maxRows, self.maxColumns], dtype=int)
        self.observation_space = Dict(
            spaces={
                "grid": Box(low=0, high=numberOfShips, shape=(self.maxRows, self.maxColumns, self.maxHeight), dtype=int),
                "container": Discrete(numberOfShips+2, start=1),
                "containerCount": MultiDiscrete(np.full(shape=(numberOfShips,), fill_value=(self.maxRows * self.maxColumns * self.maxHeight),dtype=int))
            }
        )
        self.listOfContainers = getListOfContainers(self.numberOfShips, self.containersPerShip, self.maxHeight-1)
        self.containerCount = getContainersCountAsList(self.listOfContainers)
        self.containerGrid = fillInContainerGridRandom(np.zeros(shape=(self.maxRows, self.maxColumns, self.maxHeight), dtype=int), 
                                                       self.action_space, 
                                                       self.numberOfShips,
                                                       self.percentage)
    def step(self, action):
        self.maxStepsCount -= 1
        grid, reward, containerList = fillSingleContainerAtPosition(self.containerGrid, action[0], action[1], self.listOfContainers)
        self.containerGrid = grid
        self.listOfContainers = containerList
        self.containerCount = getContainersCountAsList(self.listOfContainers, self.containerCount)
        info = {"TimeLimit.truncated": False}
        # Check if there are no more containers to place
        if ((len(self.listOfContainers) == 0) | np.all(self.containerGrid.flatten() > 0)):
            obs = {'containerCount': self.containerCount, 'container': 0, 'grid': self.containerGrid}
            return obs, reward, True, info
        else:
            obs = {'containerCount': self.containerCount, 'container': int(self.listOfContainers[-1]),
                   'grid': self.containerGrid}
            if (self.maxStepsCount == 0):
                info["TimeLimit.truncated"] = True
                return obs, reward, True, info
            return obs, reward, False, info
    
    def reset(self):
        self.maxStepsCount = ((self.maxRows * self.maxColumns * self.maxHeight) * self.maxHeight)
        self.containerGrid = fillInContainerGridRandom(np.zeros(shape=(self.maxRows, self.maxColumns, self.maxHeight), dtype=int), 
                                                       self.action_space, 
                                                       self.numberOfShips,
                                                       self.percentage)
        self.listOfContainers = getListOfContainers(self.numberOfShips, 
                                                    #random.randint(max(1, self.containersPerShip - 5), self.containersPerShip + 5),
                                                    self.containersPerShip,
                                                    self.maxHeight-1)
        self.containerCount = getContainersCountAsList(self.listOfContainers)
        obs = {'containerCount': self.containerCount, 'container': int(self.listOfContainers[-1]), 'grid': self.containerGrid}
        return obs
    
    def render(self, mode='human', close=False):
        display(self.containerGrid)
    def close (self):
        display(self.containerGrid)

In [96]:
from stable_baselines3.common.env_checker import check_env

env = ContainerEnv(3, 3, 1, 3, 3)
# It will check your custom environment and output additional warnings if needed
#check_env(env)

In [97]:
numberOfShips = 3
containersPerShip = 4
boxHeight = 3
boxWidth = 3
boxLength = 3
env = ContainerEnv(numberOfShips, containersPerShip, boxWidth, boxLength, boxHeight)
display(env.action_space)
display(env.observation_space)
display(env.reset())

MultiDiscrete([3 3])

Dict('container': Discrete(5, start=1), 'containerCount': MultiDiscrete([27 27 27]), 'grid': Box(0, 3, (3, 3, 3), int32))

{'containerCount': array([8, 8, 8]),
 'container': 2,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

# Test environment

In [98]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    display(state)
    done = False
    score = 0
    
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        state = n_state
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
    print('final env state')
    display(state)
    display(env.listOfContainers)
env.close()

{'containerCount': array([8, 8, 8]),
 'container': 2,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:1 Score:4
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[1, 2, 2],
         [3, 3, 0],
         [2, 3, 2]],
 
        [[3, 2, 1],
         [2, 2, 3],
         [3, 3, 3]],
 
        [[2, 1, 0],
         [1, 1, 0],
         [1, 1, 1]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 2,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:2 Score:-56
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[2, 3, 3],
         [3, 1, 2],
         [3, 1, 1]],
 
        [[1, 1, 0],
         [2, 0, 0],
         [1, 2, 1]],
 
        [[2, 3, 3],
         [2, 2, 1],
         [2, 3, 3]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 1,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:3 Score:-13
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[1, 2, 3],
         [3, 0, 0],
         [1, 3, 3]],
 
        [[2, 2, 1],
         [2, 3, 3],
         [1, 1, 1]],
 
        [[1, 2, 1],
         [3, 3, 2],
         [2, 2, 0]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 3,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:4 Score:-37
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[1, 2, 3],
         [1, 3, 0],
         [3, 3, 1]],
 
        [[3, 3, 2],
         [1, 2, 2],
         [3, 2, 2]],
 
        [[2, 1, 0],
         [1, 1, 0],
         [2, 1, 3]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 3,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:5 Score:-23
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[3, 3, 1],
         [3, 0, 0],
         [2, 3, 0]],
 
        [[3, 2, 1],
         [1, 3, 2],
         [2, 1, 2]],
 
        [[1, 1, 2],
         [1, 1, 3],
         [2, 2, 3]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 1,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:6 Score:-75
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[2, 1, 3],
         [1, 2, 1],
         [2, 2, 3]],
 
        [[1, 3, 2],
         [1, 2, 1],
         [2, 3, 3]],
 
        [[3, 0, 0],
         [2, 3, 0],
         [1, 3, 1]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 1,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:7 Score:-63
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[3, 2, 3],
         [3, 2, 2],
         [3, 2, 2]],
 
        [[2, 3, 1],
         [1, 1, 1],
         [2, 1, 1]],
 
        [[2, 3, 3],
         [3, 1, 1],
         [0, 0, 0]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 3,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:8 Score:3
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[1, 2, 0],
         [1, 2, 3],
         [2, 2, 2]],
 
        [[1, 3, 0],
         [1, 1, 0],
         [2, 3, 2]],
 
        [[3, 3, 1],
         [1, 1, 3],
         [3, 2, 3]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 2,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:9 Score:-45
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[3, 2, 2],
         [1, 3, 3],
         [1, 2, 3]],
 
        [[2, 3, 2],
         [1, 1, 2],
         [1, 3, 1]],
 
        [[1, 0, 0],
         [3, 2, 0],
         [1, 3, 2]]])}

array([], dtype=int32)

{'containerCount': array([8, 8, 8]),
 'container': 3,
 'grid': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]])}

Episode:10 Score:-2
final env state


{'containerCount': array([0, 0, 0]),
 'container': 0,
 'grid': array([[[3, 1, 1],
         [3, 2, 3],
         [3, 3, 3]],
 
        [[3, 1, 2],
         [2, 0, 0],
         [1, 1, 2]],
 
        [[2, 2, 0],
         [3, 1, 2],
         [1, 2, 1]]])}

array([], dtype=int32)

array([[[3, 1, 1],
        [3, 2, 3],
        [3, 3, 3]],

       [[3, 1, 2],
        [2, 0, 0],
        [1, 1, 2]],

       [[2, 2, 0],
        [3, 1, 2],
        [1, 2, 1]]])

# Stable baselines

In [99]:
for percentage in np.arange(0.0, 1.0, 0.1):
    print(percentage)

0.0
0.1
0.2
0.30000000000000004
0.4
0.5
0.6000000000000001
0.7000000000000001
0.8
0.9


In [100]:
from typing import Callable
def make_env(numberOfShips, containersPerShip, boxHeight, boxWidth, boxLength, percentage) -> Callable:
    def _init() -> Env:
        env = ContainerEnv(numberOfShips, containersPerShip, boxHeight, boxWidth, boxLength, percentage)
        return env
    return _init

In [101]:
def getTotalTimeSteps(timesteps, percentage):
    return timesteps
    #return timesteps * (1 - percentage)

In [102]:
num_cpu = 4
startPercentage = 0.9
timesteps = 40_000

modelEnvironment = DummyVecEnv([make_env(numberOfShips, containersPerShip, boxWidth, boxLength, boxHeight, 0) for i in range(num_cpu)])
model = PPO("MultiInputPolicy", modelEnvironment, verbose=1, learning_rate=0.0005, device='cpu', tensorboard_log="/tmp/ContainerEnvV11_2")
model.learn(total_timesteps=getTotalTimeSteps(timesteps, 0), tb_log_name=("Env_with_percentage_" + str(0)))
model.train()

model.save("ContainerEnvV11_2")
for modelIterations in range(0, 5):
    print("Model iteratie: " + str(modelIterations))
    for percentage in reversed(np.arange(0.0, startPercentage, 0.1)):
        print("Using percentage: " + str(percentage))
        newEnv = DummyVecEnv([make_env(numberOfShips, containersPerShip, boxWidth, boxLength, boxHeight, percentage) for i in range(num_cpu)])
        model.set_env(newEnv)
        model.learn(total_timesteps=getTotalTimeSteps(timesteps, percentage), tb_log_name=("Env_with_percentage_" + str(percentage) + str(modelIterations)), reset_num_timesteps=False)
        model.train()
        model.save("ContainerEnvV11_2")

Using cpu device
Logging to /tmp/ContainerEnvV11_2\Env_with_percentage_0_1
-----------------------------
| time/              |      |
|    fps             | 3171 |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 8192 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1478        |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.012076547 |
|    clip_fraction        | 0.153       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.19       |
|    explained_variance   | 0.00281     |
|    learning_rate        | 0.0005      |
|    loss                 | 32.1        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0184     |
|    value_loss           | 98.2        |
-

KeyboardInterrupt: 

In [None]:
eval_env = ContainerEnv(numberOfShips, containersPerShip, boxWidth, boxLength, boxHeight)
eval_env.reset()
display(eval_env.containerGrid)
display(eval_env.listOfContainers)

In [None]:
from stable_baselines3.common.monitor import Monitor
monitor = Monitor(eval_env)
evaluate_policy(model, monitor, n_eval_episodes=5, return_episode_rewards= True, render=False)

# Using trained agent

In [None]:
display(eval_env.reset())
display(eval_env.listOfContainers)

In [None]:
obs = eval_env.reset()
print("---------------------\n")
for i in range(10):
    display(eval_env.listOfContainers)
    print(len(eval_env.listOfContainers))
    reward = 0
    dones = False
    display(obs)
    while not dones:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    eval_env.render()
    print((obs['grid'].flatten() > 0).sum())
    print("Reward: " + str(reward))
    display(eval_env.listOfContainers)
    obs = eval_env.reset()
    print("---------------------\n")

In [None]:
amountOfShips = 3
containersOnShip = 4
height = 3
width = 3
length = 3

In [None]:
for percentage in np.arange(0.0, 0.9, 0.1):
    validateEnv = ContainerEnv(amountOfShips, containersOnShip, width, length, height, percentage)
    state = validateEnv.reset()
    print('Start list of containers ------------------------')
    display(validateEnv.listOfContainers)
    print(len(validateEnv.listOfContainers))
    print('Start render ------------------------------------')
    validateEnv.render()
    reward = 0
    dones = False
    while not dones:
        action, _states = model.predict(state, deterministic=True)
        state, rewards, dones, info = validateEnv.step(action)
        reward += rewards
    print('End render --------------------------------------')
    print((state['grid'].flatten() > 0).sum())
    print("Reward: " + str(reward))
    validateEnv.render()
    print('End list of containers --------------------------')
    display(validateEnv.listOfContainers)