In [None]:
from PathPlanningEnv import PathPlanningEnv
from FCNN import FCNN
from run import TrainMemorize, TrainQlearning, PlayOnce
from utility import MovingAveragePlot
import settings
from matplotlib import pyplot as plt

In [None]:
# Environment configuration for the memorization experiment. The keys are
# forwarded verbatim to PathPlanningEnv as keyword arguments; obs_count is
# presumably the number of obstacles — confirm in PathPlanningEnv.
model_settings = dict(height=10, width=10, obs_count=5, random_seed=42)

# Build the environment, render it, and print its `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Network input size: 3 values per grid cell plus 4 extra inputs.
# NOTE(review): presumably 3 = number of grid layers and 4 = size of the
# action encoding — confirm against FCNN / PathPlanningEnv.
cells_per_layer = model_settings['height'] * model_settings['width']
input_dim = 3 * cells_per_layer + 4
network_fcnn = FCNN(input_dim)

In [None]:
# Train the network with the TrainMemorize routine using the default
# settings.Config(); the call returns two values, kept here as `rewards` and
# `losses` (presumably per-epoch histories — confirm in run.py).
config = settings.Config()
rewards, losses = TrainMemorize(network_fcnn, env, config)

In [None]:
# Plot the `rewards` returned by TrainMemorize; 100 is presumably the
# moving-average window length — confirm in utility.py.
MovingAveragePlot(rewards, 100)

In [None]:
# Dump the environment's `foot_prints` attribute (presumably a record of the
# cells visited so far — confirm in PathPlanningEnv).
print(env.foot_prints)

In [None]:
# Run the trained network once on the training environment.
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment on the same layer-2 grid (the obstacle layer) and
# the same goal, but start the agent at row 4, col 9, then play one episode.
start_row, start_col = 4, 9
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same obstacle layer and goal as before; the agent now starts at row 1, col 9.
start_row, start_col = 1, 9
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same obstacle layer and goal as before; the agent now starts at row 0, col 5.
start_row, start_col = 0, 5
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same obstacle layer and goal as before; the agent now starts at row 1, col 0.
start_row, start_col = 1, 0
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

## Q-learning

In [None]:
# Start the Q-learning experiment from a freshly constructed environment with
# the same configuration (and seed) that the memorization section used.
model_settings = dict(height=10, width=10, obs_count=5, random_seed=42)

# Build the environment, render it, and print its `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Fresh, untrained network for Q-learning (replaces the memorization model).
# Input size: 3 values per grid cell plus 4 extra inputs.
# NOTE(review): presumably 3 = number of grid layers and 4 = size of the
# action encoding — confirm against FCNN / PathPlanningEnv.
cells_per_layer = model_settings['height'] * model_settings['width']
input_dim = 3 * cells_per_layer + 4
network_fcnn = FCNN(input_dim)

In [None]:
# Q-learning hyper-parameters, passed to settings.Config as keyword arguments.
# NOTE(review): epsilon / epsilon_low / epsilon_step look like an
# epsilon-greedy schedule and gamma like the discount factor — confirm in
# settings.py.
q_learning_params = dict(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.05,
    max_play_length=500,
    epochs=2000,
    gamma=0.99,
)
config = settings.Config(**q_learning_params)
rewards, losses = TrainQlearning(network_fcnn, env, config)

In [None]:
# Plot the `rewards` returned by TrainQlearning; 100 is presumably the
# moving-average window length — confirm in utility.py.
MovingAveragePlot(rewards, 100)

In [None]:
# Dump the environment's `foot_prints` attribute after Q-learning (presumably
# a record of the cells visited so far — confirm in PathPlanningEnv).
print(env.foot_prints)

In [None]:
# Run the Q-learning-trained network once on the training environment.
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment on the same layer-2 grid (the obstacle layer) and
# the same goal, but start the agent at row 4, col 9, then play one episode
# with the Q-learning network.
start_row, start_col = 4, 9
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same obstacle layer and goal as before; the agent now starts at row 1, col 9.
start_row, start_col = 1, 9
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same obstacle layer and goal as before; the agent now starts at row 0, col 5.
start_row, start_col = 0, 5
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same obstacle layer and goal as before; the agent now starts at row 1, col 0.
start_row, start_col = 1, 0
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=start_row,
    init_col=start_col,
    goal_row=env.goal_row,
    goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

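### Check rewards

For every free grid cell, print the highest value the network predicts over all actions when the agent is placed on that cell; obstacle cells are printed as `xxxx`.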

In [None]:
import numpy as np

# Print a table with one entry per grid cell: the largest value the network
# predicts over all actions when the agent is placed on that cell, or "xxxx"
# when the cell is an obstacle (layer 2 of the grid is non-zero there).
#
# The grid size is taken from model_settings rather than a hard-coded 10x10,
# so the table stays correct if the environment dimensions are changed.
#
# Side effect: env._init_from_grid is called for every cell, so `env` is left
# re-initialised at the last cell visited.
for i in range(model_settings['height']):
    for j in range(model_settings['width']):
        # Re-initialise the environment with the same obstacle layer and goal,
        # placing the agent at (i, j).
        env._init_from_grid(env.grid[2, :, :], i, j, env.goal_row, env.goal_col)

        if env.grid[2, i, j] == 0:
            state = env.grid
            # Each prediction is a single-element tensor, so max() and .item()
            # are well defined.
            best = max(network_fcnn(state, action) for action in env.actions)
            print("{:.2f}".format(best.item()), end = " ")
        else:
            print("xxxx", end = " ")

    print(" ")

In [None]:
def PlotVectorField(env: PathPlanningEnv, width, height, network=None):
    """Draw the network's greedy action for every free cell as a vector field.

    For each non-obstacle cell the environment is re-initialised with the agent
    on that cell, the network is evaluated for every action in ``env.actions``,
    and an arrow is drawn in the direction of the best action with a length
    proportional to its predicted value. The goal is drawn as a circle and
    obstacles as red crosses.

    Args:
        env: Environment whose grid layer 2 marks obstacles (1 = obstacle,
            0 = free) and which exposes ``goal_row``, ``goal_col``,
            ``actions`` and ``_init_from_grid``.
        width: Number of grid columns; also used as the figure width in inches.
        height: Number of grid rows; also used as the figure height in inches.
        network: Callable ``network(state, action)`` returning a
            single-element tensor. Defaults to the notebook-level
            ``network_fcnn`` so existing three-argument calls keep working.

    Returns:
        None. The figure is left as the current Matplotlib figure.

    Side effects:
        ``env._init_from_grid`` is called for every cell, so ``env`` is left
        re-initialised at the last cell visited.
    """
    if network is None:
        # Backward-compatible fallback to the model defined in the notebook.
        network = network_fcnn

    # Create exactly one axes. The previous plt.subplots() + plt.axes() pair
    # produced two overlapping axes, so the hidden one's tick labels could
    # show through.
    _, ax = plt.subplots(figsize=(width, height))
    ax.set(xlim=(-1, width), ylim=(-1, height))

    # Goal marker. Rows are flipped so that row 0 is drawn at the top.
    ax.plot(env.goal_col, height - 1 - env.goal_row, linestyle='',
            markersize=20, marker='o', color=(0.5, 0.5, 1.0, 0.75))

    # obstacles
    for row in range(height):
        for col in range(width):
            if env.grid[2, row, col] == 1:
                ax.plot(col, height - 1 - row, linestyle='', markersize=15,
                        marker='x', markeredgewidth=2, color='r')

    # vector field
    for row in range(height):
        for col in range(width):
            # Place the agent at (row, col), keeping obstacles and goal.
            env._init_from_grid(env.grid[2, :, :], row, col,
                                env.goal_row, env.goal_col)
            if env.grid[2, row, col] != 0:
                continue

            state = env.grid
            # One scalar per action; .item() also works for non-CPU tensors.
            values = [network(state, action).detach().item()
                      for action in env.actions]
            best_value = max(values)
            best_action = values.index(best_value)

            # NOTE(review): the arrow length is the signed predicted value, so
            # a negative best value points the arrow opposite to the chosen
            # action. Behaviour kept as in the original; confirm it is intended.
            if best_action == 0:    # go up
                u, v = 0.0, best_value
            elif best_action == 1:  # go down
                u, v = 0.0, -best_value
            elif best_action == 2:  # go left
                u, v = -best_value, 0.0
            else:                   # go right
                u, v = best_value, 0.0

            ax.quiver(col, height - 1 - row, u, v, color=(0, 0.2, 0),
                      scale_units='xy', scale=2, headwidth=0.5)

    ax.set_xticks([])
    ax.set_yticks([])

In [None]:
# Draw the vector field for the current environment using the grid
# dimensions from model_settings.
PlotVectorField(env, model_settings['width'], model_settings['height'])