In [None]:
from PathPlanningEnv import PathPlanningEnv
from FCNN import FCNN
from run import TrainMemorize, TrainQlearning, PlayOnce
from utility import MovingAveragePlot, VectorFieldPlot
import settings
from matplotlib import pyplot as plt
from Q_Network import Q_Network
import time

In [None]:
# Environment parameters: a 10x10 map, obs_count=5 (presumably the number of
# obstacles — confirm in PathPlanningEnv) and a fixed random seed.
model_settings = dict(height=10, width=10, obs_count=5, random_seed=42)

# Build the environment from those settings, render it, and print its
# `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Input width of the fully connected network, derived from the map size.
# NOTE(review): presumably 3 flattened grid layers plus a 4-value action
# encoding — confirm against FCNN.
input_dim = 4 + 3 * (model_settings['height'] * model_settings['width'])
network_fcnn = FCNN(input_dim)

In [None]:
# Train network_fcnn with TrainMemorize, using a Config built without
# overrides, and report how long the training call took.
config = settings.Config()
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
rewards, losses = TrainMemorize(network_fcnn, env, config)
end_time = time.perf_counter()
# %d truncates the elapsed float to whole seconds.
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Plot the `rewards` collected by the training call.
# NOTE(review): 100 is presumably the moving-average window size — confirm in
# utility.MovingAveragePlot.
MovingAveragePlot(rewards, 100)

In [None]:
# Print the environment's `foot_prints` attribute (presumably the cells the
# agent has visited — confirm in PathPlanningEnv).
print(env.foot_prints)

In [None]:
# Call PlayOnce with network_fcnn on the current environment and config
# (presumably plays one episode — confirm in run.PlayOnce).
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 4, column 9, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=4, init_col=9,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 1, column 9, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=1, init_col=9,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 0, column 5, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=0, init_col=5,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 1, column 0, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=1, init_col=0,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Call VectorFieldPlot for network_fcnn on the current environment
# (presumably draws the network's preferred action per cell — confirm in
# utility.VectorFieldPlot). Note the argument order: width first, then height.
VectorFieldPlot(network_fcnn, env, model_settings['width'], model_settings['height'])

## Q-learning

In [None]:
# Start the Q-learning experiment from a freshly built environment: a 10x10
# map, obs_count=5 (presumably the number of obstacles — confirm in
# PathPlanningEnv) and a fixed random seed.
model_settings = dict(height=10, width=10, obs_count=5, random_seed=42)

# Build the environment, render it, and print its `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Rebind network_fcnn to a brand-new FCNN so Q-learning does not start from a
# previously trained network.
# NOTE(review): input_dim is presumably 3 flattened grid layers plus a 4-value
# action encoding — confirm against FCNN.
input_dim = 4 + 3 * (model_settings['height'] * model_settings['width'])
network_fcnn = FCNN(input_dim)

In [None]:
# Q-learning hyper-parameters passed to settings.Config (their exact meaning is
# defined there).
config = settings.Config(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.05,
    max_play_length=500,
    epochs=2000,
    gamma=0.99,
)
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
rewards, losses = TrainQlearning(network_fcnn, env, config)
end_time = time.perf_counter()
# %d truncates the elapsed float to whole seconds.
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Plot the `rewards` collected by the Q-learning training call.
# NOTE(review): 100 is presumably the moving-average window size — confirm in
# utility.MovingAveragePlot.
MovingAveragePlot(rewards, 100)

In [None]:
# Print the environment's `foot_prints` attribute (presumably the cells the
# agent has visited — confirm in PathPlanningEnv).
print(env.foot_prints)

In [None]:
# Call PlayOnce with network_fcnn on the current environment and config
# (presumably plays one episode — confirm in run.PlayOnce).
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 4, column 9, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=4, init_col=9,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 1, column 9, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=1, init_col=9,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 0, column 5, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=0, init_col=5,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid and the current
# goal, with the agent starting at row 1, column 0, then replay the network.
env = PathPlanningEnv(
    grid=env.grid[2, :, :],
    init_row=1, init_col=0,
    goal_row=env.goal_row, goal_col=env.goal_col,
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Call VectorFieldPlot for network_fcnn on the current environment
# (presumably draws the network's preferred action per cell — confirm in
# utility.VectorFieldPlot). Note the argument order: width first, then height.
VectorFieldPlot(network_fcnn, env, model_settings['width'], model_settings['height'])

## Q-learning with CNN

In [None]:
# Build the Q_Network for the same map size as the environment settings.
# NOTE(review): the 'Covn…OutChan' spelling is kept exactly as written; it
# presumably matches the keyword names declared by Q_Network — confirm before
# renaming.
network_cnn = Q_Network(
    BatchSize=1,
    MapHeight=model_settings['height'], MapWidth=model_settings['width'],
    Covn1OutChan=8, Conv1Kernel=3,
    Covn2OutChan=8, Conv2Kernel=3,
    HiddenSize=64,
)

In [None]:
# Train the CNN-based network with Q-learning and report how long it took.
# NOTE(review): when the notebook is run top to bottom, `env` here is the one
# left behind by the last replay cell (rebuilt with init_row=1, init_col=0),
# whereas both FCNN sections train on a fresh PathPlanningEnv(**model_settings)
# — confirm this is intended.
config = settings.Config(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.05,
    max_play_length=500,
    epochs=2000,
    gamma=0.99,
)
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
rewards, losses = TrainQlearning(network_cnn, env, config)
end_time = time.perf_counter()
# %d truncates the elapsed float to whole seconds.
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Plot the `rewards` collected while training network_cnn.
# NOTE(review): 100 is presumably the moving-average window size — confirm in
# utility.MovingAveragePlot.
MovingAveragePlot(rewards, 100)

In [None]:
# Call VectorFieldPlot for network_cnn on the current environment
# (presumably draws the network's preferred action per cell — confirm in
# utility.VectorFieldPlot). Note the argument order: width first, then height.
VectorFieldPlot(network_cnn, env, model_settings['width'], model_settings['height'])

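## Check rewards

For every start cell of the grid, print the highest value the network predicts over all actions; cells whose layer-2 grid entry is non-zero are shown as `xxxx`.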

In [None]:
import numpy as np  # NOTE(review): not used in this cell; kept in case other cells rely on `np`.

# For every start cell (i, j), re-initialise the environment with the agent
# placed there and print the highest value network_fcnn returns over all of
# env.actions. Cells whose layer-2 grid entry is non-zero are printed as
# "xxxx" instead. One text row is printed per grid row.
# NOTE(review): this cell follows the CNN section but queries network_fcnn —
# confirm that is the network meant to be inspected.

# Take the loop bounds from the grid itself (indexed as [layer, row, col]
# below) rather than hard-coding 10, so the check works for other map sizes.
n_rows, n_cols = env.grid.shape[1:]

for i in range(n_rows):
    for j in range(n_cols):
        # Re-seat the agent at (i, j) while keeping grid layer 2 and the goal.
        old_obs = env.grid[2,:,:]
        goal_row = env.goal_row
        goal_col = env.goal_col
        env._init_from_grid(old_obs, i, j, goal_row, goal_col)

        if env.grid[2, i, j] == 0:
            preds = []
            # Detached copy of the grid with a leading dimension of size 1.
            state = env.grid.clone().detach()
            state = state.view(1, *state.shape)
            for action in env.actions:
                action = action.view(1, *action.shape)
                pred = network_fcnn(state, action)
                preds.append(pred)

            # Best predicted value over all actions for this start cell.
            print("{:.2f}".format(max(preds).item()), end = " ")
        else:
            print("xxxx", end = " ")

    # End the current text row.
    print(" ")