In [None]:
from PathPlanningEnv import PathPlanningEnv
from FCNN import FCNN
from run import TrainMemorize, TrainQlearning, PlayOnce, TrainQlearningMultiple, TrainQlearningMultipleReverse
from utility import MovingAveragePlot, VectorFieldPlot
import settings
from matplotlib import pyplot as plt
from Q_Network import Q_Network
import time
import random

In [None]:
# Environment parameters: a 10x10 grid, obs_count=10 (presumably the number of
# obstacles - confirm in PathPlanningEnv) and a fixed random seed.
model_settings = dict(height=10, width=10, obs_count=10, random_seed=42)

# Construct the environment, render it, and print its `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Network input size: three values per grid cell plus four extra inputs.
# NOTE(review): presumably 3 grid layers + a 4-element action encoding - confirm against FCNN.
input_dim = 4 + 3 * model_settings['width'] * model_settings['height']
network_fcnn = FCNN(input_dim)

In [None]:
# Train the FCNN with TrainMemorize using a default settings.Config(), and
# report the wall-clock time of the call. The two time.time() readings must
# stay immediately around the training call.
config = settings.Config()
start_time = time.time()
rewards, losses = TrainMemorize(network_fcnn, env, config)
end_time = time.time()
# %d truncates the elapsed time to whole seconds.
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Plot the rewards returned by TrainMemorize; 100 is presumably the moving-average window - confirm in utility.
MovingAveragePlot(rewards, 100)

In [None]:
# Print the environment's `foot_prints` attribute (NOTE(review): presumably a record of visited cells - confirm in PathPlanningEnv).
print(env.foot_prints)

In [None]:
# Run PlayOnce with the memorize-trained FCNN on the current environment.
PlayOnce(network_fcnn, env, config)

In [None]:
# Rebuild the environment from layer 2 of the current grid (presumably the
# obstacle layer - confirm), keeping the goal and starting at row 4, column 9.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=4,
    init_col=9,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same map and goal as before; the start position moves to row 1, column 9.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=1,
    init_col=9,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same map and goal as before; the start position moves to row 0, column 5.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=0,
    init_col=5,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same map and goal as before; the start position moves to row 1, column 0.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=1,
    init_col=0,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Call VectorFieldPlot for the memorize-trained FCNN on the current environment; note the argument order is width, then height.
VectorFieldPlot(network_fcnn, env, model_settings['width'], model_settings['height'])

## Q-learning

In [None]:
# Recreate the environment for the Q-learning experiment with the same
# parameters (10x10 grid, obs_count=10, seed 42) as the previous section.
model_settings = dict(height=10, width=10, obs_count=10, random_seed=42)

# Fresh environment instance; show it and print its `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Fresh, untrained FCNN for Q-learning; this replaces the network trained in the previous section.
# Input size is three values per grid cell plus four extra inputs
# (NOTE(review): presumably 3 grid layers + an action encoding - confirm against FCNN).
input_dim = 4 + 3 * model_settings['width'] * model_settings['height']
network_fcnn = FCNN(input_dim)

In [None]:
# Q-learning configuration, one keyword per line for readability.
config = settings.Config(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.05,
    max_play_length=500,
    epochs=2000,
    gamma=0.99,
    random_reset=False,
)

# Time the training call; with trace_step=10 the call is unpacked into three
# values, the third being the recorded traces used by later cells.
start_time = time.time()
rewards, losses, traces = TrainQlearning(network_fcnn, env, config, trace_step=10)
end_time = time.time()
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Plot the Q-learning rewards; 100 is presumably the moving-average window - confirm in utility.
MovingAveragePlot(rewards, 100)

In [None]:
# Print the environment's `foot_prints` attribute after Q-learning (NOTE(review): presumably a record of visited cells - confirm).
print(env.foot_prints)

In [None]:
# Run PlayOnce with the Q-learning-trained FCNN on the current environment.
PlayOnce(network_fcnn, env, config)

In [None]:
# Dump every recorded trace to traces.txt, one per line:
#   <label>  <trace length>  <trace>
# The label is the 1-based position times 10, which matches trace_step=10 in
# the training call (presumably the epoch at which the trace was recorded).
with open("traces.txt", "w+") as f:
    for count, trace in enumerate(traces, start=1):
        # !s forces str() so the text matches a plain str(trace) conversion.
        f.write(f"{count * 10}  {len(trace)}  {trace!s}\n")

In [None]:
# Draw the agent's path for one recorded training trace.
# `plt` is already imported in the notebook's first cell, so it is not
# re-imported here.

# Take the grid extents from the settings instead of hard-coding 10 / 11.
grid_height = model_settings['height']
grid_width = model_settings['width']

# NOTE(review): index 190 assumes at least 191 traces were recorded
# (epochs=2000 with trace_step=10 presumably yields 200) - confirm.
trace = traces[190]

# Each trace entry is indexed as (row, col). Rows are flipped so that row 0
# is drawn at the top of the figure.
row = [grid_height - x[0] for x in trace]
col = [x[1] for x in trace]

fig, ax = plt.subplots()
ax.set_xlim([-1, grid_width])
ax.set_ylim([0, grid_height + 1])
ax.plot(col, row)        # path as a connected line
ax.plot(col, row, 'or')  # visited positions as red circles
ax.set_xlabel('column')
ax.set_ylabel('grid height - row')
ax.set_title('Agent path for traces[190]')
plt.show()

In [None]:
# Rebuild the environment from layer 2 of the current grid (presumably the
# obstacle layer - confirm), keeping the goal and starting at row 4, column 9,
# then replay with the Q-learning-trained FCNN.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=4,
    init_col=9,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same map and goal; replay from row 1, column 9.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=1,
    init_col=9,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same map and goal; replay from row 0, column 5.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=0,
    init_col=5,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Same map and goal; replay from row 1, column 0.
env = PathPlanningEnv(
    goal_row=env.goal_row,
    goal_col=env.goal_col,
    init_row=1,
    init_col=0,
    grid=env.grid[2, :, :],
)
PlayOnce(network_fcnn, env, config)

In [None]:
# Call VectorFieldPlot for the Q-learning-trained FCNN on the current environment; note the argument order is width, then height.
VectorFieldPlot(network_fcnn, env, model_settings['width'], model_settings['height'])

## Q-learning with CNN

In [None]:
# Environment for the CNN experiment, built with the same parameters
# (10x10 grid, obs_count=10, seed 42) as the earlier sections.
model_settings = dict(height=10, width=10, obs_count=10, random_seed=42)

# New environment instance; display it and print its `distances` attribute.
env = PathPlanningEnv(**model_settings)
env.display()
print(env.distances)

In [None]:
# Q_Network for the single-map experiment: 8 output channels and 3x3 kernels
# for both convolutions, hidden size 64.
# The keyword spellings (including "Covn") are kept exactly as this notebook
# passes them; presumably they match Q_Network's signature.
network_cnn = Q_Network(
    MapHeight=model_settings['height'],
    MapWidth=model_settings['width'],
    BatchSize=1,
    Covn1OutChan=8,
    Covn2OutChan=8,
    Conv1Kernel=3,
    Conv2Kernel=3,
    HiddenSize=64,
)

In [None]:
# Q-learning configuration for the CNN run.
config = settings.Config(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.05,
    max_play_length=500,
    epochs=2000,
    gamma=0.9,
)

start_time = time.time()
# The FCNN section of this notebook unpacks TrainQlearning into three values
# (rewards, losses, traces). The starred target absorbs anything after the
# first two, so this cell works whether the call returns two or three values
# instead of raising "too many values to unpack".
rewards, losses, *extra_outputs = TrainQlearning(network_cnn, env, config)
end_time = time.time()
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Plot the CNN Q-learning rewards; 100 is presumably the moving-average window - confirm in utility.
MovingAveragePlot(rewards, 100)

In [None]:
# Call VectorFieldPlot for the trained CNN on the single training environment; note the argument order is width, then height.
VectorFieldPlot(network_cnn, env, model_settings['width'], model_settings['height'])

## Q-learning with CNN on Multiple Maps

In [None]:
# Shared map parameters; the seed is filled in per environment below.
model_settings = dict(height=10, width=10, obs_count=10)

# Build one environment per seed in 42..51 (ten in total). The settings dict
# is updated in place, so it keeps the last seed after the loop.
envs = []
for seed in range(42, 52):
    model_settings.update(random_seed=seed)
    envs.append(PathPlanningEnv(**model_settings))

In [None]:
# Larger Q_Network for the multi-map experiment: 32 and 64 convolution output
# channels, 3x3 kernels, hidden size 256.
# The keyword spellings (including "Covn") are kept exactly as this notebook
# passes them; presumably they match Q_Network's signature.
network_cnn = Q_Network(
    MapHeight=model_settings['height'],
    MapWidth=model_settings['width'],
    BatchSize=1,
    Covn1OutChan=32,
    Covn2OutChan=64,
    Conv1Kernel=3,
    Conv2Kernel=3,
    HiddenSize=256,
)

In [None]:
# Configuration for the multi-map run: ten times more epochs and a ten times
# smaller epsilon_step than the single-map CNN run.
config = settings.Config(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.005,
    max_play_length=500,
    epochs=20000,
    gamma=0.9,
)

# Train on the whole list of environments and report wall-clock time.
start_time = time.time()
rewards, losses = TrainQlearningMultiple(network_cnn, envs, config)
end_time = time.time()
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# Call VectorFieldPlot for the trained CNN on one of the training environments (index 8 of `envs`).
VectorFieldPlot(network_cnn, envs[8], model_settings['width'], model_settings['height'])

In [None]:
# Seed 52 lies outside the 42..51 range used to build `envs`, so this
# environment was not part of the training list.
model_settings.update(random_seed=52)
test_env = PathPlanningEnv(**model_settings)

In [None]:
# Call VectorFieldPlot for the trained CNN on the held-out test environment.
VectorFieldPlot(network_cnn, test_env, model_settings['width'], model_settings['height'])

## Q-learning with CNN and reversed training (supports multiple maps)

In [None]:
# Shared map parameters; the seed is filled in per environment below.
model_settings = dict(height=10, width=10, obs_count=10)

# range(42, 43) produces a single environment (seed 42); widen the range to
# train on more maps.
envs = []
for seed in range(42, 43):
    model_settings.update(random_seed=seed)
    envs.append(PathPlanningEnv(**model_settings))

In [None]:
# Q_Network for the reversed-training experiment: 32 and 64 convolution output
# channels, 3x3 kernels, hidden size 256.
# The keyword spellings (including "Covn") are kept exactly as this notebook
# passes them; presumably they match Q_Network's signature.
network_cnn = Q_Network(
    MapHeight=model_settings['height'],
    MapWidth=model_settings['width'],
    BatchSize=1,
    Covn1OutChan=32,
    Covn2OutChan=64,
    Conv1Kernel=3,
    Conv2Kernel=3,
    HiddenSize=256,
)

In [None]:
# Configuration for the reversed-training run (400 epochs).
config = settings.Config(
    epsilon=0.9,
    epsilon_low=0.1,
    epsilon_step=0.05,
    max_play_length=500,
    epochs=400,
    gamma=0.9,
)

# Train on the list of environments and report wall-clock time.
start_time = time.time()
rewards, losses = TrainQlearningMultipleReverse(network_cnn, envs, config)
end_time = time.time()
print('Training time: %d seconds' % (end_time - start_time))

In [None]:
# The environment argument was missing here (a SyntaxError). This section
# builds exactly one environment (seed 42), so plot that one.
VectorFieldPlot(network_cnn, envs[0], model_settings['width'], model_settings['height'])

## Q-learning with progressive training

In [None]:
from ProgressiveTrainer import ProgressiveTrainer
from AVNet import AVNet

In [None]:
# Create an AVNet with its default constructor arguments; this rebinds `network_cnn`, replacing the Q_Network from the previous section.
network_cnn = AVNet()

In [None]:
# Wrap the network in a ProgressiveTrainer.
# NOTE(review): device='cuda' is hard-coded, so this presumably requires a CUDA-capable GPU - confirm whether 'cpu' is accepted.
trainer = ProgressiveTrainer(network_cnn, device='cuda', max_env_num=80)

In [None]:
# Call trainer.train_once() repeatedly until the returned state reports
# 'finish'. `cnt` counts the calls and is what the log lines call "epoch".
cnt = 0
while True:
    cnt += 1
    train_state = trainer.train_once()

    if train_state['finish']:
        break

    # The trainer flags steps on which 'inc_env' is set.
    if train_state['inc_env']:
        print('epoch %d environment increased' % cnt)

    # Only every 100th step prints the test summary.
    if cnt % 100 != 0:
        continue

    # One line: step number followed by each value yielded by test_envs()
    # (presumably per-environment failure rates), three decimals each.
    print('epoch %d' % cnt, end=' ')
    print('test envs:', end= ' ')
    for fail_rate in trainer.test_envs():
        print('%.3f' % fail_rate, end=' ')
    print()

## Check rewards (maximum network prediction per grid cell)

In [None]:
import numpy as np

# Print a table with one entry per grid cell: the largest value network_fcnn
# predicts over all actions when the agent is placed on that cell, or "xxxx"
# where layer 2 of the grid is non-zero (presumably an obstacle - confirm).
#
# NOTE: env._init_from_grid() re-initialises `env` in place on every
# iteration, so after this cell `env` is left at the last visited cell.

# Read the table bounds from the grid itself (axis 1 is indexed by the row i,
# axis 2 by the column j) instead of hard-coding a 10x10 map.
n_rows, n_cols = env.grid.shape[1], env.grid.shape[2]

for i in range(n_rows):
    for j in range(n_cols):
        # Re-place the agent at (i, j) on the same layer-2 map with the same goal.
        old_obs = env.grid[2,:,:]
        goal_row = env.goal_row
        goal_col = env.goal_col
        env._init_from_grid(old_obs, i, j, goal_row, goal_col)

        if env.grid[2, i, j] == 0:
            preds = []
            # Detached copy of the grid with a leading axis of size 1 added.
            state = env.grid.clone().detach()
            state = state.view(1, *state.shape)
            for action in env.actions:
                action = action.view(1, *action.shape)
                pred = network_fcnn(state, action)
                preds.append(pred)

            # Best prediction over all actions, two decimals.
            print("{:.2f}".format(max(preds).item()), end = " ")
        else:
            print("xxxx", end = " ")

    # End the current table row.
    print(" ")