In [None]:
%matplotlib inline
%run env.py
%run rl-helpers.py

Q-learning agent
---

In [None]:
from IPython.display import display

# Create environment: the observation wrapper decides what the agents see.
obs_wrapper = GridView # Possible values: CompassQTable, LidarCompassQTable, GridView, BinaryGridView
env = obs_wrapper(DeliveryDrones(n=5))
states = env.reset()

# Create agents: every drone starts with a random policy, then the first
# drone's entry is replaced by the Q-learning agent under study.
agents = {drone.index: RandomAgent(env) for drone in env.drones}
my_agent = QLearningAgent(
    env, gamma=0.9, alpha=0.1, epsilon_start=1, epsilon_decay=0.98, epsilon_end=0.01)
my_drone = env.drones[0]
agents[my_drone.index] = my_agent

# Train for a few steps, plot results
trainer = MultiAgentTrainer(env, agents, seed=0)
my_agent.is_greedy = False  # presumably enables exploration during training; see QLearningAgent
trainer.train(5000)
# NOTE(review): subset=range(1, 5) selects indices 1..4 while our agent is bound
# to env.drones[0].index; confirm that this subset includes the drone of interest.
plot_rolling_rewards(trainer.rewards_log, subset=range(1, 5))

# Jupyter only renders the value of a cell's *last* expression, so a bare
# `my_agent.get_qtable()` in the middle of the cell was silently discarded.
# NOTE(review): assumes get_qtable() returns the table rather than printing it.
display(my_agent.get_qtable())

# Test agents with exploration switched off
my_agent.is_greedy = True
rewards_log = test_agents(env, agents, n_steps=1000, seed=0)
plot_cumulative_rewards(rewards_log, subset=range(1, 5))

Test agents
---

In [None]:
from IPython.display import clear_output
import time

# Make sure our agent behaves greedily. The training cell toggles this flag on
# `my_agent`, so it has to be set on the agent, not on the environment's drone
# object (`my_drone`), which is only used here to look up the index.
my_agent.is_greedy = True

# Simulation loop: runs until the kernel is interrupted.
states = env.reset()
rewards = None  # nothing to report before the first env.step()
while True:
    # Render: replace the previous frame instead of appending to the output
    clear_output(wait=True)
    # NOTE(review): 'ainsi' looks like a misspelling of 'ansi'; it is kept as-is
    # because the development cell uses the same mode name. Confirm against env.py.
    print(env.render('ainsi'))

    # Act: one action per agent, keyed by drone index
    actions = {index: agent.act(states[index]) for index, agent in agents.items()}

    # Print last rewards and next actions for our drone only
    print('Drone:', my_drone.index)
    if hasattr(env, 'format_state'):
        print('Current states:', env.format_state(states[my_drone.index]))
    if hasattr(env, 'format_action'):
        print('Next actions:', env.format_action(actions[my_drone.index]))
    if rewards is not None:
        print('Last rewards:', rewards[my_drone.index])

    # Sleep so the frame stays readable, then step the environment
    # (no learning happens in this loop; the agents only act).
    time.sleep(0.5)
    states, rewards, dones, _ = env.step(actions)

Benchmarking
---

In [None]:
# Build a 25-drone environment wrapped in the GridView observation wrapper
env = GridView(DeliveryDrones(n=25))
states = env.reset()

# Throughput benchmark: drive every drone with randomly sampled actions
# and let the progress bar report how fast the environment steps.
n_benchmark_steps = 10**6
for _ in tqdm_notebook(range(n_benchmark_steps)):
    random_actions = {
        drone.index: env.action_space.sample()
        for drone in env.drones
    }
    env.step(random_actions)

Development space below
---

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import gym.spaces as spaces
import numpy as np

In [None]:
# Execute the project scripts; %run loads the names they define
# (GridView, DeliveryDrones, Drone, Packet, Dropzone, ...) into the notebook namespace.
%run env.py
%run rl-helpers.py

# Create environment
env = GridView(DeliveryDrones(n=5))
states = env.reset()

# Inspect the freshly reset world. get_objects(cls) is unpacked as a pair;
# judging by how it is used here, that is (objects, positions) - TODO confirm in env.py.
# Drones are looked up on the `air` layer.
all_drones, all_drones_positions = env.air.get_objects(Drone)
print('Drones:', all_drones, all_drones_positions)

# Packets and dropzones are looked up on the `ground` layer.
all_packets, all_packets_positions = env.ground.get_objects(Packet)
print('Packets:', all_packets, all_packets_positions)

all_dropzones, all_dropzones_positions = env.ground.get_objects(Dropzone)
print('Dropzones:', all_dropzones, all_dropzones_positions)

# Show the `packet` attribute of each drone next to the drone itself
print('Drones packets:', [(d, d.packet) for d in all_drones])
# Text rendering of the grid, using the same 'ainsi' mode name as the simulation loop
print(env.render(mode='ainsi'))

Graphics
---

In [None]:
# TODO

# Drone, pickup, dropoff, collision ~ simple geometric shapes
# https://image.freepik.com/free-vector/simple-geometric-shapes-background_1168-371.jpg
# Animation: slightly growing/shrinking to simulate up/down movement with shade

# Desired output
# https://img.deszone.net/2018/05/simple-geometric-shapes-free-vector-pattern4.jpg
# https://as1.ftcdn.net/jpg/01/72/82/18/500_F_172821814_Oyl3cNYBcigDpeCzehbAQghLxJILrZA5.jpg

# Other ideas
# Drones leave a fading trace