In [None]:
%matplotlib inline
%run env.py
%run rl-helpers.py

Q-learning and DQN agents
---

In [None]:
# Environment setup. Pick one observation wrapper:
#   Q-table wrappers: CompassQTable, CompassChargeQTable, LidarCompassQTable, LidarCompassChargeQTable
#   Grid wrappers:    GlobalGridView, PlayerGridView
obs_wrapper = PlayerGridView
env = obs_wrapper(DeliveryDrones(n=1))
states = env.reset()

# Agent selection: exactly one variant below is active, the other two are
# kept as commented-out alternatives that can be swapped in.

# Variant A (disabled) - Q-learning agent
# my_agent = QLearningAgent(env, gamma=0.99, alpha=0.1, epsilon_start=1, epsilon_decay=0.99, epsilon_end=0.01)

# Variant B (active) - DQN with dense Q-network
my_agent = DQNAgent(
    env,
    DenseQNetworkFactory(env, hidden_layers=[32]),
    gamma=0.99,
    epsilon_start=1,
    epsilon_decay=0.99,
    epsilon_end=0.01,
    memory_size=10000,
    batch_size=64,
    target_update_interval=5,
)

# Variant C (disabled) - DQN with conv. Q-network
# my_agent = DQNAgent(
#     env, ConvQNetworkFactory(env, conv_layers=[
#         #{'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'padding': 1},
#         #{'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'padding': 1},
#         #{'out_channels': 16, 'kernel_size': 3, 'stride': 1, 'padding': 1}
#     ], dense_layers=[]),
#     gamma=0.995, epsilon_start=1, epsilon_decay=0.999, epsilon_end=0.01, memory_size=10000, batch_size=64, target_update_interval=5)

# Opponents: every drone gets a random agent first, then the first drone
# is handed over to the agent selected above.
agents = dict((drone.index, RandomAgent(env)) for drone in env.drones)
my_drone = env.drones[0]
agents[my_drone.index] = my_agent

# Short training run with exploration enabled, followed by the reward plot
trainer = MultiAgentTrainer(env, agents, seed=0)
my_agent.is_greedy = False
trainer.train(200)
plot_rolling_rewards(trainer.rewards_log, subset=range(1, 5))

Inspect agents
---

In [None]:
from IPython.display import display

# What gets inspected depends on the kind of agent that was trained
if isinstance(my_agent, QLearningAgent):
    # Tabular agent: report the table size and show a few random rows
    print('Q-table:', my_agent.get_qtable().shape)
    display(my_agent.get_qtable().sample(10))
elif isinstance(my_agent, DQNAgent):
    # Deep agent: look into the replay memory, then print the network
    my_agent.inspect_memory()
    print('Q-network:', my_agent.qnetwork, '', sep='\n')

# Exploration schedule recorded by the agent
plt.plot(my_agent.epsilons)
plt.xlabel('Episodes')
plt.ylabel('Epsilon')
plt.title('Epsilon decay')
plt.show()

# Evaluate without exploration, once per seed in 0..9
my_agent.is_greedy = True
for seed in range(10):
    rewards_log = test_agents(env, agents, n_steps=1000, seed=seed)
    plot_cumulative_rewards(rewards_log, subset=range(1, 5))

Test agents
---

In [None]:
from IPython.display import clear_output
import time

# Make sure our agent behaves greedily (no exploration) during the demo.
# The flag has to be set on the agent: actions below come from `agent.act(...)`
# for the entries of `agents`, and the rest of this notebook toggles
# `my_agent.is_greedy`. Setting it on the drone object (as this cell used to do)
# only adds an unused attribute to the drone.
my_agent.is_greedy = True

# Simulation loop: runs until the kernel is interrupted (there is no exit
# condition), rendering one step per second.
states = env.reset()
my_drone = env.drones[0]
rewards = None  # no reward is available before the first step
while True:
    # Render the current grid, replacing the previous frame in the output area.
    # NOTE(review): 'ainsi' is the mode string used by the other render call in
    # this notebook as well - presumably the env's text mode; confirm in env.py.
    clear_output(wait=True)
    print(env.render('ainsi'))

    # Ask every agent for its action given its own observation
    actions = {index: agent.act(states[index]) for index, agent in agents.items()}

    # Print status of our drone: charge, current state, chosen action, last reward.
    # The formatting helpers are optional, hence the hasattr checks.
    print('Drone:', my_drone.index, 'charge: {}%'.format(my_drone.charge))
    if hasattr(env, 'format_state'):
        print('Current states:', env.format_state(states[my_drone.index]))
    if hasattr(env, 'format_action'):
        print('Next actions:', env.format_action(actions[my_drone.index]))
    if rewards is not None:
        print('Last rewards:', rewards[my_drone.index])

    # Pause so the frame is readable, then advance the environment by one step
    time.sleep(1)
    states, rewards, dones, _ = env.step(actions)

Benchmarking
---

In [None]:
# Throughput benchmark: three drones stepping with randomly sampled actions
env = PlayerGridView(DeliveryDrones(n=3))
states = env.reset()

# One million environment steps, with a progress bar
for step_no in tqdm(range(10**6)):
    random_actions = {drone.index: env.action_space.sample() for drone in env.drones}
    states, rewards, dones, _ = env.step(random_actions)

Development space below
---

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import gym.spaces as spaces
import numpy as np

In [None]:
%run env.py
%run rl-helpers.py

# Three-drone environment wrapped in the player-centred grid view,
# reset once so that `states` holds the initial observations
env = PlayerGridView(
    DeliveryDrones(n=3)
)
states = env.reset()

In [None]:
# Collect every object kind from its layer: drones are looked up on the
# `air` layer, everything else on the `ground` layer.
all_drones, all_drones_positions = env.air.get_objects(Drone)
all_packets, all_packets_positions = env.ground.get_objects(Packet)
all_dropzones, all_dropzones_positions = env.ground.get_objects(Dropzone)
all_stations, all_stations_positions = env.ground.get_objects(Station)

# Dump what was found (for stations only the positions are printed)
print('Drones:', all_drones, all_drones_positions)
print('Packets:', all_packets, all_packets_positions)
print('Dropzones:', all_dropzones, all_dropzones_positions)
print('Stations:', all_stations_positions)

# Per-drone details, then the rendered grid
print('Drones packets:', [(drone, drone.packet) for drone in all_drones])
print('Drones charge:', [(drone, drone.charge) for drone in all_drones])
print(env.render(mode='ainsi'))

In [None]:
%run env.py
%run rl-helpers.py
    
# Single-drone environment behind the binary grid observation wrapper
obs_wrapper = BinaryGridView
env = obs_wrapper(DeliveryDrones(n=1))
states = env.reset()

# Start with a random agent for every drone
agents = dict((drone.index, RandomAgent(env)) for drone in env.drones)

# Agent under development. Earlier candidates are kept here, disabled:
#my_agent = NeuralNetworkAgent(env, gamma=0.98, epsilon_start=1, epsilon_decay=0.99, epsilon_end=0.01)
#my_agent = ReplayMemoryAgent(env, gamma=0.98, epsilon_start=1, epsilon_decay=0.99, epsilon_end=0.01, memory_size=10000, batch_size=64)
#my_agent = DQNAgent(env, gamma=0.995, epsilon_start=1, epsilon_decay=0.995, epsilon_end=0.01, memory_size=10000, batch_size=64, target_update_interval=5)
my_agent = MyDQNAgent(
    env,
    conv_sizes=[16, 16, 32],
    fc_sizes=[128],
    gamma=0.98,
    epsilon_start=1,
    epsilon_decay=0.995,
    epsilon_end=0.01,
    memory_size=10000,
    batch_size=32,
    target_update_interval=5,
)

# The first drone is controlled by the agent under development
my_drone = env.drones[0]
agents[my_drone.index] = my_agent

# Training run with exploration on, then rolling rewards and the epsilon curve
trainer = MultiAgentTrainer(env, agents, seed=0)
my_agent.is_greedy = False
trainer.train(50000)
plot_rolling_rewards(trainer.rewards_log, subset=range(1, 5))
plt.plot(my_agent.epsilons)

# Greedy evaluation on a fixed seed
my_agent.is_greedy = True
rewards_log = test_agents(env, agents, n_steps=1000, seed=0)
plot_cumulative_rewards(rewards_log, subset=range(1, 5))

Graphics
---

In [None]:
# TODO

# Drone, pickup, dropoff, collision ~ simple geometric shapes
# https://image.freepik.com/free-vector/simple-geometric-shapes-background_1168-371.jpg
# Animation: slightly growing/shrinking to simulate up/down movement with shade

# Desired output
# https://img.deszone.net/2018/05/simple-geometric-shapes-free-vector-pattern4.jpg
# https://as1.ftcdn.net/jpg/01/72/82/18/500_F_172821814_Oyl3cNYBcigDpeCzehbAQghLxJILrZA5.jpg

# Other ideas
# Drones leave a fading trace