In [1]:
import random

import Device
!pip install vmas
!pip install Pillow
!pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
!pip install ipython
!pip install autoreload
!pip install torch-geometric
!pip install wandb

Looking in indexes: https://download.pytorch.org/whl/nightly/cpu
Collecting wandb
  Obtaining dependency information for wandb from https://files.pythonhosted.org/packages/ed/d7/8927aef63869d5d379adb63dc97f9cbc53830fdf85457b84a156fabcb231/wandb-0.15.8-py3-none-any.whl.metadata
  Downloading wandb-0.15.8-py3-none-any.whl.metadata (8.3 kB)
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Obtaining dependency information for GitPython!=3.1.29,>=1.0.0 from https://files.pythonhosted.org/packages/67/50/742c2fb60989b76ccf7302c7b1d9e26505d7054c24f08cc7ec187faaaea7/GitPython-3.1.32-py3-none-any.whl.metadata
  Downloading GitPython-3.1.32-py3-none-any.whl.metadata (10.0 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Obtaining dependency information for sentry-sdk>=1.0.0 from https://files.pythonhosted.org/packages/86/bb/ecb87fd214d5bbade07edf2ecdd829cf346e5b552689d6228112c6517286/sentry_sdk-1.29.2-py2.py3-none-any.whl.metadata
  Downloading sentry_sdk-1.29.2-py2.py3-none-any.whl.me

In [3]:
import importlib.util
# Install pyflamegpu only when it cannot already be found by the import system
# of this kernel, so re-running the cell does not reinstall it.
if importlib.util.find_spec('pyflamegpu') is None:
    import sys
    # `sys.executable -m pip` targets the interpreter that runs this kernel.
    # NOTE(review): the extra index path ends in `cuda112`, which presumably
    # selects a CUDA 11.2 build — confirm it matches the local CUDA install.
    !{sys.executable} -m pip install --extra-index-url https://whl.flamegpu.com/whl/cuda112/ pyflamegpu==2.0.0rc0 # type: ignore

# Import pyflamegpu and some other libraries we will use in the tutorial
# NOTE(review): none of the other cells visible in this notebook reference
# pyflamegpu or matplotlib — confirm whether this cell is still needed.
import pyflamegpu
import sys, random, math
import matplotlib.pyplot as plt

In [1]:

import threading
import copy
import random
import time

import torch
from PIL import Image
from torch import tensor, Tensor
from vmas import make_env

import wandb
from Cleaning import Scenario as CleaningScenario
from DeepQLearner import DeepQLearner
from LearningConfiguration import LearningConfiguration, NNFactory
from ReplayBuffer import ReplayBufferFactory
import Device

# --- Experiment setup -------------------------------------------------------
# Everything below configures one training run: the scenario, the discrete
# action set, the learner, and the fixed target layout shared by all epochs.

# NOTE: despite its name this holds a Scenario *instance*, not a string. The
# name is kept because the rest of the notebook refers to it.
scenario_name = CleaningScenario()

# Scenario specific variables
n_agents = 1
n_targets = 8
num_envs = 1  # Number of vectorized environments
continuous_actions = True
device = Device.get()  # or cuda or any other torch device
n_steps = 1000  # Upper bound of the per-epoch step loop
n_epochs = 1000
dict_spaces = True  # Whether to return obs, rewards, and infos as dictionaries with agent names (by default they are lists of len # of agents)

run = wandb.init(project="vmas", reinit=True, config={
    "learning_rate": 0.0005,
    "architecture": "MLP",
    #"epochs": n_steps
})

dataset_size = 10000  # Capacity handed to the replay buffer factory

frame_list = []  # For creating a gif
init_time = time.time()
step = 0

# Actions: the eight compass directions, each expressed as a 2-component
# tensor scaled by `speed`.
speed = 0.5
north = tensor([0, -1*speed])
south = tensor([0, speed])
east = tensor([speed, 0])
west = tensor([-1*speed, 0])
#stop = tensor([0, 0])
ne = tensor([speed, -1*speed])
nw = tensor([-1*speed, -1*speed])
se = tensor([speed, speed])
sw = tensor([-1*speed, speed])

# Only the lidar readings are fed to the network for now; position and
# velocity are declared but left out of `tot_shape`.
lidar_measure_shape = 50# * 2
pos_shape = 2
vel_shape = 2
tot_shape = lidar_measure_shape# + pos_shape + vel_shape

actions = [north, south, east, west, ne, nw, se, sw]
#learning_configuration = LearningConfiguration(update_each=math.floor(n_steps/3),dqn_factory=NNFactory(tot_shape,64,len(actions)))
learning_configuration = LearningConfiguration(update_each=200,dqn_factory=NNFactory(tot_shape,64,len(actions)))

dql = DeepQLearner(
    memory=ReplayBufferFactory(dataset_size),
    action_space=actions,
    learning_configuration=learning_configuration
)

#dql.load_snapshot("./-38-2023-09-21-23-55-46-agent-0")

# Random target positions, drawn once and reused for every epoch.
# Each coordinate is sampled uniformly from [-1, 1]. The previous expression,
# random.random() * random.randint(-1, 1), multiplied by 0 a third of the time
# and therefore pinned roughly one coordinate in three to exactly 0.
targets_pos = [
    tensor([random.uniform(-1, 1), random.uniform(-1, 1)], device=device)
    for _ in range(n_targets)
]

def isOneEnvDone(info_array):
    """Tell whether at least one vectorized environment has finished.

    Looks at ``info_array["agent_0"]["active_targets"]`` and checks the first
    ``num_envs`` entries (``num_envs`` is read from the notebook's globals).

    Args:
        info_array: the info mapping returned by ``env.step``; it must contain
            an ``"agent_0"`` entry with an indexable ``"active_targets"`` value.
            Presumably this holds the number of targets still active per
            environment — verify against the scenario.

    Returns:
        True as soon as one inspected entry equals 0, otherwise False.
    """
    remaining_targets = info_array["agent_0"]["active_targets"]
    return any(remaining_targets[env_idx] == 0 for env_idx in range(num_envs))

def save_gif(frame_list, epoch):
    """Write the frames of one epoch to an animated GIF and snapshot the learner.

    The GIF is named ``<ScenarioClass>-env-0-epoch-<epoch>.gif`` and is written
    to the current working directory. The learner snapshot is taken afterwards
    through the notebook-level ``dql`` object.

    Args:
        frame_list: PIL images in playback order. The first image becomes the
            base frame and the rest are appended after it.
        epoch: epoch index, used in the file name and passed to ``dql.snapshot``.

    An empty ``frame_list`` no longer raises ``IndexError``: the GIF is skipped
    and the snapshot is still taken.
    """
    if frame_list:
        # Only one GIF is produced per epoch; the "env-0" label is kept so file
        # names stay identical to those written by earlier runs.
        gif_name = scenario_name.__class__.__name__ + "-env-" + str(0) + "-epoch-" + str(epoch) + ".gif"
        first_frame, *remaining_frames = frame_list
        first_frame.save(
            gif_name,
            save_all=True,
            append_images=remaining_frames,
            duration=1,
            loop=0,
        )
    dql.snapshot(epoch, "0")

# --- Training loop ----------------------------------------------------------
# One epoch = one freshly built environment, stepped until a vectorized
# environment reports no active targets or the step budget runs out.
for e in range(0, n_epochs):
    env = make_env(
        scenario=scenario_name,
        num_envs=num_envs,
        device=device,
        continuous_actions=continuous_actions,
        dict_spaces=dict_spaces,
        wrapper=None,
        seed=None,
        n_targets=n_targets,
        n_agents=n_agents,
        wandb=wandb,
        targets_pos=targets_pos
    )
    # Last observation seen per agent; empty until the first env.step().
    previous_states = {}
    for step in range(1, n_steps):
        print(f"Step {step}")
        # Named `env_actions` so it does not rebind the module-level `actions`
        # list that defines the learner's action space.
        env_actions = {}
        # The state each agent actually chose its action from. It is kept so
        # the replay buffer receives a real (state, action, reward, next_state)
        # transition.
        acted_on_states = {}
        logs = {}
        for agent in env.agents:
            # No observation exists before the first step, so start from zeros.
            lidar_measure = previous_states[agent.name]["lidar_measure"] if step > 1 else torch.zeros(num_envs, lidar_measure_shape).to(Device.get())
            acted_on_states[agent.name] = lidar_measure
            agent_actions = torch.stack(
                [dql.behavioural(lidar_measure[j]) for j in range(num_envs)]
            )
            env_actions[agent.name] = agent_actions
            if step > dql.batch_size/num_envs:
                dql.improve() # Improve the model
                #TODO Should I do the improve once for each env or once for each agent?
        obs, rewards, dones, info = env.step(env_actions)
        mean_reward = 0
        for agent in env.agents:
            lidar_measure = obs[agent.name][:, (tot_shape - lidar_measure_shape):]
            state_before_step = acted_on_states[agent.name]
            for j in range(num_envs):
                reward = rewards[agent.name][j]
                mean_reward += reward
                logs.update({f"reward_{agent.name}_env_{j}": reward})
                # Previously the "previous" state was read back *after* it had
                # been overwritten with the new observation, so every stored
                # transition had state == next_state.
                dql.record(state_before_step[j], env_actions[agent.name][j], reward, obs[agent.name][j])
            # Only now replace the remembered observation for the next step.
            previous_states[agent.name] = {
                "lidar_measure": lidar_measure,
                "pos": agent.state.pos,
                "vel": agent.state.vel,
            }
        mean_reward /= (num_envs*n_agents)
        logs.update({"epsilon": dql.epsilon.value()})
        logs.update({"loss": dql.last_loss})
        logs.update({"mean_reward": mean_reward})
        logs.update({f"mean_reward_epoch_{e}": mean_reward})

        wandb.log(logs)
        dql.epsilon.update() # Update epsilon
        frame_list.append(
            Image.fromarray(env.render(mode="rgb_array", agent_index_focus=None))
        )  # Can give the camera an agent index to focus on

        if isOneEnvDone(info):
            print("Env done")
            # Sync the target network with the policy network at episode end.
            dql.target_network.load_state_dict(dql.policy_network.state_dict())
            break

    # Produce a gif in the background. The finished list is handed to the
    # thread and a fresh list is bound for the next epoch, which avoids
    # deep-copying every rendered frame.
    epoch_frames = frame_list
    frame_list = []
    thread = threading.Thread(target=save_gif, args=(epoch_frames, e))
    thread.start()

    # NOTE: this is the time elapsed since `init_time`, i.e. cumulative over
    # all epochs so far, and the epoch may have stopped before `n_steps`.
    total_time = time.time() - init_time
    print(
        f"It took: {total_time}s for {n_steps} steps of {num_envs} parallel environments on device {device} "
        f"for {scenario_name} scenario."
    )
    


[34m[1mwandb[0m: Currently logged in as: [33mfilocava99[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step 1




Step 2
Step 3
Step 4
Step 5
Step 6
Step 7
Step 8
Step 9
Step 10
Step 11
Step 12
Step 13
Step 14
Step 15
Step 16
Step 17
Step 18
Step 19
Step 20
Step 21
Step 22
Step 23
Step 24
Step 25
Step 26
Step 27
Step 28
Step 29
Step 30
Step 31
Step 32
Step 33
Step 34
Step 35
Step 36
Step 37
Step 38
Step 39
Step 40
Step 41
Step 42
Step 43
Step 44
Step 45
Step 46
Step 47
Step 48
Step 49
Step 50
Step 51
Step 52
Step 53
Step 54
Step 55
Step 56
Step 57
Step 58
Step 59
Step 60
Step 61
Step 62
Step 63
Step 64
Step 65
Step 66
Step 67
Step 68
Step 69
Step 70
Step 71
Step 72
Step 73
Step 74
Step 75
Step 76
Step 77
Step 78
Step 79
Step 80
Step 81
Step 82
Step 83
Step 84
Step 85
Step 86
Step 87
Step 88
Step 89
Step 90
Step 91
Step 92
Step 93
Step 94
Step 95
Step 96
Step 97
Step 98
Step 99
Step 100
Step 101
Step 102
Step 103
Step 104
Step 105
Step 106
Step 107
Step 108
Step 109
Step 110
Step 111
Step 112
Step 113
Step 114
Step 115
Step 116
Step 117
Step 118
Step 119
Step 120
Step 121
Step 122
Step 123
Step 124

KeyboardInterrupt: 