## Loading Meta's DINOv2 Depth Estimation Model

In [66]:
import math
import itertools
from functools import partial

import torch
import torch.nn.functional as F

from dinov2.eval.depth.models import build_depther
import urllib

from PIL import Image

class CenterPadding(torch.nn.Module):
    """Pad every dimension after the first two up to a multiple of ``multiple``.

    The padding is split as evenly as possible between both sides of each
    dimension; when the amount is odd, the extra element goes to the end.
    Padding uses the default mode of ``F.pad``.
    """

    def __init__(self, multiple):
        super().__init__()
        self.multiple = multiple

    def _get_pad(self, size):
        """Return ``(before, after)`` padding that rounds ``size`` up to the next multiple."""
        missing = -size % self.multiple
        before = missing // 2
        return before, missing - before

    @torch.inference_mode()
    def forward(self, x):
        # F.pad expects the pads ordered from the last dimension backwards,
        # as (before, after) pairs, so walk the trailing dimensions in reverse.
        pads = []
        for dim_size in reversed(x.shape[2:]):
            pads.extend(self._get_pad(dim_size))
        return F.pad(x, pads)


def create_depther(cfg, backbone_model, backbone_size, head_type):
    """Build a depth estimator from ``cfg`` and wire ``backbone_model`` into it.

    The depther's backbone ``forward`` is replaced by the backbone model's
    ``get_intermediate_layers`` with the layer indices, class-token and norm
    settings taken from ``cfg.model.backbone``. When the backbone model exposes
    ``patch_size``, a forward pre-hook pads the input with ``CenterPadding``
    to a multiple of that patch size.

    ``backbone_size`` and ``head_type`` are accepted but not used here.

    Returns:
        The depther created by ``build_depther``.
    """
    depther = build_depther(
        cfg.model,
        train_cfg=cfg.get("train_cfg"),
        test_cfg=cfg.get("test_cfg"),
    )

    backbone_cfg = cfg.model.backbone
    depther.backbone.forward = partial(
        backbone_model.get_intermediate_layers,
        n=backbone_cfg.out_indices,
        reshape=True,
        return_class_token=backbone_cfg.output_cls_token,
        norm=backbone_cfg.final_norm,
    )

    if hasattr(backbone_model, "patch_size"):

        def _pad_to_patch_multiple(_module, inputs):
            # The hook receives the positional inputs as a tuple; only the
            # first one (the image batch) is padded and passed on.
            return CenterPadding(backbone_model.patch_size)(inputs[0])

        depther.backbone.register_forward_pre_hook(_pad_to_patch_multiple)

    return depther

In [67]:
# Which DINOv2 backbone to load.
BACKBONE_SIZE = "small" # in ("small", "base", "large" or "giant")


# Maps the human-readable size to the architecture suffix used in the hub model name.
backbone_archs = {
    "small": "vits14",
    "base": "vitb14",
    "large": "vitl14",
    "giant": "vitg14",
}
backbone_arch = backbone_archs[BACKBONE_SIZE]
backbone_name = f"dinov2_{backbone_arch}"

# Fetch the backbone through torch.hub from the facebookresearch/dinov2 repo.
backbone_model = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=backbone_name)
# Keep the backbone on the CPU and switch it to evaluation mode.
backbone_model.to("cpu")
backbone_model.eval()
# GPU variant, left disabled:
# backbone_model.cuda()

Using cache found in /Users/ideoghyeon/.cache/torch/hub/facebookresearch_dinov2_main


DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
  )
  (n

In [68]:
import urllib

import mmcv
from mmcv.runner import load_checkpoint


def load_config_from_url(url: str) -> str:
    """Fetch the resource at ``url`` and return its body decoded as text.

    Args:
        url: Any URL that ``urllib.request.urlopen`` can open.

    Returns:
        The response body decoded with the default codec of ``bytes.decode``
        (UTF-8).
    """
    # A bare ``import urllib`` does not load the ``request`` submodule; it only
    # worked when some other module had imported it first. Import it here so
    # the function does not depend on that side effect.
    import urllib.request

    with urllib.request.urlopen(url) as response:
        return response.read().decode()


# Which pretrained depth head to use; both values are interpolated into the download URLs below.
HEAD_DATASET = "kitti" # in ("nyu", "kitti")
HEAD_TYPE = "linear" # in ("linear", "linear4", "dpt")


# Build the URLs of the head's config file and checkpoint for the selected backbone.
DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2"
head_config_url = f"{DINOV2_BASE_URL}/{backbone_name}/{backbone_name}_{HEAD_DATASET}_{HEAD_TYPE}_config.py"
head_checkpoint_url = f"{DINOV2_BASE_URL}/{backbone_name}/{backbone_name}_{HEAD_DATASET}_{HEAD_TYPE}_head.pth"

# Download the config source text and parse it as a Python-format mmcv config.
cfg_str = load_config_from_url(head_config_url)
cfg = mmcv.Config.fromstring(cfg_str, file_format=".py")

# Assemble the depth estimator around the already loaded backbone.
model = create_depther(
    cfg,
    backbone_model=backbone_model,
    backbone_size=BACKBONE_SIZE,
    head_type=HEAD_TYPE,
)

# Load the head checkpoint onto the CPU and switch to evaluation mode.
load_checkpoint(model, head_checkpoint_url, map_location="cpu")
model.eval()
# GPU variant, left disabled:
# model.cuda()

load checkpoint from http path: https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_kitti_linear_head.pth


DepthEncoderDecoder(
  (backbone): DinoVisionTransformer()
  (decode_head): BNHead(
    align_corners=False
    (loss_decode): ModuleList(
      (0): SigLoss()
      (1): GradientLoss()
    )
    (softmax): Softmax(dim=1)
    (conv_depth): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
    (relu): ReLU()
    (sigmoid): Sigmoid()
  )
)

## Building Pipeline for transform

In [69]:
import matplotlib
from torchvision import transforms


def make_depth_transform() -> transforms.Compose:
    """Return the preprocessing pipeline applied to an image before depth inference.

    The pipeline converts the input with ``ToTensor``, keeps the first three
    channels scaled by 255, and normalizes them with fixed per-channel mean
    and standard deviation.
    """

    def _keep_rgb_and_rescale(image):
        # Discard alpha component and scale by 255
        return 255.0 * image[:3]

    steps = [
        transforms.ToTensor(),
        _keep_rgb_and_rescale,
        transforms.Normalize(
            mean=(123.675, 116.28, 103.53),
            std=(58.395, 57.12, 57.375),
        ),
    ]
    return transforms.Compose(steps)


def render_depth(values, colormap_name="magma_r") -> Image.Image:
    """Render a 2-D map of values as an RGB image using a matplotlib colormap.

    The values are min-max normalized to ``[0, 1]`` before the colormap is
    applied, so only relative differences within ``values`` are visible.

    Args:
        values: 2-D array-like supporting ``min()``, ``max()`` and arithmetic.
        colormap_name: Key into ``matplotlib.colormaps``.

    Returns:
        A PIL image built from the RGB bytes of the colormapped values.
    """
    min_value, max_value = values.min(), values.max()
    value_range = max_value - min_value
    if value_range > 0:
        normalized_values = (values - min_value) / value_range
    else:
        # A constant map has no range to normalize by; map everything to 0
        # instead of dividing by zero and producing NaNs.
        normalized_values = values - min_value

    colormap = matplotlib.colormaps[colormap_name]
    colors = colormap(normalized_values, bytes=True) # ((1)xhxwx4)
    colors = colors[:, :, :3] # Discard alpha component
    return Image.fromarray(colors)

### Drone Environment Setup

In [70]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import ActionTuple
from collections import deque
from replaybuffer import ReplayBuffers
import torch.nn.functional as F

In [71]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device  # bare expression so the notebook displays the selected device

device(type='cpu')

In [72]:
class Actor(nn.Module):
    """Gaussian policy network: maps a state to the mean and std of each action.

    Four ReLU hidden layers (128, 64, 32, 16 units) feed two heads: ``mu``
    (squashed by ``tanh``) and ``std`` (``softplus`` then clamped to
    ``std_bound``).
    """

    def __init__(self, action_dim, state_dim):
        super(Actor, self).__init__()

        self.action_dim = action_dim
        # Lower and upper clamp applied to the predicted standard deviation.
        self.std_bound = [1e-2, 1.0]

        self.h1 = nn.Linear(state_dim, 128)
        self.h2 = nn.Linear(128, 64)
        self.h3 = nn.Linear(64, 32)
        self.h4 = nn.Linear(32, 16)
        self.mu = nn.Linear(16, action_dim)
        self.std = nn.Linear(16, action_dim)

    def forward(self, state):
        """Return ``(mu, std)`` of the action distribution for ``state``."""
        x = nn.functional.relu(self.h1(state))
        x = nn.functional.relu(self.h2(x))
        x = nn.functional.relu(self.h3(x))
        x = nn.functional.relu(self.h4(x))
        mu = torch.tanh(self.mu(x))
        std = nn.functional.softplus(self.std(x))

        std = torch.clamp(std, self.std_bound[0], self.std_bound[1])

        return mu, std

    def sample_normal(self, mu, std):
        """Draw an action from ``Normal(mu, std)`` and return it with its log-probability.

        The action is drawn with the reparameterisation trick (``rsample``) so
        that it stays differentiable with respect to ``mu`` and ``std``. With a
        plain ``sample`` the action is detached, and a loss that feeds the
        action into a critic would give the actor no gradient from the
        Q-value.

        Note that the sampled action is not clipped or squashed here.

        Returns:
            ``(action, log_prob)`` where ``log_prob`` is summed over dim 1 and
            keeps that dimension (shape ``(batch, 1)`` for 2-D inputs).
        """
        normal_prob = Normal(mu, std)
        action = normal_prob.rsample()

        log_prob = normal_prob.log_prob(action)
        log_prob = torch.sum(log_prob, dim=1, keepdim=True)

        return action, log_prob


In [73]:
class Critic(nn.Module):
    """Q-network: scores a (state, action) pair with a single value.

    State and action are embedded separately into 128 features each; the two
    embeddings are concatenated and passed through an MLP ending in one unit.
    """

    def __init__(self, action_dim, state_dim):
        super().__init__()

        self.x1 = nn.Linear(state_dim, 128)
        self.a1 = nn.Linear(action_dim, 128)

        # The trunk starts at 256 inputs because it receives the concatenation
        # of the two 128-wide embeddings.
        widths = (256, 64, 32, 16)
        trunk = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            trunk.append(nn.Linear(n_in, n_out))
            trunk.append(nn.ReLU())
        trunk.append(nn.Linear(widths[-1], 1))
        self.h = nn.Sequential(*trunk)

    def forward(self, state_action):
        """Return the Q-value for ``state_action = [state, action]``."""
        state, action = state_action[0], state_action[1]
        state_features = torch.relu(self.x1(state))
        action_features = torch.relu(self.a1(action))
        joint = torch.cat((state_features, action_features), dim=-1)
        return self.h(joint)


In [80]:
class SACagent(object):
    """Soft Actor-Critic agent with two critics and two target critics.

    The agent owns an ``Actor``, two ``Critic`` networks with target copies,
    their Adam optimizers and a ``ReplayBuffers`` instance. ``train`` drives a
    Unity ML-Agents environment; each observation's camera image is passed
    through the module-level depth ``model`` and concatenated with the vector
    observation to form the state.
    """

    def __init__(self, N_STATES, N_ACTIONS):
        """Create the networks, optimizers and replay buffer.

        Args:
            N_STATES: Length of the flat state vector.
            N_ACTIONS: Length of the continuous action vector.
        """
        # Hyperparameters
        self.GAMMA = 0.99
        self.BATCH_SIZE = 1024
        self.BUFFER_SIZE = 1000000
        self.ACTOR_LEARNING_RATE = 0.0003
        self.CRITIC_LEARNING_RATE = 0.001
        self.TAU = 0.001
        self.ALPHA = 0.5

        # Observation space and Action space
        self.state_dim = N_STATES
        self.action_dim = N_ACTIONS

        # Check if CUDA is available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Build Actor, Q1, Q2 and their target networks. Every network is
        # moved to ``self.device`` because all inputs are moved there as well;
        # leaving the networks on the CPU breaks as soon as CUDA is available.
        self.actor = Actor(action_dim=self.action_dim, state_dim=self.state_dim).to(self.device)

        self.critic_1 = Critic(action_dim=self.action_dim, state_dim=self.state_dim).to(self.device)
        self.target_critic_1 = Critic(action_dim=self.action_dim, state_dim=self.state_dim).to(self.device)

        self.critic_2 = Critic(action_dim=self.action_dim, state_dim=self.state_dim).to(self.device)
        self.target_critic_2 = Critic(action_dim=self.action_dim, state_dim=self.state_dim).to(self.device)

        # The targets are synchronised with the critics at the start of
        # ``train`` via ``update_target_network(1.0)``.

        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.ACTOR_LEARNING_RATE)
        self.critic_1_optimizer = optim.Adam(self.critic_1.parameters(), lr=self.CRITIC_LEARNING_RATE)
        self.critic_2_optimizer = optim.Adam(self.critic_2.parameters(), lr=self.CRITIC_LEARNING_RATE)

        # Start with an empty replay buffer
        self.buffer = ReplayBuffers(self.BUFFER_SIZE)

        # For plotting purposes, data is stored.
        self.policy_loss = []
        self.reward_list = []

    def get_action(self, state):
        """Sample an action for a single state.

        Args:
            state: 1-D state vector.

        Returns:
            NumPy array of shape ``(1, action_dim)`` sampled from the policy.
        """
        with torch.no_grad():
            # Convert through one ndarray (instead of a Python list holding an
            # ndarray) and add the batch dimension explicitly.
            state = torch.as_tensor(
                np.asarray(state), dtype=torch.float32, device=self.device
            ).unsqueeze(0)
            mu, std = self.actor(state)
            action = torch.distributions.Normal(mu, std).sample()
            return action.cpu().numpy()

    def update_target_network(self, TAU):
        """Blend each critic into its target: ``target = TAU * critic + (1 - TAU) * target``.

        ``TAU=1.0`` copies the critics into the targets.
        """
        pairs = (
            (self.critic_1, self.target_critic_1),
            (self.critic_2, self.target_critic_2),
        )
        for online, target in pairs:
            online_state = online.state_dict()
            blended = {
                name: TAU * online_state[name] + (1 - TAU) * target_value
                for name, target_value in target.state_dict().items()
            }
            target.load_state_dict(blended)

    # train Q1, Q2
    def critic_learn(self, states, actions, y_i):
        """Run one MSE regression step of both critics towards the targets ``y_i``.

        Args:
            states: Batch of states (array-like).
            actions: Batch of actions (array-like).
            y_i: Tensor of target Q-values.
        """
        states = torch.as_tensor(states, dtype=torch.float32, device=self.device)
        actions = torch.as_tensor(actions, dtype=torch.float32, device=self.device)
        q_targets = y_i.to(self.device)

        critics = (
            (self.critic_1, self.critic_1_optimizer),
            (self.critic_2, self.critic_2_optimizer),
        )
        for critic, optimizer in critics:
            # Predicted value vs. target value.
            loss = F.mse_loss(critic([states, actions]), q_targets)
            # Clear old gradients so they do not accumulate across steps.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    def actor_learn(self, states):
        """Run one policy update step and return the loss as a float.

        The loss is ``mean(ALPHA * log_prob - min(Q1, Q2))`` for actions
        sampled from the current policy.
        """
        states = torch.as_tensor(states, dtype=torch.float32, device=self.device)
        mu, std = self.actor(states)
        # NOTE(review): the Q-term below only yields a policy gradient when
        # ``sample_normal`` returns a differentiable (reparameterised) action.
        actions, log_pdfs = self.actor.sample_normal(mu, std)
        soft_q_1 = self.critic_1([states, actions])
        soft_q_2 = self.critic_2([states, actions])
        soft_q = torch.min(soft_q_1, soft_q_2)

        # ``log_pdfs`` (keepdim=True in sample_normal) and ``soft_q`` are both
        # (batch, 1). Squeezing one of them would broadcast the difference to
        # (batch, batch): same mean, but a needless quadratic tensor.
        loss = torch.mean(self.ALPHA * log_pdfs - soft_q)

        self.actor_optimizer.zero_grad()
        loss.backward()
        self.actor_optimizer.step()

        return float(loss)

    def q_target(self, rewards, q_values, dones):
        """Compute the TD targets for a batch.

        For each row: ``reward`` if the transition is terminal, otherwise
        ``reward + GAMMA * q_value``.

        Args:
            rewards: One reward per transition.
            q_values: Array of next-state values; its first axis is the batch.
            dones: One truthy/falsy terminal flag per transition.

        Returns:
            float32 tensor with the same shape as ``q_values``. ``q_values``
            itself is left untouched.
        """
        # Work on a copy: ``np.asarray`` would alias the caller's array and
        # the targets would overwrite it.
        q = np.array(q_values, dtype=np.float32)
        # Shape rewards/dones as (batch, 1, ..., 1) so they line up with q's rows.
        column_shape = (q.shape[0],) + (1,) * (q.ndim - 1)
        r = np.asarray(rewards, dtype=np.float32).reshape(column_shape)
        terminal = np.asarray(dones).astype(bool).reshape(column_shape)
        y_k = np.where(terminal, r, r + self.GAMMA * q)
        return torch.tensor(y_k, dtype=torch.float32)

    def load_weights(self, path):
        """Load actor and critic weights from files under the ``path`` prefix.

        ``path`` is concatenated with the file names as-is, so it must end
        with a path separator.
        """
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        self.actor.load_state_dict(
            torch.load(path + 'Drone_actor_2q.pth', map_location=self.device))
        self.critic_1.load_state_dict(
            torch.load(path + 'Drone_critic_12q.pth', map_location=self.device))
        self.critic_2.load_state_dict(
            torch.load(path + 'Drone_critic_22q.pth', map_location=self.device))

    def _build_state(self, steps, transform):
        """Build the flat state vector from the first agent's observations in ``steps``.

        The camera image is run through the module-level depth ``model`` and
        the flattened result is concatenated with the vector observation.
        """
        np_camera_image = steps.obs[0][0]  # Camera Sensor
        v_agent_target = steps.obs[1][0]  # Agent's position (x,y,z) + Target's position (x,y,z)
        batch = transform(np_camera_image).unsqueeze(0)  # Make a batch of one image
        with torch.inference_mode():
            result = model.whole_inference(batch, img_meta=None, rescale=True)
        treated_image = result.squeeze().cpu().flatten()  # 2-D depth map -> 1-D
        return np.concatenate((treated_image, v_agent_target), 0)

    def _learn_from_batch(self):
        """Sample one batch, update critics, actor and targets; return the policy loss."""
        states, actions, rewards, next_states, dones = self.buffer.sample_batch(self.BATCH_SIZE)

        # Calculate the Q target value
        with torch.no_grad():
            tensor_next_states = torch.as_tensor(
                next_states, dtype=torch.float32, device=self.device)
            next_mu, next_std = self.actor(tensor_next_states)
            # ``next_actions`` is already a tensor on the right device.
            next_actions, next_log_pdf = self.actor.sample_normal(next_mu, next_std)

            target_qs_1 = self.target_critic_1([tensor_next_states, next_actions])
            target_qs_2 = self.target_critic_2([tensor_next_states, next_actions])
            target_qs = torch.min(target_qs_1, target_qs_2)

            target_qi = target_qs - self.ALPHA * next_log_pdf

        # ``.cpu()`` first: ``.numpy()`` is not available on CUDA tensors.
        y_i = self.q_target(rewards, target_qi.cpu().numpy(), dones)

        self.critic_learn(states, actions, y_i)

        # update Actor and return policy loss
        policy_loss = self.actor_learn(states)

        self.update_target_network(self.TAU)
        return policy_loss

    def train(self, max_episode_num, env, behavior_name):
        """Run ``max_episode_num`` episodes in ``env``, learning from replayed transitions.

        Args:
            max_episode_num: Number of episodes to run (converted with ``int``).
            env: Unity environment providing ``reset``, ``step``,
                ``get_steps`` and ``set_actions``.
            behavior_name: Name of the behavior to control.
        """
        import os

        transform = make_depth_transform()  # Apply the transformation pipeline

        self.param_counter = 0
        # Start with the target critics equal to the critics.
        self.update_target_network(1.0)

        for ep in range(int(max_episode_num)):
            is_learning = False
            frame, episode_reward = 0, 0
            # reset the environment
            env.reset()
            decision_steps, terminal_steps = env.get_steps(behavior_name)
            episode_done = False

            state = self._build_state(decision_steps, transform)

            while not episode_done:
                # wrap the action with ActionTuple before sending it to Unity.
                action_tuple = ActionTuple(np.array(self.get_action(state), dtype=np.float32))
                env.set_actions(behavior_name, action_tuple)
                # move the agent along with the action.
                env.step()
                action = action_tuple.continuous  # plain array stored in the buffer
                next_decision_steps, next_terminal_steps = env.get_steps(behavior_name)

                # Keep the current state if this step reported no agent at all.
                next_state = state

                # if the agent is still on, collect data and add it to buffer.
                if next_decision_steps:
                    train_reward = next_decision_steps.reward[0]
                    # Use the observation produced by this step, not the one
                    # captured right after the episode reset.
                    next_state = self._build_state(next_decision_steps, transform)
                    episode_reward += train_reward
                    self.buffer.add_data(state, action, train_reward, next_state, False)
                    episode_done = False

                # if the agent is off, collect data and add True for done.
                if next_terminal_steps:
                    train_reward = next_terminal_steps.reward[0]
                    next_state = self._build_state(next_terminal_steps, transform)
                    episode_reward += train_reward
                    self.buffer.add_data(state, action, train_reward, next_state, True)
                    episode_done = True

                # NOTE(review): learning only starts once the buffer is within
                # BATCH_SIZE entries of BUFFER_SIZE (about one million
                # transitions). If the intent was "at least one batch", the
                # threshold should be BATCH_SIZE. Confirm before changing.
                if self.buffer.buffer_count() > self.BUFFER_SIZE - self.BATCH_SIZE:
                    is_learning = True
                    policy_loss = self._learn_from_batch()

                state = next_state
                frame += 1

            # Episode output
            print('Episode: ', ep+1, 'Frame: ', frame, 'u Reward: ', episode_reward)

            if is_learning:
                # NOTE(review): this records the reward of the last step, not
                # ``episode_reward``. Confirm which one the plot should show.
                self.reward_list.append(train_reward)
                self.policy_loss.append(policy_loss)

            # every 250th episode stores another set of params
            if ep % 250 == 0:
                save_dir = "./saved_weights/250th/"
                # torch.save does not create missing directories.
                os.makedirs(save_dir, exist_ok=True)
                tag = str(self.param_counter)
                torch.save(self.actor.state_dict(), save_dir + "actor_" + tag + "_2q.pth")
                torch.save(self.critic_1.state_dict(), save_dir + "critic_" + tag + "_12q.pth")
                torch.save(self.critic_2.state_dict(), save_dir + "critic_" + tag + "_22q.pth")
                self.param_counter += 1

    def plot_result(self):
        """Plot the recorded rewards and policy losses side by side."""
        fig=plt.figure(figsize=(18, 6))
        fig.add_subplot(1, 3, 1)  # 1 row, 3 columns
        plt.plot(self.reward_list)

        fig.add_subplot(1, 3, 3)
        plt.plot(self.policy_loss)
        plt.show()



#### Run Unity Environment

In [75]:
# Length of the continuous action vector; passed to SACagent as its action dimension.
N_ACTIONS = 3

In [76]:
# Alternative: launch a prebuilt Linux player instead.
# env = UnityEnvironment(file_name= "./Linux_Drone_v0.0.2/Linux_Drone_with.x86_64", base_port=5004)
# NOTE(review): file_name=None presumably attaches to a running Unity Editor - confirm against the ML-Agents docs.
env = UnityEnvironment(file_name= None, base_port=5004)


In [77]:
# Reset the environment, then pick the first registered behavior and fetch its initial steps.
env.reset()
behavior_names = list(env.behavior_specs.keys())
behavior_name = behavior_names[0]
decision_steps, terminal_steps = env.get_steps(behavior_name)

In [78]:
# GET N_STATES or len_STATES rather
transform = make_depth_transform() # Apply the transformation pipeline
#----------------------------------------
np_camera_image = decision_steps.obs[0][0] # Camera Sensor
v_agent_target = decision_steps.obs[1][0] # Agent's position (x,y,z) + Target's position (x,y,z)
image_tensor = np_camera_image     # just a naming for better understanding
transformed_image = transform(image_tensor)
# batch = transformed_image.unsqueeze(0).cuda() # Make a batch of one image
batch = transformed_image.unsqueeze(0) # Make a batch of one image
with torch.inference_mode():
    result = model.whole_inference(batch, img_meta=None, rescale=True)
treated_image_2d = result.squeeze().cpu()
treated_image = treated_image_2d.flatten() # converts 2d into 1d
# now concat the two 1d array for state -> SAC
state = np.concatenate((treated_image, v_agent_target), 0)
#-----------------------------------------
N_STATES = len(state)
N_STATES

7071

In [81]:
# Create the agent for the measured state size and the configured action size.
agent = SACagent(N_STATES, N_ACTIONS)
# usually 30K is enough.
# Train for up to 30000 episodes on the selected behavior.
agent.train(30000, env, behavior_name)

now [ 17.922789   17.922789   17.922789  ... -30.26        3.4052465
  -0.67     ]
Episode:  1 Frame:  106 u Reward:  -9.474193572998047
now [ 27.70325  27.70325  27.70325 ... -30.26      3.37313  -0.67   ]
Episode:  2 Frame:  85 u Reward:  -10.6334228515625
now [ 17.865644  17.865644  17.865644 ... -30.26       3.383378  -0.67    ]


KeyboardInterrupt: 

In [None]:
# Plot the rewards and policy losses recorded during training.
agent.plot_result()

In [82]:
# Shut down the connection to the Unity environment.
env.close()