In [1]:
# Run the controller
import torch
from torch.profiler import profile, record_function, ProfilerActivity
import numpy as np
from utils import dtype, npdtype, load_param, get_device
from controllers.mppi_base import ControllerBase
from costs.static import Static
from models.auv_torch import AUVFossen
from observers.observer_base import ObserverBase

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# YAML configuration files consumed by `load_param` in the next cell.
# The paths are relative, so they presumably resolve against the kernel's
# working directory -- TODO confirm against `load_param`.
cont_config_file = "../../config/controller/rexrov2.torch.yaml"
cost_config_file = "../../config/tasks/static_cost.torch.yaml"
model_config_file = "../../config/models/rexrov2.default.yaml"

In [3]:
# Controller / cost hyper-parameters. Each name matches the keyword argument it
# is passed as when `Static` and `ControllerBase` are built in the cells below.
# NOTE(review): presumably k is the number of sampled rollouts and tau the
# horizon length -- confirm against ControllerBase.
k, tau = 2000, 50
lam, gamma, upsilon = 0.25, 0.1, 1.5

# NOTE(review): dt is not referenced by any cell visible in this notebook.
dt = 0.1

# Parsed configuration for the model, the cost and the controller.
model_conf = load_param(model_config_file)
cost_conf = load_param(cost_config_file)
cont_conf = load_param(cont_config_file)

# Cost weights, goal (with a trailing singleton axis appended) and noise,
# all converted to the project's numpy dtype.
Q = np.array(cost_conf["Q"], dtype=npdtype)
goal = np.array(cost_conf["goal"], dtype=npdtype)[..., np.newaxis]
sigma = np.array(cont_conf["noise"], dtype=npdtype)


In [6]:
# GPU execution
# Builds model, cost and controller on the device returned by
# `get_device(gpu=0)`, profiles a single controller call, then runs 20 closed
# loop steps and prints the controller's per-call timing averages.
device = get_device(gpu=0)
print(device)

model = AUVFossen(model_conf).to(device)
cost = Static(lam=lam,
              gamma=gamma,
              upsilon=upsilon,
              sigma=sigma,
              goal=goal,
              Q=Q).to(device)
obs = ObserverBase(log=False)
cont = ControllerBase(model=model,
                      cost=cost,
                      observer=obs,
                      k=k,
                      tau=tau,
                      lam=lam,
                      upsilon=upsilon,
                      sigma=sigma).to(device)

# 13x1 all-zero state column with entry 6 set to one.
# NOTE(review): with a 3+4+3+3 layout, index 6 is presumably the scalar part of
# a unit quaternion -- confirm against AUVFossen.
state = torch.Tensor(np.zeros((3+4+3+3, 1), dtype=npdtype)).to(device)
state[6] = 1.

# Warm start: run the controller once before profiling.
act = cont(state)

# `use_cuda=True` is deprecated in torch.profiler (it emits a warning); request
# the CPU and CUDA activities explicitly instead.
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
             with_stack=True,
             profile_memory=True) as prof:
    with record_function("model_inference"):
        act = cont(state)

print(prof.key_averages(group_by_stack_n=10).table(sort_by="self_cpu_time_total", row_limit=5))

# Presumably clears the controller's timers so the warm-start and profiled
# calls are not counted below.
# NOTE(review): `rest_timer` looks like a typo for `reset_timer` -- confirm
# the method name against ControllerBase.
cont.rest_timer()

# Closed loop: feed each action back through the model to get the next state.
for i in range(20):
    act = cont(state)
    next_state = model(state[None, ...], act[None, ...])[0]
    state = next_state
timeDict = cont.timeDict

# Average time per controller call for each timed stage.
print("Total: {:10.4f} s".format(timeDict["total"]/timeDict["calls"]))
print("Rollout: {:10.4f} s".format(timeDict["rollout"]/timeDict["calls"]))
print("Model: {:10.4f} s".format(timeDict["model"]/timeDict["calls"]))
print("Cost: {:10.4f} s".format(timeDict["cost"]/timeDict["calls"]))
print("Update: {:10.4f} s".format(timeDict["update"]/timeDict["calls"]))


True
cuda:0


  warn("use_cuda is deprecated, use activities argument instead")
STAGE:2023-04-27 11:05:41 9014:9014 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 11:05:41 9014:9014 ActivityProfilerController.cpp:300] Completed Stage: Collection


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------------------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  Source Location                                                              
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------------------------  
     

In [7]:
# CPU execution
# Same experiment as the GPU cell, on the device returned by
# `get_device(cpu=True)`: profile one controller call, run 20 closed loop
# steps, then print the controller's per-call timing averages.
device = get_device(cpu=True)
print(device)

model = AUVFossen(model_conf).to(device)
cost = Static(lam=lam,
              gamma=gamma,
              upsilon=upsilon,
              sigma=sigma,
              goal=goal,
              Q=Q).to(device)
obs = ObserverBase(log=False)
cont = ControllerBase(model=model,
                      cost=cost,
                      observer=obs,
                      k=k,
                      tau=tau,
                      lam=lam,
                      upsilon=upsilon,
                      sigma=sigma).to(device)

# 13x1 all-zero state column with entry 6 set to one.
# NOTE(review): with a 3+4+3+3 layout, index 6 is presumably the scalar part of
# a unit quaternion -- confirm against AUVFossen.
state = torch.Tensor(np.zeros((3+4+3+3, 1), dtype=npdtype)).to(device)
state[6] = 1.

# Warm start: run the controller once before profiling.
act = cont(state)

# This run is CPU only, so trace just the CPU activity. The previous
# `use_cuda=True` is deprecated and asked for CUDA tracing that this cell
# does not need.
with profile(activities=[ProfilerActivity.CPU],
             with_stack=True,
             profile_memory=True) as prof:
    with record_function("model_inference"):
        act = cont(state)

print(prof.key_averages(group_by_stack_n=10).table(sort_by="self_cpu_time_total", row_limit=5))

# Mirror the GPU cell: clear the controller's timers so the warm-start and
# profiled calls are not averaged into the numbers printed below.
# NOTE(review): `rest_timer` looks like a typo for `reset_timer` -- confirm
# the method name against ControllerBase.
cont.rest_timer()

# Closed loop: feed each action back through the model to get the next state.
for i in range(20):
    act = cont(state)
    next_state = model(state[None, ...], act[None, ...])[0]
    state = next_state
timeDict = cont.timeDict

# Average time per controller call for each timed stage.
print("Total: {:10.4f} s".format(timeDict["total"]/timeDict["calls"]))
print("Rollout: {:10.4f} s".format(timeDict["rollout"]/timeDict["calls"]))
print("Model: {:10.4f} s".format(timeDict["model"]/timeDict["calls"]))
print("Cost: {:10.4f} s".format(timeDict["cost"]/timeDict["calls"]))
print("Update: {:10.4f} s".format(timeDict["update"]/timeDict["calls"]))

cpu


STAGE:2023-04-27 11:06:03 9014:9014 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 11:06:04 9014:9014 ActivityProfilerController.cpp:300] Completed Stage: Collection


-----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------------------------  
                         Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Source Location                                                              
-----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------------------------  
              model_inference        23.76%     126.961ms        99.99%     534.238ms     534.238ms       1.19 Kb    -569.30 Mb             1  ...-in method _record_function_enter of PyCapsule object at 0x7f10bdc2e630>  
                                                                                                                    

: 

In [1]:
import torch
import numpy as np
from torch import nn
import torch.autograd.profiler as profiler

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
class MyModuleOpti(nn.Module):
    """Multiply the input by the precomputed inverse of a fixed 6x6 matrix.

    The matrix has 100 on its diagonal and 0 elsewhere, with 1e-7 added to
    every entry. The matrix and its inverse are registered as the buffers
    ``mTot`` and ``invMtot``.
    """

    def __init__(self):
        super().__init__()
        # 100 * identity(6), shifted by 1e-7 in every entry.
        total = torch.Tensor(100. * np.eye(6) + 1e-7)

        self.register_buffer("mTot", total)
        # The inverse is computed once here and reused by every forward call.
        self.register_buffer("invMtot", torch.linalg.inv(total))

    def forward(self, input):
        """Return the matrix product ``invMtot @ input``."""
        return self.invMtot @ input

class MyModule(nn.Module):
    """Solve the linear system ``mTot @ acc = input`` on every forward call.

    ``mTot`` is a fixed 6x6 matrix with 100 on its diagonal and 0 elsewhere,
    with 1e-7 added to every entry. Its inverse is also registered as the
    buffer ``invMtot``, although ``forward`` does not use it.
    """

    def __init__(self):
        super().__init__()
        # 100 * identity(6), shifted by 1e-7 in every entry.
        total = torch.Tensor(100. * np.eye(6) + 1e-7)

        self.register_buffer("mTot", total)
        self.register_buffer("invMtot", torch.linalg.inv(total))

    def forward(self, input):
        """Return ``torch.linalg.solve(mTot, input)``."""
        return torch.linalg.solve(self.mTot, input)


In [26]:
# Profile the `torch.linalg.solve` variant on the GPU.
model = MyModule().cuda()
input = torch.rand(20, 6, 1).cuda()

# Warm-up
model(input)
# CUDA work is launched asynchronously; wait for the warm-up call to finish so
# none of it is still pending when the profiler starts.
torch.cuda.synchronize()

with profiler.profile(with_stack=True, profile_memory=True) as prof:
    out = model(input)
print(prof.key_averages(group_by_stack_n=15).table(sort_by='self_cpu_time_total', row_limit=5))

------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  -----------------------------------------------------------  
                          Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  Source Location                                              
------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  -----------------------------------------------------------  
               cudaMemcpyAsync        45.69%       1.563ms        45.69%       1.563ms     390.750us           0 b           0 b           0 b           0 b             4  <built-in function linalg_solve>                             
                                                               

STAGE:2023-04-26 23:20:16 18091:18091 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-26 23:20:16 18091:18091 ActivityProfilerController.cpp:300] Completed Stage: Collection


In [27]:
# Profile the precomputed-inverse variant on the GPU, reusing `input` from the
# previous cell.
model_opti = MyModuleOpti().cuda()

# Warm-up
model_opti(input)
# CUDA work is launched asynchronously; wait for the warm-up call to finish so
# none of it is still pending when the profiler starts.
torch.cuda.synchronize()

with profiler.profile(with_stack=True, profile_memory=True) as prof:
    out_opti = model_opti(input)
print(prof.key_averages(group_by_stack_n=15).table(sort_by='self_cpu_time_total', row_limit=5))


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  -------------------------------------------------------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  Source Location                                                
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  -------------------------------------------------------------  
                                               aten::mm        92.32%       1.802ms        95.03%       1.855ms       1.855ms           0 b           0 b         512 b         512 b             1  <built-in method ma

STAGE:2023-04-26 23:20:18 18091:18091 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-26 23:20:18 18091:18091 ActivityProfilerController.cpp:300] Completed Stage: Collection
