In [1]:
# project
import os
import sys 
project_dir = os.path.join(os.path.expanduser('~'), 'git', 'diffsim-sysid')
sys.path.insert(1, project_dir)
# math
import numpy as np
# optim
import torch
# physics
import warp as wp
import warp.sim as wps
# graphics
from copy import deepcopy
import drjit as dr
import mitsuba as mi
from mitsuba.scalar_rgb import Transform4f as mit
# plots
from matplotlib import pyplot as plt
from src.mpl_utils import set_fig_size, set_mpl_format
# sysid
from src.sysid import SysId
# config
from src.config import cfg
# dataset
from src.sim import SIM_DURATION, SIM_DT, SIM_FPS, FRAME_DT, FRAME_STEPS, SIM_STEPS, SIM_SUBSTEPS, Sample, build_phys, run_phys, calc_density, get_density
from src.dataset import load_train_data
from src.warp_utils import render_usd

# setup
set_mpl_format()
wp.init()
mi.set_variant('llvm_ad_rgb')
print(f'Mitsuba 3 Variant: {mi.variant()}')

2024-04-06 16:41:28.511911: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Warp 1.0.2 initialized:
   CUDA Toolkit 11.5, Driver 11.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "NVIDIA GeForce GTX 1050 Ti with Max-Q Design" (4 GiB, sm_61, mempool enabled)
   Kernel cache:
     /home/jonathan/.cache/warp/1.0.2
Mitsuba 3 Variant: llvm_ad_rgb


### Config

In [2]:
# fill in your directory set up here
config_fp = os.path.join(project_dir, f'cfg/templates/geom-movi_a.yaml')
cfg.merge_from_file(config_fp)
cfg.path = project_dir
cfg.DATA.path = os.path.join(project_dir, 'data/sets/')
print(cfg)

DATA:
  fps: 12
  fpv: 24
  height: 256
  max_objs: 10
  path: /home/jonathan/git/diffsim-sysid/data/sets/
  set: movi_a
  width: 256
EVAL:
  OUTPUT:
    FN:
      config: config.yaml
      log: log.txt
      pred: pred.csv
    path: 
  PARAM:
    DENSITY:
      include: True
SYS_ID:
  OPTIM:
    beta1: 0.9
    beta2: 0.999
    decay: 0.0
    lr: 0.01
    optim: adam
  geom: True
  iter: 4
  rand: False
  spp: 4
  vis: False
path: /home/jonathan/git/diffsim-sysid


### Data

In [3]:
train_ds, ds_info = load_train_data(cfg)
example = next(iter(train_ds))
time_step = 20
vid_id = example['metadata']['video_name']
print(f'Video: {vid_id}')

2024-04-06 16:41:30.075155: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-04-06 16:41:30.076406: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-04-06 16:41:30.076715: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-04-06 16:41:30.077232: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operation

Video: b'1680'


In [4]:
sysid = SysId(cfg)
target = Sample(cfg, example)
start_time_step = 0
final_time_step = -1

# input
sysid.set_sample(example)
sysid.set_priors()

# targets
target_time_step_dict = target.get_render_timestep_dict(final_time_step)
target_vis = mi.render(mi.load_dict(target_time_step_dict), spp=64).torch()
target_geom = torch.tensor(target.body_q[final_time_step], requires_grad=False)
target_density = torch.tensor(get_density(example), requires_grad=False)

# training
cfg.SYS_ID.iter = 10
cfg.SYS_ID.lr = 1.0
train_save_interval = 1

Module warp.sim.collide load on device 'cuda:0' took 107.65 ms
Module warp.sim.integrator_euler load on device 'cuda:0' took 80.75 ms
Module warp.sim.integrator load on device 'cuda:0' took 5.25 ms


In [6]:
# optimized variables
pred_density = sysid.density_prior

# physics
phys_device = wp.get_cuda_devices()[0]
phys_integrator = wps.SemiImplicitIntegrator()

# optimization
if cfg.SYS_ID.geom:
    loss = torch.nn.MSELoss(reduction='sum')
if cfg.SYS_ID.vis:
    loss = torch.nn.MSELoss(reduction='mean')

# metrics
est_errors = [] 
losses = []

for i in range(cfg.SYS_ID.iter+1):
    # physics forward
    phys_model = build_phys(cfg, example,
                            target.body_q[start_time_step], 
                            target.body_qd[start_time_step],
                            pred_density)
    phys_states = [phys_model.state(requires_grad=True) for _ in range(SIM_STEPS+1)]
    phys_tape = run_phys(phys_model, phys_states, phys_integrator)
    phys_inv_mass = wp.to_torch(phys_model.body_inv_mass)
    phys_pred = wp.to_torch(phys_states[-1].body_q)

    opt = torch.optim.SGD([phys_inv_mass], lr=cfg.SYS_ID.OPTIM.lr)

    if cfg.SYS_ID.geom:
        # loss
        output = loss(phys_pred, target_geom.to('cuda'))
    elif cfg.SYS_ID.vis:
        pass
        # render forward
        #vis_pred = render(phys_pred.to(vis_device))
        # loss
        #output = loss(vis_pred, target_vis)

    if i < cfg.SYS_ID.iter:
        # backward
        output.backward()

        wp_phys_pred = wp.from_torch(phys_pred)
        phys_tape.backward(grads={wp_phys_pred: wp_phys_pred.grad})
        print('param grad', phys_inv_mass.grad)


        # update
        #opt.step()

        # restrict

        # step
        new_inv_mass = phys_inv_mass.cpu().detach().numpy() - cfg.SYS_ID.OPTIM.lr * phys_inv_mass.grad.cpu().detach().numpy()
        np.clip(new_inv_mass, sysid.inv_mass_min.numpy(), sysid.inv_mass_max.numpy(), out=new_inv_mass)
        #phys_inv_mass= torch.clamp(phys_inv_mass.to('cpu'), sysid.inv_mass_min, sysid.inv_mass_max).detach().numpy()
        pred_density = torch.tensor(calc_density(new_inv_mass, example))
        # phys_params.data = target_params.to('cuda').data

    # zero
    opt.zero_grad(set_to_none=False)
    phys_tape.zero()

    # garbage collection
    render_usd(f'iter_{i}.usd', phys_model, phys_states, SIM_DURATION, SIM_DT)
    phys_inv_mass = None
    phys_model = None
    phys_states = None
    phys_tape = None
    wp_phys_pred = None
    
    losses.append(output.cpu().detach().numpy())
    est_error = torch.functional.F.mse_loss(pred_density, target_density).detach().numpy()
    est_errors.append(est_error) 
    if i % train_save_interval == 0:
        print(f"iter_{i:02d}: loss={losses[-1]:6f}, error={est_errors[-1]:6f}")
        print(f'{pred_density}')
        #print(f'{phys_pred.cpu().detach().numpy()}')

# # prediction errors
# # pred errors = TODO

param grad tensor([-1.1218e+04, -1.6433e+02,  2.0658e+06,  1.6809e+06, -4.4922e+03,
        -4.1881e-01,  1.8908e+01,  4.2788e+02], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 610.37it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_0.usd`
iter_00: loss=185.954834, error=0.715771
tensor([1.1000, 1.1000, 2.7000, 2.7000, 1.1000, 1.8786, 2.7000, 2.7000])
param grad tensor([-6.0765e+11,  2.8015e-03, -1.6808e+02,  1.0758e+04,  1.1271e+05,
        -4.6950e+05,  1.9074e+04,         nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 648.68it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_1.usd`
iter_01: loss=12.516238, error=   nan
tensor([1.1000, 1.1001, 1.1000, 2.7000, 2.7000, 1.1000, 2.7000,    nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 595.27it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_2.usd`
iter_02: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 611.24it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_3.usd`
iter_03: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 619.16it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_4.usd`
iter_04: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 589.45it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_5.usd`
iter_05: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 521.55it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_6.usd`
iter_06: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 610.18it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_7.usd`
iter_07: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 570.07it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_8.usd`
iter_08: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])
param grad tensor([nan, nan, nan, nan, nan, nan, nan, nan], device='cuda:0')


100%|██████████| 481/481 [00:00<00:00, 580.62it/s]


Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_9.usd`
iter_09: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])


100%|██████████| 481/481 [00:00<00:00, 559.58it/s]

Saved the USD stage file at `/home/jonathan/git/diffsim-sysid/examples/iter_10.usd`
iter_10: loss=   nan, error=   nan
tensor([nan, nan, nan, nan, nan, nan, nan, nan])





In [None]:
cfg.SYS_ID.OPTIM.lr

0.01