# Forward

In [1]:
import torch

# Load the dictionaries saved for the two backward variants ("implicit" and
# "unroll"). Judging by the file names these are forward-pass dumps produced
# with 10 ADMM iterations and 10 backward iterations -- TODO confirm.
# NOTE(review): torch.load unpickles the file, so only load dumps that come
# from a trusted source.
implicit_for = torch.load("implicit_admm_iter_10_bwd_10_forward.pt")
unroll_for = torch.load("unroll_admm_iter_10_bwd_10_forward.pt")

  import pynvml  # type: ignore[import]


In [3]:
# List the entries stored in the "implicit" dump.
implicit_for.keys()

dict_keys(['yraw', 'd_c', 'sK_final', 'y_final', 'y_scaled'])

In [4]:
# List the entries stored in the "unroll" dump (expected to match the implicit one).
unroll_for.keys()

dict_keys(['yraw', 'd_c', 'sK_final', 'y_final', 'y_scaled'])

In [21]:
from sklearn.metrics.pairwise import cosine_distances
import seaborn as sns
import matplotlib.pyplot as plt

# Print the `yraw` entry of both dumps side by side, one simulation at a time.
yraw_implicit = implicit_for['yraw']
yraw_unroll = unroll_for['yraw']

for sim in range(yraw_implicit.shape[0]):
    v1 = yraw_implicit[sim, :]
    v2 = yraw_unroll[sim, :]

    # Observation: the initial actions already differ between the two runs, so
    # the backward passes are going to differ as well. Open question: what
    # happens if both runs are set to start from the same action?
    print(v1, v2)

    # Disabled visual comparison, kept for reference:
    # distances = cosine_distances(v1, v2)
    # plt.figure(figsize=(3, 3))
    # sns.heatmap(distances, square=True, annot=True, cbar=False, cmap='Blues')
    # plt.title(f"Cosine distance for sim {sim}")
    # plt.show()

tensor([[ 0.0584],
        [-0.0584],
        [ 0.0584],
        [-0.0584],
        [-0.0584],
        [ 0.0584],
        [-0.0584],
        [ 0.0584],
        [ 0.0584],
        [-0.0584],
        [ 0.0000]], dtype=torch.float64) tensor([[-0.0757],
        [-0.0596],
        [-0.0757],
        [-0.0596],
        [ 0.0757],
        [ 0.0596],
        [ 0.0757],
        [ 0.0596],
        [-0.0757],
        [-0.0596],
        [ 0.0663]], dtype=torch.float64)
tensor([[ 0.0534],
        [ 0.2451],
        [ 0.0534],
        [ 0.2451],
        [-0.0534],
        [-0.2451],
        [-0.0534],
        [-0.2451],
        [ 0.0534],
        [ 0.2451],
        [ 0.0000]], dtype=torch.float64) tensor([[-0.1738],
        [ 0.0643],
        [-0.1738],
        [ 0.0643],
        [ 0.1738],
        [-0.0643],
        [ 0.1738],
        [-0.0643],
        [-0.1738],
        [ 0.0643],
        [ 0.0000]], dtype=torch.float64)
tensor([[ 0.1120],
        [ 0.1174],
        [ 0.1120],
        [ 0.1174],


In [1]:
# Make the project's `src/` directory importable from this kernel.
# A `!export ...` line runs in a throwaway subshell, so it changes neither the
# kernel's environment nor `sys.path`; extend `sys.path` directly instead.
import os
import sys

SRC_DIR = os.path.join(os.getcwd(), "src")
if SRC_DIR not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.insert(0, SRC_DIR)

In [1]:
import torch
import matplotlib.pyplot as plt

from envs.navigate_seeker import NavigateSeekerEnv
from safeguards.pinet import PinetSafeguard
import time

# Make float64 the default floating-point dtype for tensors created from here on.
torch.set_default_dtype(torch.float64)

  import pynvml  # type: ignore[import]


# Results before optimization

In [2]:
def test_pinet_collision():
    """Compare PinetSafeguard configurations on an action aimed at an obstacle.

    Builds a ``NavigateSeekerEnv`` with one environment and one obstacle,
    executes three actions that point from the first two state components
    (presumably the position -- confirm) straight at the obstacle centre, and
    then, for every entry of ``experiment_configuration``, safeguards the last
    such action and back-propagates ``safe_action.norm()`` to the input action.

    The forward and backward wall-clock times, the safe action and the gradient
    with respect to the input action are printed. Nothing is returned and
    nothing is asserted.
    """
    # Initialize environment
    env = NavigateSeekerEnv(
        num_envs=1,
        num_steps=100,
        num_obstacles=1,
        min_radius=2.0,
        max_radius=4.0,
        draw_safe_action_set=True,
        polytopic_approach=False
    )

    # Reset environment
    env.reset()
    # show_image(env)  # uncomment to inspect the initial scene

    # Step towards the obstacle three times. Each action is the raw (unscaled)
    # offset from the current state to the obstacle centre; the last one is
    # reused below as the unsafe input of the safeguard.
    for _ in range(3):
        obstacle_center = env.obstacles[0].center[0]  # pick first env and first obstacle
        action = obstacle_center - env.state[:, :2]
        print("Intended Collision Action:", action)
        env.execute_action(action)

    experiment_configuration = [
        {"regularisation_coefficient": 0.1, "n_iter_admm": 100, "n_iter_bwd": 100, "debug": False},
        {"regularisation_coefficient": 0.1, "n_iter_admm": 100, "n_iter_bwd": 100, "fpi": True, "debug": False},
        {"regularisation_coefficient": 0.1, "n_iter_admm": 100, "n_iter_bwd": 100, "bwd_method": "unroll", "debug": False},
    ]

    for config in experiment_configuration:
        print("\nExperiment Configuration:", config)
        # Fresh leaf tensor per configuration so gradients do not accumulate
        # across experiments.
        action_dup = action.detach().clone().requires_grad_(True)

        # Apply PinetSafeguard to get safe action.
        # perf_counter() is the clock intended for short intervals: it is
        # monotonic and high resolution, unlike time.time().
        wrapper = PinetSafeguard(env, **config)
        start_time = time.perf_counter()
        safe_action = wrapper.safeguard(action_dup)
        end_time = time.perf_counter()
        print("Safeguarding pass took {:.6f} seconds".format(end_time - start_time))
        print("Safe Action:", safe_action)

        # Backward pass for safe_action
        loss = safe_action.norm()
        start_time = time.perf_counter()
        loss.backward()
        end_time = time.perf_counter()
        print("Backward pass took {:.6f} seconds".format(end_time - start_time))
        print("Gradient w.r.t input action:", action_dup.grad)

def show_image(env):
    """Render ``env`` and display each returned frame in its own 5x5 figure."""
    for env_idx, frame in enumerate(env.render()):
        # Frames arrive channel-first (C, H, W); imshow expects channel-last (H, W, C).
        image = frame.permute(1, 2, 0).cpu().numpy()

        _, ax = plt.subplots(figsize=(5, 5))
        ax.imshow(image)
        ax.axis('off')
        ax.set_title(f"Environment {env_idx}")
        plt.show()

# Run the comparison; all results are printed by the function itself.
test_pinet_collision()

Intended Collision Action: tensor([[4.0311, 3.1183]])
Intended Collision Action: tensor([[3.6020, 2.7930]])
Intended Collision Action: tensor([[3.2474, 2.5339]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 100, 'n_iter_bwd': 100, 'debug': False}
Safeguarding pass took 0.007756 seconds
Safe Action: tensor([[1.4543, 1.1819]], grad_fn=<_ProjectImplicitFnBackward>)
Backward pass took 0.035191 seconds
Gradient w.r.t input action: tensor([[0.1878, 0.1866]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 100, 'n_iter_bwd': 100, 'fpi': True, 'debug': False}
Safeguarding pass took 0.007007 seconds
Safe Action: tensor([[1.4543, 1.1819]], grad_fn=<_ProjectImplicitFnBackward>)
Backward pass took 0.033618 seconds
Gradient w.r.t input action: tensor([[-0.0210,  0.0298]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 100, 'n_iter_bwd': 100, 'bwd_method': 'unroll', 'debug': False}
Safeguarding pass took 0.

# Testing optimization

In [16]:
# Time the two stages of the safeguard separately: building the linear
# constraints (Ax <= b) and running the ADMM projection.

# Initialize environment
env = NavigateSeekerEnv(
    num_envs=1,
    num_steps=100,
    num_obstacles=1,
    min_radius=2.0,
    max_radius=4.0,
    draw_safe_action_set=True,
    polytopic_approach=False
)

# Reset environment
env.reset()
# show_image(env)  # uncomment to inspect the initial scene

# Step towards the obstacle three times. Each action is the raw (unscaled)
# offset from the current state to the obstacle centre; the last one is the
# unsafe input used below.
for _ in range(3):
    obstacle_center = env.obstacles[0].center[0]  # pick first env and first obstacle
    action = obstacle_center - env.state[:, :2]
    print("Intended Collision Action:", action)
    env.execute_action(action)

experiment_configuration = [
    {"regularisation_coefficient": 0.1, "n_iter_admm": 15, "n_iter_bwd": 10},
]

print("\nExperiment Configuration:", experiment_configuration[0])
action_dup = action.detach().clone().requires_grad_(True)

print("Original Action:", action_dup)

# Build the safeguard; its two internal stages are invoked by hand below.
wrapper = PinetSafeguard(env, **experiment_configuration[0])

# Separate name so `action` keeps referring to the raw 2-D action.
action_col = action_dup.unsqueeze(2)  # match (B, D, 1)

# ----- Build Ax ≤ b -----
# perf_counter() is the clock intended for short intervals such as these.
start_time = time.perf_counter()
A, b = wrapper.env.compute_A_b()
end_time = time.perf_counter()
print("Building Ax ≤ b took {:.6f} seconds".format(end_time - start_time))

# ADMM projection function with implicit backward that returns the final safe action.
start_time = time.perf_counter()
y_safe = wrapper._project_with_implicit(
    action=action_col,  # (B, D, 1)
    A=A,                # (B, m, D)
    b=b                 # (B, m)
)  # returns (B, D)
end_time = time.perf_counter()
print("ADMM projection took {:.6f} seconds".format(end_time - start_time))

print("Safe Action:", y_safe)

Intended Collision Action: tensor([[4.1001, 5.3441]])
Intended Collision Action: tensor([[3.6737, 4.8523]])
Intended Collision Action: tensor([[3.3171, 4.3184]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 15, 'n_iter_bwd': 10}
Original Action: tensor([[3.3171, 4.3184]], requires_grad=True)
Building Ax ≤ b took 0.000488 seconds
ADMM projection took 0.019990 seconds
Safe Action: tensor([[3.3171, 3.7255]], grad_fn=<_ProjectImplicitFnBackward>)


### The projection takes more time than defining the state

In [2]:
# Run the full safeguard once on an action aimed at the obstacle. No timing is
# done in this cell; any timing lines in the output come from the safeguard.

# Initialize environment
env = NavigateSeekerEnv(
    num_envs=1,
    num_steps=100,
    num_obstacles=1,
    min_radius=2.0,
    max_radius=4.0,
    draw_safe_action_set=True,
    polytopic_approach=False
)

# Reset environment
env.reset()
# show_image(env)  # uncomment to inspect the initial scene

# Step towards the obstacle three times. Each action is the raw (unscaled)
# offset from the current state to the obstacle centre; the last one is the
# unsafe input used below.
for _ in range(3):
    obstacle_center = env.obstacles[0].center[0]  # pick first env and first obstacle
    action = obstacle_center - env.state[:, :2]
    print("Intended Collision Action:", action)
    env.execute_action(action)

experiment_configuration = [
    {"regularisation_coefficient": 0.1, "n_iter_admm": 15, "n_iter_bwd": 10},
]

print("\nExperiment Configuration:", experiment_configuration[0])
action_dup = action.detach().clone().requires_grad_(True)

print("Original Action:", action_dup)

# Apply PinetSafeguard to get safe action
wrapper = PinetSafeguard(env, **experiment_configuration[0])
safe_action = wrapper.safeguard(action_dup)

print("Safe Action:", safe_action)

Intended Collision Action: tensor([[ 0.4256, -5.4905]])
Intended Collision Action: tensor([[ 0.3774, -4.9389]])
Intended Collision Action: tensor([[ 0.3656, -4.4455]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 15, 'n_iter_bwd': 10}
Original Action: tensor([[ 0.3656, -4.4455]], requires_grad=True)
Building equality constraints took 0.000086 seconds
Ruiz scaling took 0.000425 seconds
Building constraints took 0.000575 seconds
Forward took 0.020412 seconds
Safe Action: tensor([[ 0.2284, -2.0202]], grad_fn=<_ProjectImplicitFnBackward>)


In [None]:
################ Remove unnecessary .clones() ####################
# Re-run of the experiment after removing unnecessary .clone() calls
# (presumably inside PinetSafeguard -- confirm), now including the backward pass.

# Initialize environment
env = NavigateSeekerEnv(
    num_envs=1,
    num_steps=100,
    num_obstacles=1,
    min_radius=2.0,
    max_radius=4.0,
    draw_safe_action_set=True,
    polytopic_approach=False
)

# Reset environment
env.reset()
# show_image(env)  # uncomment to inspect the initial scene

# Step towards the obstacle three times. Each action is the raw (unscaled)
# offset from the current state to the obstacle centre; the last one is the
# unsafe input used below.
for _ in range(3):
    obstacle_center = env.obstacles[0].center[0]  # pick first env and first obstacle
    action = obstacle_center - env.state[:, :2]
    print("Intended Collision Action:", action)
    env.execute_action(action)

experiment_configuration = [
    {"regularisation_coefficient": 0.1, "n_iter_admm": 15, "n_iter_bwd": 10},
]

print("\nExperiment Configuration:", experiment_configuration[0])
action_dup = action.detach().clone().requires_grad_(True)

print("Original Action:", action_dup)

# Apply PinetSafeguard to get safe action (the forward pass is not timed here).
wrapper = PinetSafeguard(env, **experiment_configuration[0])
safe_action = wrapper.safeguard(action_dup)

print("Safe Action:", safe_action)

# Backward pass for safe_action.
# perf_counter() is the clock intended for short intervals such as this one.
loss = safe_action.norm()
start_time = time.perf_counter()
loss.backward()
end_time = time.perf_counter()
print("Backward pass took {:.6f} seconds".format(end_time - start_time))
print("Gradient w.r.t input action:", action_dup.grad)

Intended Collision Action: tensor([[-1.5894,  4.7204]])
Intended Collision Action: tensor([[-1.4791,  4.2537]])
Intended Collision Action: tensor([[-1.2959,  3.8076]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 15, 'n_iter_bwd': 10}
Original Action: tensor([[-1.2959,  3.8076]], requires_grad=True)
PinetSafeguard: ADMM projection took 0.012825 seconds
Safe Action: tensor([[-0.8014,  1.9159]], grad_fn=<_ProjectImplicitFnBackward>)
Backward pass took 0.031830 seconds
Gradient w.r.t input action: tensor([[-0.1526,  0.0660]])


In [5]:
############ Newest version #############
# Forward and backward timing of the safeguard with 100 ADMM iterations,
# 10 backward iterations and debug output enabled.

# Initialize environment
env = NavigateSeekerEnv(
    num_envs=1,
    num_steps=100,
    num_obstacles=1,
    min_radius=2.0,
    max_radius=4.0,
    draw_safe_action_set=True,
    polytopic_approach=False
)

# Reset environment
env.reset()
# show_image(env)  # uncomment to inspect the initial scene

# Step towards the obstacle three times. Each action is the raw (unscaled)
# offset from the current state to the obstacle centre; the last one is the
# unsafe input used below.
for _ in range(3):
    obstacle_center = env.obstacles[0].center[0]  # pick first env and first obstacle
    action = obstacle_center - env.state[:, :2]
    print("Intended Collision Action:", action)
    env.execute_action(action)

experiment_configuration = [
    {"regularisation_coefficient": 0.1, "n_iter_admm": 100, "n_iter_bwd": 10, "debug": True},
]

print("\nExperiment Configuration:", experiment_configuration[0])
action_dup = action.detach().clone().requires_grad_(True)

print("Original Action:", action_dup)

# Apply PinetSafeguard to get safe action.
# perf_counter() is the clock intended for short intervals: it is monotonic
# and high resolution, unlike time.time().
wrapper = PinetSafeguard(env, **experiment_configuration[0])
start_time = time.perf_counter()
safe_action = wrapper.safeguard(action_dup)
end_time = time.perf_counter()
print("Safeguarding pass took {:.6f} seconds".format(end_time - start_time))

print("Safe Action:", safe_action)

# Backward pass for safe_action
loss = safe_action.norm()
start_time = time.perf_counter()
loss.backward()
end_time = time.perf_counter()
print("Backward pass took {:.6f} seconds".format(end_time - start_time))
print("Gradient w.r.t input action:", action_dup.grad)

Intended Collision Action: tensor([[ 3.8142, -4.8572]])
Intended Collision Action: tensor([[ 3.4241, -4.3377]])
Intended Collision Action: tensor([[ 3.0794, -3.8830]])

Experiment Configuration: {'regularisation_coefficient': 0.1, 'n_iter_admm': 100, 'n_iter_bwd': 10, 'debug': True}
Original Action: tensor([[ 3.0794, -3.8830]], requires_grad=True)
PinetSafeguard: ADMM projection took 0.004156 seconds
Safeguarding pass took 0.005549 seconds
Safe Action: tensor([[ 0.0725, -0.0877]], grad_fn=<_ProjectImplicitFnBackward>)
Pairwise Distance: 0.0035533992407945168
Pairwise Distance: 0.0033698683298266034
Pairwise Distance: 0.00319682904153308
Pairwise Distance: 0.0030336868776891703
Pairwise Distance: 0.0028798828675196714
Pairwise Distance: 0.0027348902315875697
Pairwise Distance: 0.0025982116799537114
Pairwise Distance: 0.0024693772252772288
Pairwise Distance: 0.002347942398190779
Pairwise Distance: 0.0022334867623204674
Backward pass took 0.004791 seconds
Gradient w.r.t input action: tens