In [1]:
import torch
import gym
import numpy as np

from ji_dog_net import PPO
from ji_dog_net import ActorCritic
from ji_dog_net import process_state

## Check device

In [2]:
print("============================================================================================")
# set device to cpu or cuda
device = torch.device('cpu')
if(torch.cuda.is_available()): 
    device = torch.device('cuda:0') 
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
print("============================================================================================")


Device set to : NVIDIA GeForce RTX 4060 Laptop GPU


## Test ACNet

In [3]:

state_dim = 20  
action_dim = 4 
ppo = PPO(state_dim, action_dim, lr_actor=0.0003, lr_critic=0.001, gamma=0.99, K_epochs=4, eps_clip=0.2)
state = torch.rand((state_dim,)).to(device)  
action, action_logprob, hidden_actor = ppo.policy.act(state)

print("Action output:", action)
print("Action log probability:", action_logprob)

state_value = ppo.policy.evaluate_critic(state)

print("State value output:", state_value)

ppo.buffer.rewards.append(1.0)
ppo.buffer.is_terminals.append(False)

print("PPO complete, network has been trained for one iteration.")


Action output: tensor([[ 0.3659, -0.1104, -0.3700, -0.3385]], device='cuda:0')
Action log probability: tensor([-2.1186], device='cuda:0')
State value output: tensor([-0.0930], device='cuda:0', grad_fn=<ViewBackward0>)
PPO complete, network has been trained for one iteration.


## Train PPO

In [4]:
# Environment and simulation setup
from isaacsim import SimulationApp
simulation_app = SimulationApp({"headless": True})

from ji_dog_env_create import Ji_Dog_Env
from tqdm import tqdm  
import numpy as np

if __name__ == "__main__":
    env = Ji_Dog_Env()
    state_dim = 20
    action_dim = 4
    max_training_timesteps = 10
    max_timesteps = 3
    ppo = PPO(state_dim, action_dim, lr_actor=0.0003, lr_critic=0.001, gamma=0.99, K_epochs=4, eps_clip=0.2)

    for episode in tqdm(range(max_training_timesteps), desc="Training Episodes"):
        state = env.reset()
        state = process_state(state)
        state = np.array(state)

        with tqdm(total=max_timesteps, desc=f"Episode {episode}", leave=False) as pbar:
            for t in range(max_timesteps + 1):
                action = ppo.select_action(state)
                state, reward, done, _ = env.step(action[0])
                state = process_state(state)
                state = np.array(state)
                ppo.buffer.rewards.append(reward)
                ppo.buffer.is_terminals.append(done)

                pbar.update(1)

                if done:
                    break
                    
        ppo.update()
        save_path = "Model_Checkpoints/Ji_dog_{}_Episode.pth".format(episode)
        print("save checkpoint path : " + save_path)
        ppo.save(save_path)
        
        if episode % 10 == 0:
            print(f"Episode {episode} completed.")
print('Finished!')

Starting kit application with the following args:  ['/home/bai/.local/share/ov/pkg/isaac-sim-4.2.0/exts/omni.isaac.kit/omni/isaac/kit/simulation_app.py', '/home/bai/.local/share/ov/pkg/isaac-sim-4.2.0/apps/omni.isaac.sim.python.kit', '--/app/tokens/exe-path=/home/bai/.local/share/ov/pkg/isaac-sim-4.2.0/kit', '--/persistent/app/viewport/displayOptions=3094', '--/rtx/materialDb/syncLoads=True', '--/rtx/hydra/materialSyncLoads=True', '--/omni.kit.plugin/syncUsdLoads=True', '--/app/renderer/resolution/width=1280', '--/app/renderer/resolution/height=720', '--/app/window/width=1440', '--/app/window/height=900', '--/renderer/multiGpu/enabled=True', '--/app/fastShutdown=False', '--ext-folder', '/home/bai/.local/share/ov/pkg/isaac-sim-4.2.0/exts', '--ext-folder', '/home/bai/.local/share/ov/pkg/isaac-sim-4.2.0/apps', '--/physics/cudaDevice=0', '--portable', '--no-window', '--/app/window/hideUi=1']
Passing the following args to the base kit application:  ['-f', '/home/bai/.local/share/jupyter/run

Training Episodes:   0%|                                 | 0/10 [00:00<?, ?it/s]
Episode 0:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 0: 100%|██████████████████████████████████| 3/3 [00:00<00:00, 26.28it/s][A
Training Episodes:  10%|██▌                      | 1/10 [00:02<00:22,  2.48s/it][A

save checkpoint path : Model_Checkpoints/Ji_dog_0_Episode.pth
Episode 0 completed.



Episode 1:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 1:  33%|███████████▎                      | 1/3 [00:00<00:00,  7.12it/s][A
Episode 1: 4it [00:00, 15.39it/s]                                               [A
Training Episodes:  20%|█████                    | 2/10 [00:03<00:12,  1.54s/it]

save checkpoint path : Model_Checkpoints/Ji_dog_1_Episode.pth



Episode 2:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 2: 100%|██████████████████████████████████| 3/3 [00:00<00:00, 28.24it/s][A
Training Episodes:  30%|███████▌                 | 3/10 [00:04<00:08,  1.15s/it][A

save checkpoint path : Model_Checkpoints/Ji_dog_2_Episode.pth



Episode 3:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 3: 100%|██████████████████████████████████| 3/3 [00:00<00:00, 28.13it/s][A
Training Episodes:  40%|██████████               | 4/10 [00:04<00:05,  1.05it/s][A

save checkpoint path : Model_Checkpoints/Ji_dog_3_Episode.pth



Episode 4:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 4: 4it [00:00, 35.76it/s]                                               [A
Training Episodes:  50%|████████████▌            | 5/10 [00:05<00:04,  1.20it/s]

save checkpoint path : Model_Checkpoints/Ji_dog_4_Episode.pth



Episode 5:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 5: 4it [00:00, 37.69it/s]                                               [A
Training Episodes:  60%|███████████████          | 6/10 [00:05<00:02,  1.35it/s]

save checkpoint path : Model_Checkpoints/Ji_dog_5_Episode.pth



Episode 6:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 6: 4it [00:00, 36.49it/s]                                               [A
Training Episodes:  70%|█████████████████▌       | 7/10 [00:06<00:01,  1.52it/s]

save checkpoint path : Model_Checkpoints/Ji_dog_6_Episode.pth



Episode 7:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 7: 4it [00:00, 38.22it/s]                                               [A
Training Episodes:  80%|████████████████████     | 8/10 [00:06<00:01,  1.66it/s]

save checkpoint path : Model_Checkpoints/Ji_dog_7_Episode.pth



Episode 8:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 8: 4it [00:00, 34.41it/s]                                               [A
Training Episodes:  90%|██████████████████████▌  | 9/10 [00:07<00:00,  1.68it/s]

save checkpoint path : Model_Checkpoints/Ji_dog_8_Episode.pth



Episode 9:   0%|                                          | 0/3 [00:00<?, ?it/s][A
Episode 9: 100%|██████████████████████████████████| 3/3 [00:00<00:00, 28.24it/s][A
Training Episodes: 100%|████████████████████████| 10/10 [00:08<00:00,  1.24it/s][A

save checkpoint path : Model_Checkpoints/Ji_dog_9_Episode.pth
Finished!





In [None]:
simulation_app.close()

