In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import torch
from torch import multiprocessing

In [3]:
# Choose the torch device used by this notebook.
# CUDA device index 0 is selected only when CUDA is available AND the
# multiprocessing start method is not "fork"; otherwise fall back to the CPU.
# NOTE(review): presumably the fork check guards against using CUDA in forked
# subprocesses — confirm this is still needed for this notebook.
is_fork = "fork" == multiprocessing.get_start_method()

if torch.cuda.is_available() and not is_fork:
    device = torch.device(0)
else:
    device = torch.device("cpu")

## Environment Preparation

#### Load the Unity environment using `mlagents_envs`

In [4]:
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
from mlagents_envs.environment import UnityEnvironment

import os

# Side channel used to send engine settings (such as the time scale) to the Unity player.
channel = EngineConfigurationChannel()

# Location of the built Unity player executable.
# The path can be overridden without editing the notebook by setting the
# WAREHOUSE_BOT_ENV_PATH environment variable; when it is unset or empty the
# original local build path is used, so existing setups keep working.
DEFAULT_ENV_PATH = "D:/_Thesis/warehouse-bot-training/environment_builds/warehouse_stage2_find/Warehouse_Bot.exe"
env_path = os.environ.get("WAREHOUSE_BOT_ENV_PATH") or DEFAULT_ENV_PATH

# Launch the Unity player and connect to it.
unity_env = UnityEnvironment(
    file_name=env_path,
    side_channels=[channel],
    # Optional extra command-line arguments for the player (currently disabled):
    # additional_args=["-batchmode", "-nographics"]
)

# time_scale=1 asks the engine to run at its normal (1x) simulation speed.
channel.set_configuration_parameters(time_scale=1)

#### Transform environment from `mlagents` to `gymnasium`

In [5]:
import gymnasium as gym
# Print the installed gymnasium version so the run records which release was used.
print(gym.__version__)

1.1.1


In [6]:
# Gymnasium adapters for the Unity environment, imported from project-local modules.
from env_camera_raycasts_gymnasium_wrapper import UnityCameraRaycastsGymWrapper 
# NOTE(review): this module name ends in "_Wrapper" (capital W) while the one
# above ends in "_wrapper" — confirm it matches the file name on disk exactly;
# a case mismatch only resolves on case-insensitive filesystems.
from env_raycasts_gymnasium_Wrapper import UnityRaycastsGymWrapper 

# Alternative wrapper, currently disabled
# (presumably raycast-only observations without the camera — TODO confirm):
# gymnasium_env = UnityRaycastsGymWrapper(unity_env)
# Active wrapper: exposes the Unity environment through the camera + raycasts adapter.
gymnasium_env = UnityCameraRaycastsGymWrapper(unity_env)

#### Load the trained `stable_baselines3` model

In [7]:
from stable_baselines3 import PPO

# Load the saved PPO checkpoint and attach it to the wrapped Unity environment.
# `device` (selected near the top of the notebook) is passed explicitly so the
# policy is placed on the device chosen there instead of relying on the
# loader's own default device selection.
model = PPO.load(
    "./saved_models/baselines/stage2/find_1_500k",
    env=gymnasium_env,
    device=device,
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [8]:
# Print the loaded policy's module tree (feature extractor and its sub-networks) for inspection.
print(model.policy)

MultiInputActorCriticPolicy(
  (features_extractor): CustomCombinedExtractor(
    (image_enc_net): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (5): ReLU(inplace=True)
      (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): AdaptiveAvgPool2d(output_size=(1, 1))
      (9): Flatten(start_dim=1, end_dim=-1)
      (10): Linear(in_features=64, out_features=48, bias=True)
      (11): ReLU(inplace=True)
      (12): Linear(in_features=48, out_features=32, bias=True)
      (13): ReLU(inplace=True)
    )
    (vector_enc_net): Sequential(
      (0): Linear(in_features=60, out_features=16, bias=True)
      (1): ReLU()
    )
    (linear): Sequential(
      (0): Linear(in_features

In [9]:
# Use the environment instance held by the model rather than `gymnasium_env`
# directly: the load output shows it was wrapped in a Monitor and a DummyVecEnv,
# so this is the vectorized environment the policy expects to interact with.
env = model.get_env()
# Start the first episode and keep the initial observation for the rollout loop.
obs = env.reset()

In [10]:
import time

N_STEPS = 5000       # number of environment steps to play back
STEP_DELAY_S = 0.0   # set > 0 (e.g. 0.5) to slow the playback down for visual inspection

# Play the trained policy in the environment for a fixed number of steps.
# The loop is wrapped in try/finally so the environment is released even when
# the cell is interrupted or a step raises; otherwise the environment stays
# open after the cell finishes.
# NOTE(review): env.close() is expected to propagate through the wrappers to
# the Unity process — confirm the custom gymnasium wrapper implements close().
# After this cell has run, the environment must be re-created before it can be
# stepped again.
try:
    for _ in range(N_STEPS):
        # deterministic=True requests the policy's deterministic action
        # instead of sampling from the action distribution.
        action, _states = model.predict(obs, deterministic=True)
        # Vectorized-env step: returns batched observations, rewards, done
        # flags and info dicts. No manual reset is done here; the vectorized
        # env is relied on to start a new episode when one ends.
        obs, rewards, dones, info = env.step(action)
        if STEP_DELAY_S > 0:
            time.sleep(STEP_DELAY_S)
finally:
    env.close()