In [1]:
import gymnasium as gym

In [50]:
m = 2**256

In [51]:
m

115792089237316195423570985008687907853269984665640564039457584007913129639936

In [52]:
n = 1e9

In [53]:
-n**2 / (2 * m)

-4.3180842775472223e-60

4.0

In [42]:
import math

In [49]:
1 - math.exp(-n**2 / (2 * m))

0.02674100890033493

In [36]:
"root".split("/")

['root']

# Box Space

In [54]:
env = gym.make('PointMaze_UMazeDense-v3')

In [58]:
env.metadata['render_modes']

['human', 'rgb_array', 'depth_array']

In [3]:
type(env.observation_space)

gymnasium.spaces.dict.Dict

In [23]:
foo = gym.spaces.Box(low=-1, high=1, shape=())

In [24]:
foo_sample = foo.sample()

In [25]:
foo_sample.size

1

In [26]:
foo_sample.shape

()

In [27]:
sum(foo_sample.shape)

0

In [28]:
repr(foo)

'Box(-1.0, 1.0, (), float32)'

In [13]:
bar = gym.spaces.Text(max_length=10)

In [19]:
bar.sample()

'HtZZ'

In [20]:
bar.shape

In [22]:
baz = gym.spaces.MultiDiscrete([2, 3, 4])

In [24]:
baw = gym.spaces.MultiBinary(10)

In [25]:
baw.shape

(10,)

In [4]:
discrete = gym.spaces.Discrete(5)

In [5]:
discrete.shape

()

In [6]:
sum(discrete.shape)

0

In [48]:
type(discrete.sample())

numpy.int64

In [53]:
env = gym.make('LunarLander-v2')

In [55]:
env.observation_space.sample()

array([-0.92412674, -0.1333271 , -1.963013  , -2.8194652 , -3.0863338 ,
       -3.8225172 ,  0.00653581,  0.11339503], dtype=float32)

In [58]:
env.action_space

Discrete(4)

In [57]:
type(env.action_space.sample())

numpy.int64

In [34]:
tuple_space = gym.spaces.Tuple([foo, bar])

In [35]:
tuple_space.spaces

(Box(-1.0, 1.0, (), float32),
 Text(1, 10, charset=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz))

In [36]:
seq_space = gym.spaces.Sequence(foo)

In [39]:
seq_space.feature_space

Box(-1.0, 1.0, (), float32)

In [7]:
env.observation_space['achieved_goal'].shape

(2,)

In [None]:
for k, v in env.observation_space.items():
    print(k)

In [32]:
env.observation_space

Dict('achieved_goal': Box(-inf, inf, (2,), float64), 'desired_goal': Box(-inf, inf, (2,), float64), 'observation': Box(-inf, inf, (4,), float64))

In [12]:
type(env.observation_space)

gymnasium.spaces.dict.Dict

In [47]:
env.observation_space['achieved_goal'].shape

(2,)

In [40]:
obs_space = env.observation_space

In [42]:
for key in obs_space.keys():
    print(key)

achieved_goal
desired_goal
observation


In [43]:
env.spec

EnvSpec(id='PointMaze_UMazeDense-v3', entry_point='gymnasium_robotics.envs.maze.point_maze:PointMazeEnv', reward_threshold=None, nondeterministic=False, max_episode_steps=300, order_enforce=True, autoreset=False, disable_env_checker=False, apply_api_compatibility=False, kwargs={'maze_map': [[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 0, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]], 'reward_type': 'dense'}, namespace=None, name='PointMaze_UMazeDense', version=3, additional_wrappers=(), vector_entry_point=None)

In [3]:
obs = env.observation_space.sample()

In [4]:
obs

OrderedDict([('achieved_goal', array([ 1.72302223, -1.05896134])),
             ('desired_goal', array([0.8429858 , 1.31911617])),
             ('observation',
              array([ 0.65584758, -0.92570483, -0.33714198, -1.34361531]))])

In [7]:
env.observation_space.to_jsonable([obs])

{'achieved_goal': [[1.7230222286828372, -1.0589613429722422]],
 'desired_goal': [[0.8429858009090716, 1.31911617339504]],
 'observation': [[0.6558475752925086,
   -0.9257048326942372,
   -0.3371419848268517,
   -1.3436153075654953]]}

In [9]:
import json

In [16]:
obs_str = json.dumps(env.observation_space.to_jsonable([obs]))

In [17]:
obs_str

'{"achieved_goal": [[1.7230222286828372, -1.0589613429722422]], "desired_goal": [[0.8429858009090716, 1.31911617339504]], "observation": [[0.6558475752925086, -0.9257048326942372, -0.3371419848268517, -1.3436153075654953]]}'

In [27]:
from pydantic import BaseModel, Json
from typing import Optional, Any, List

In [15]:
class Foo(BaseModel):
    obs: Optional[Json[Any]]

In [18]:
foo = Foo(obs=obs_str)

In [22]:
foo.model_dump_json()

'{"obs":{"achieved_goal":[[1.7230222286828372,-1.0589613429722422]],"desired_goal":[[0.8429858009090716,1.31911617339504]],"observation":[[0.6558475752925086,-0.9257048326942372,-0.3371419848268517,-1.3436153075654953]]}}'

In [15]:
type(foo)

str

In [42]:
import requests
import json

# Your endpoint URL
url = "http://localhost:8000/step"


class StepCreate(BaseModel):
    experiment_id: str
    environment_id: str
    episode: int
    # In simulation, all the observations and actions are aligned in time.
    step: int
    # NOTE: observation and action are flattened into 1D arrays.
    observation: Optional[Json[Any]] = None
    action: Optional[Json[Any]] = None
    reward: Optional[float] = 0.0
    # This is potentially useful to train a policy conditioned on sub goals.
    # sub_goal: Optional[Goal] = None
    image_urls: Optional[List[str]] = []
    termnated: Optional[bool] = False
    truncated: Optional[bool] = False
    # This is deprecated by Gymnasium, but we still keep it for backward
    # compatibility.
    done: Optional[bool] = False
    info: Optional[Json[Any]] = None  # JSON string of step info


# The data you want to send, structured as a list of lists or any other structure
# data = StepCreate.model_validate_json('{"experiment_id":"eazk1rva","environment_id":"LunarLander-v2","episode":9,"step":112,"observation":[[-0.9637165665626526,-0.02285194583237171,-0.8739522695541382,-0.12580598890781403,1.4199990034103394,6.530352592468262,0.0,1.0]],"action":[0],"reward":-100.0,"image_urls":[],"termnated":true,"truncated":false,"done":false,"info":{}}')
data = StepCreate(experiment_id="eazk1rva", 
                  environment_id="LunarLander-v2",
                  episode=9,
                  step=112,
                  action=json.dumps(0),
                  observation=json.dumps([[-0.9637165665626526,-0.02285194583237171,-0.8739522695541382,-0.12580598890781403,1.4199990034103394,6.530352592468262,0.0,1.0]]))
                  
                  
# Ensure the data is properly JSON-encoded. The `json` parameter in the `requests.post` method does this automatically.
response = requests.post(url, json=data.model_dump())

# If you need to manually encode the data and send it as a string (less common), you would do something like this:
# headers = {'Content-Type': 'application/json'}
# response = requests.post(url, data=json.dumps(data), headers=headers)

print(response.text)


{"detail":[{"type":"json_type","loc":["body","observation"],"msg":"JSON input should be string, bytes or bytearray","input":[[-0.9637165665626526,-0.02285194583237171,-0.8739522695541382,-0.12580598890781403,1.4199990034103394,6.530352592468262,0.0,1.0]],"url":"https://errors.pydantic.dev/2.6/v/json_type"},{"type":"json_type","loc":["body","action"],"msg":"JSON input should be string, bytes or bytearray","input":0,"url":"https://errors.pydantic.dev/2.6/v/json_type"}]}


In [44]:
data.json()

/tmp/ipykernel_1184371/354839536.py:1: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
  data.json()


'{"experiment_id":"eazk1rva","environment_id":"LunarLander-v2","episode":9,"step":112,"observation":[[-0.9637165665626526,-0.02285194583237171,-0.8739522695541382,-0.12580598890781403,1.4199990034103394,6.530352592468262,0.0,1.0]],"action":0,"reward":0.0,"image_urls":[],"termnated":false,"truncated":false,"done":false,"info":null}'

In [43]:
data

StepCreate(experiment_id='eazk1rva', environment_id='LunarLander-v2', episode=9, step=112, observation=[[-0.9637165665626526, -0.02285194583237171, -0.8739522695541382, -0.12580598890781403, 1.4199990034103394, 6.530352592468262, 0.0, 1.0]], action=0, reward=0.0, image_urls=[], termnated=False, truncated=False, done=False, info=None)

In [26]:
import gymnasium as gym

from huggingface_sb3 import load_from_hub
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

# Retrieve the model from the hub
## repo_id = id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name})
## filename = name of the model zip file from the repository
checkpoint = load_from_hub(
    repo_id="jxx123/ppo-LunarLander-v2",
    filename="ppo-LunarLander-v2.zip",
)

In [17]:
checkpoint

'/home/jinyu/.cache/huggingface/hub/models--jxx123--ppo-LunarLander-v2/snapshots/c3ac2142e9a5555104d8f9f1afccd6ab701566a7/ppo-LunarLander-v2.zip'

In [18]:
import base64
import cloudpickle

# The provided base64 string
base64_str = "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg=="

# Decode the base64 string to bytes
decoded_bytes = base64.b64decode(base64_str)

# Deserialize the bytes using cloudpickle
deserialized_object = cloudpickle.loads(decoded_bytes)

In [22]:
deserialized_object.load(checkpoint)

RuntimeError: [enforce fail at inline_container.cc:135] . file in archive is not in a subdirectory: data

In [23]:
model = deserialized_object.load(checkpoint)

RuntimeError: [enforce fail at inline_container.cc:135] . file in archive is not in a subdirectory: data

In [26]:
deserialized_object

<function torch.nn.modules.module.Module.named_modules(self, memo: Optional[Set[ForwardRef('Module')]] = None, prefix: str = '', remove_duplicate: bool = True)>

In [28]:
mdl = PPO.load(checkpoint)

In [29]:
mdl

<stable_baselines3.ppo.ppo.PPO at 0x7fbc88778490>

In [30]:
mdl.env

In [31]:
import gymnasium as gym
from gymnasium.envs.registration import register
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv


def custom_reward(BG_last_hour):
    if BG_last_hour[-1] > 180:
        return -1
    elif BG_last_hour[-1] < 70:
        return -2
    else:
        return 1


register(
    id="simglucose/adolescent2-v0",
    entry_point="simglucose.envs:T1DSimGymnaisumEnv",
    max_episode_steps=10000,
    kwargs={
        "patient_name": "adolescent#002",
        "reward_fun": custom_reward,
    },
)

register(
    id="simglucose/adolescent1-v0",
    entry_point="simglucose.envs:T1DSimGymnaisumEnv",
    max_episode_steps=10000,
    kwargs={
        "patient_name": "adolescent#001",
        "reward_fun": custom_reward,
    },
)

In [32]:
vec_env = DummyVecEnv(
    [
        lambda: gym.make("simglucose/adolescent2-v0", render_mode="human"),
        lambda: gym.make("simglucose/adolescent1-v0", render_mode="human"),
    ]
)

In [34]:
vec_env.envs

[<TimeLimit<OrderEnforcing<PassiveEnvChecker<T1DSimGymnaisumEnv<simglucose/adolescent2-v0>>>>>,
 <TimeLimit<OrderEnforcing<PassiveEnvChecker<T1DSimGymnaisumEnv<simglucose/adolescent1-v0>>>>>]

In [35]:
vec_env

<stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv at 0x7fbc764917d0>

In [36]:
env = gym.make("simglucose/adolescent2-v0", render_mode="human")

In [37]:
env

<TimeLimit<OrderEnforcing<PassiveEnvChecker<T1DSimGymnaisumEnv<simglucose/adolescent2-v0>>>>>