In [None]:
import torch

from rl_demo.envs import DiscreteReservoirEnv

In [2]:

# Reservoir settings collected in one mapping so they are easy to inspect and tweak.
# NOTE(review): the v_* values and initial_volume are presumably volumes expressed
# in the same unit — confirm against DiscreteReservoirEnv.
reservoir_config = {
    "v_max": 1000.0,
    "v_min": 100.0,
    "v_dead": 50.0,
    "initial_volume": 500.0,
}
env = DiscreteReservoirEnv(**reservoir_config)

# Start a fresh episode; `td` keeps whatever reset() returns and is reused below.
print("Resetting environment...")
td = env.reset()

# Report the observation's shape and its first entry, one line each.
print(
    f"Initial observation shape: {td['observation'].shape}",
    f"Initial volume percentage: {td['observation'][0]:.2f}",
    sep="\n",
)

# Smoke-test rollout: drive the environment with random discrete actions and
# print the first observation entry, the reward and the done flag after each step.
N_STEPS = 10     # maximum number of environment steps to take
N_ACTIONS = 11   # actions are sampled uniformly from 0..N_ACTIONS-1
                 # NOTE(review): assumed to match the env's action space — confirm
ACTION_SEED = 0  # fixed seed so every rerun samples the same action sequence

# A dedicated generator makes the sampled actions reproducible without touching
# torch's global RNG state. Any randomness inside the environment itself is not
# controlled here and may need to be seeded separately.
action_rng = torch.Generator().manual_seed(ACTION_SEED)

print(f"\nRunning {N_STEPS} steps with random actions:")
for step in range(N_STEPS):
    # Draw a single action index directly as a 0-dim integer tensor.
    action = torch.randint(0, N_ACTIONS, (), generator=action_rng)
    td["action"] = action

    # Advance the environment by one step.
    td = env.step(td)

    # Step results are nested under the "next" key of the returned structure.
    obs = td["next"]["observation"]
    reward = td["next"]["reward"].item()
    done = td["next"]["done"].item()

    # obs[0] is the value reported as "Volume%" (printed with two decimals).
    print(f"Step {step+1}: Action={action.item():2d}, "
          f"Volume%={obs[0]:.2f}, Reward={reward:6.2f}, Done={done}")

    # Stop early once the environment flags the episode as finished.
    if done:
        print("Episode terminated!")
        break

print("\nEnvironment test completed successfully!")



Resetting environment...
Initial observation shape: torch.Size([13])
Initial volume percentage: 0.50

Running 10 steps with random actions:
Step 1: Action= 0, Volume%=0.56, Reward=-50.00, Done=False
Step 2: Action= 2, Volume%=0.66, Reward=  2.19, Done=False
Step 3: Action= 2, Volume%=0.76, Reward=  7.95, Done=False
Step 4: Action= 7, Volume%=0.82, Reward= 36.37, Done=False
Step 5: Action= 8, Volume%=0.86, Reward=-73.74, Done=False
Step 6: Action= 0, Volume%=0.98, Reward=-1308.72, Done=False
Step 7: Action= 9, Volume%=0.99, Reward=-1336.68, Done=False
Step 8: Action= 6, Volume%=1.00, Reward=-1447.82, Done=False
Step 9: Action=10, Volume%=1.00, Reward=-1410.92, Done=False
Step 10: Action= 4, Volume%=1.00, Reward=-1465.08, Done=False

Environment test completed successfully!
