In [2]:
import os

# Later cells import from the `src` package, which must be reachable from the
# working directory. Step up one level only when `src/` is not already visible,
# so re-running this cell does not climb one directory further each time.
if not os.path.isdir('src'):
    os.chdir('../')
os.getcwd()

'e:\\github_clone\\gridGPT'

In [3]:
from src.agents.gpt import gridGPT, gridGPTAgent
import torch
import grid2op
from lightsim2grid import LightSimBackend

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim


# Smoke test: build a gridGPTAgent on the sandbox environment and push one
# random context window through `select_action`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
env = grid2op.make("l2rpn_case14_sandbox", backend=LightSimBackend())

# Hyperparameters handed to the agent constructor.
config = dict(
    has_continuous_action_space=False,
    gamma=0.99,
    eps_clip=0.2,
    K_epochs=4,
    lr_actor=3e-5,
)
agent = gridGPTAgent(env, config)

# ---- Dummy input tensors ----
# Sizes are read from the config of the agent's policy (the gridGPT model).
m = agent.policy
B = 1
L, state_dim, action_size = (
    m.config[key] for key in ('context_len', 'state_dim', 'action_size')
)

# Draw order (prev_state, next_state, prev_action) is kept so the global RNG
# stream is consumed exactly as before.
prev_state = torch.randn((B, L, state_dim), device=device)
next_state = torch.randn((B, L, state_dim), device=device)
prev_action = torch.randint(0, action_size, (B, L), device=device)

# Slot positions 0..L-1 and timesteps 100..100+L-1, each with a leading batch axis.
slot_idx = torch.arange(L, device=device)[None, :]
timestep = slot_idx + 100

# Every action is allowed in this test.
action_mask_last = torch.ones((B, action_size), dtype=torch.bool, device=device)

# ---- Call select_action ----
action, grid_action, logprob, value = agent.select_action(
    prev_state,
    prev_action,
    next_state,
    slot_idx,
    timestep,
    action_mask_last,
)

print("\n=== TEST OUTPUT ===")
print("Raw discrete action ID:", action)
print("Converted grid action:", grid_action)
print("Log prob:", logprob)
print("State value:", value)


09:03:09 | INFO     | Using ActionConverter with action size: 178

=== TEST OUTPUT ===
Raw discrete action ID: 30
Converted grid action: This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - Set the bus of the following element(s):
	 	 - Assign bus 1 to line (extremity) id 0 [on substation 1]
	 	 - Assign bus 2 to line (origin) id 2 [on substation 1]
	 	 - Assign bus 2 to line (origin) id 3 [on substation 1]
	 	 - Assign bus 2 to line (origin) id 4 [on substation 1]
	 	 - Assign bus 1 to generator id 0 [on substation 1]
	 	 - Assign bus 2 to load id 0 [on substation 1]
Log prob: tensor([-5.6125], device='cuda:0')
State value: tensor([0.5169], device='cuda:0')


In [5]:
# Exercise `gridGPT.act` on a random two-sample batch, once sampling and once greedy.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
m = gridGPT().to(device)
m = m.eval()

B = 2
L, state_dim, action_size = (
    m.config[key] for key in ('context_len', 'state_dim', 'action_size')
)

# Seeded generator for the random inputs; the draw order below is unchanged.
g = torch.Generator(device=device)
g.manual_seed(123)
prev_states = torch.randn((B, L, state_dim), generator=g, device=device)
next_states = torch.randn((B, L, state_dim), generator=g, device=device)
prev_actions = torch.randint(0, action_size, (B, L), generator=g, device=device)

# Per-sample slot positions 0..L-1 and absolute timesteps t0..t0+L-1.
slot_idx = torch.arange(L, device=device).repeat(B, 1)
t0 = 100
timestep = slot_idx + t0

# All actions are marked valid.
action_mask_last = torch.ones((B, action_size), dtype=torch.bool, device=device)

# stochastic action (exploration)
a, a_logp, v = m.act(
    prev_states, prev_actions, next_states,
    slot_idx, timestep, action_mask_last,
    deterministic=False,
)
print("sampled action:", a.tolist())
print("logprob:", a_logp.tolist())
print("value:", v.tolist())

# greedy action (evaluation)
a_g, a_g_logp, v_g = m.act(
    prev_states, prev_actions, next_states,
    slot_idx, timestep, action_mask_last,
    deterministic=True,
)
print("greedy action:", a_g.tolist())


sampled action: [122, 95]
logprob: [-5.1724042892456055, -5.187069892883301]
value: [0.04453800246119499, 0.11552482098340988]
greedy action: [25, 69]


In [4]:
import torch
import torch.nn.functional as F

# ========= CUDA / device setup =========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Optional: slightly faster matmul on Ampere+ GPUs.
# Probe for the setter (it may be absent in older PyTorch builds) instead of
# wrapping the call in `except Exception: pass`, which would also silence
# unrelated failures.
if hasattr(torch, "set_float32_matmul_precision"):
    torch.set_float32_matmul_precision("high")

# ----- 1) Instantiate model -----
m = gridGPT()            # your class
m.eval().to(device)      # eval mode, then move to the selected device (GPU or CPU)

# Sanity: L must equal the context_len you configured
L = m.config['context_len']
B = 2
state_dim   = m.config['state_dim']
action_size = m.config['action_size']

# ----- 2) Build a small window batch on the right device -----
# Random but fixed for reproducibility (inputs only; the generator is not
# passed to the model).
g = torch.Generator(device=device).manual_seed(42)

prev_states   = torch.randn(B, L, state_dim, generator=g, device=device)
next_states   = torch.randn(B, L, state_dim, generator=g, device=device)
prev_actions  = torch.randint(low=0, high=action_size, size=(B, L), generator=g, device=device)

# slot indices 0..L-1 per batch
slot_idx = torch.arange(L, device=device).unsqueeze(0).repeat(B, 1)

# absolute timesteps (e.g., 100..100+L-1). You can mod/clip if needed.
t0 = 100
timestep = torch.arange(t0, t0 + L, device=device).unsqueeze(0).repeat(B, 1)

# Optional mask for last slot (all actions are valid here)
action_mask_last = torch.ones(B, action_size, dtype=torch.bool, device=device)

# ----- 3) Forward pass: get logits/probs for current decision (last slot) -----
with torch.no_grad():
    logits = m(prev_states, prev_actions, next_states, slot_idx, timestep, action_mask_last=action_mask_last)
    probs  = F.softmax(logits, dim=-1)

print("Probs (batch 0):", probs[0].tolist())

# ----- 4) Show that HISTORY affects the decision -----
# Keep the LAST slot identical, but perturb an EARLY slot in sample 0.
prev_states_pert   = prev_states.clone()
next_states_pert   = next_states.clone()
prev_actions_pert  = prev_actions.clone()

# Modify an early slot (e.g., slot 2) in sample 0:
k = 2  # some history slot, not the last
# With a short context, slot k would be the last slot (or out of range) and the
# comparison below would no longer measure the influence of history.
assert 0 <= k < L - 1, f"history slot k={k} must come before the last slot (context_len={L})"
prev_states_pert[0, k] += 1.5
next_states_pert[0,  k] -= 1.0
prev_actions_pert[0, k] = (prev_actions_pert[0, k] + 1) % action_size  # wrap to stay a valid action id

with torch.no_grad():
    logits_pert = m(prev_states_pert, prev_actions_pert, next_states_pert, slot_idx, timestep, action_mask_last=action_mask_last)
    probs_pert  = F.softmax(logits_pert, dim=-1)

print("Probs after history change (batch 0):", probs_pert[0].tolist())

# Compare the difference for batch 0
delta = (probs[0] - probs_pert[0]).abs()
print("Max abs change in probs (batch 0):", float(delta.max()))


Using device: cuda
Probs (batch 0): [0.004006871487945318, 0.004645700100809336, 0.007731734309345484, 0.004200957249850035, 0.004426130559295416, 0.0059465281665325165, 0.0037149381823837757, 0.005040147807449102, 0.0053291283547878265, 0.005648881196975708, 0.004240559879690409, 0.007967649027705193, 0.00638725096359849, 0.007827120833098888, 0.006583447102457285, 0.006404843647032976, 0.007433420047163963, 0.00384664093144238, 0.004772510379552841, 0.005047444719821215, 0.006787172984331846, 0.0055165705271065235, 0.005603952798992395, 0.0046149552799761295, 0.005651440005749464, 0.005218531470745802, 0.007042560260742903, 0.004284872207790613, 0.0032555346842855215, 0.007129600737243891, 0.003655410371720791, 0.0053522237576544285, 0.0042580412700772285, 0.0055267103016376495, 0.005788374226540327, 0.00641618063673377, 0.00452744634822011, 0.005573666654527187, 0.005930819548666477, 0.004972686991095543, 0.005547208711504936, 0.007894421927630901, 0.0058663347736001015, 0.010178259

In [5]:
# Greedy choice: index of the largest probability in each row of `probs`.
greedy_actions = torch.argmax(probs, dim=-1)  # shape: (B,)

# Stochastic alternative: draw one action index per row, then drop the
# trailing sample axis.
sampled_actions = torch.squeeze(
    torch.multinomial(probs, num_samples=1), dim=-1
)  # shape: (B,)

# Plain Python int lists for downstream code that should not receive tensors.
greedy_actions_cpu = greedy_actions.detach().to("cpu").tolist()    # e.g., [37, 5]
sampled_actions_cpu = sampled_actions.detach().to("cpu").tolist()  # e.g., [12, 88]

print("Greedy actions (per batch):", greedy_actions_cpu)
print("Sampled actions (per batch):", sampled_actions_cpu)


Greedy actions (per batch): [43, 2]
Sampled actions (per batch): [114, 103]


In [6]:
import torch
import torch.nn.functional as F
from torch import nn

# --------- device ----------
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# --------- model (gridGPT must already be importable / defined) ----------
m = gridGPT().to(device)
m = m.eval()

# --------- sizes read from the model config ----------
L, state_dim, action_size, max_timestep = (
    m.config[key]
    for key in ('context_len', 'state_dim', 'action_size', 'max_timestep')
)

# --------- dummy environment ----------
# Very simple linear-ish dynamics: s_{t+1} = s_t + W[:, a] * 0.05 + noise
# (so actions nudge state in different directions).
# W holds one column per action; it is drawn from the seeded generator `g`.
g = torch.Generator(device=device)
g.manual_seed(7)
W = 0.1 * torch.randn(state_dim, action_size, generator=g, device=device)

def env_step(state, action_id, g, drift_scale=0.05, noise_std=0.01):
    """Advance the toy dynamics by one step.

    Returns ``state + drift_scale * W[:, action_id] + noise``, where ``W`` is
    the module-level matrix indexed by action along its second axis and
    ``noise`` is standard-normal noise drawn from ``g`` and scaled by
    ``noise_std``.

    Args:
        state: Current state tensor; the noise is created with the same
            shape, device and dtype.
        action_id: Column of ``W`` selecting the drift direction.
        g: ``torch.Generator`` used for the noise draw.
        drift_scale: Multiplier applied to the selected column of ``W``
            (default 0.05, the previously hard-coded value).
        noise_std: Multiplier applied to the standard-normal noise
            (default 0.01, the previously hard-coded value).

    Returns:
        The next state tensor.
    """
    # Create noise with the same shape/device/dtype as `state`
    noise = torch.randn(state.shape, device=state.device, dtype=state.dtype, generator=g) * noise_std
    # Cast the selected column so the sum keeps `state`'s dtype.
    drift = W[:, action_id].to(state.dtype) * drift_scale
    return state + drift + noise


# --------- rollout length (<= max_timestep - 1 to be safe) ----------
T = min(32, max_timestep - 1)  # number of decisions taken in the loop

# --------- sliding-window buffers (B=1) ----------
# Each buffer holds L slots; every step drops the oldest slot and writes the
# newest one at the end.
window_prev_states = torch.zeros(L, state_dim, device=device)
window_next_states = torch.zeros(L, state_dim, device=device)
window_actions     = torch.zeros(L, dtype=torch.long, device=device)  # padded with action 0

# Slot indices are always 0..L-1 -> [1, L]
slot_idx_full = torch.arange(L, device=device).unsqueeze(0)

# All actions are valid at the last slot
action_mask_last = torch.ones(1, action_size, dtype=torch.bool, device=device)

# --------- initial conditions ----------
s_t = torch.randn(state_dim, generator=g, device=device) * 0.1  # small random start
# Padding used for the very first decision: an all-zero "previous" state and
# action 0. From the second step on these hold the real previous state/action.
s_prev = torch.zeros(state_dim, device=device)
a_prev = 0

greedy_actions, sampled_actions, probs_history = [], [], []

for t in range(T):
    # Rotate each buffer left by one; the wrapped-around oldest entry lands in
    # the last slot and is overwritten right below.
    window_prev_states = window_prev_states.roll(-1, dims=0)
    window_next_states = window_next_states.roll(-1, dims=0)
    window_actions     = window_actions.roll(-1, dims=0)

    # Decision slot (L-1): previous state, current observed state, previous action.
    window_prev_states[-1] = s_prev
    window_next_states[-1] = s_t
    window_actions[-1]     = int(a_prev)

    # Add the batch axis: [1, L, state_dim] / [1, L]
    prev_states_b = window_prev_states[None]
    next_states_b = window_next_states[None]
    actions_b     = window_actions[None]

    # Slot k is mapped to time (t - L + 1 + k), clipped to [0, max_timestep - 1].
    timestep_b = torch.arange(t - L + 1, t + 1, device=device).clamp(
        min=0, max=max_timestep - 1
    )[None]  # [1, L]

    with torch.no_grad():
        logits = m(
            prev_states_b,
            actions_b,
            next_states_b,
            slot_idx=slot_idx_full,
            timestep=timestep_b,
            action_mask_last=action_mask_last,
        )  # [1, action_size]
        probs = torch.softmax(logits, dim=-1)  # [1, action_size]

    # Candidate actions for time t: arg-max and one multinomial draw.
    a_greedy  = torch.argmax(probs, dim=-1)[0].item()
    a_sampled = torch.multinomial(probs[0], 1).item()

    greedy_actions.append(a_greedy)
    sampled_actions.append(a_sampled)
    probs_history.append(probs[0].detach().cpu())

    # The greedy action is the one actually applied to the environment.
    a_t = a_greedy

    # Keep a copy of the current state as "previous", then step to s_{t+1}.
    s_prev, s_t = s_t.clone(), env_step(s_t, a_t, g)

    # Remember the applied action for the next iteration.
    a_prev = a_t

# --------- results ----------
print("\nGreedy actions:", greedy_actions)
print("Sampled actions:", sampled_actions)
print("Last-step probs (first 10):", probs_history[-1][:10].tolist())


Using device: cuda

Greedy actions: [99, 63, 32, 32, 88, 44, 87, 87, 135, 44, 149, 33, 32, 33, 32, 167, 44, 165, 44, 144, 87, 32, 87, 32, 53, 32, 33, 32, 167, 135, 87, 32]
Sampled actions: [41, 162, 48, 96, 14, 18, 111, 136, 69, 158, 88, 119, 104, 14, 25, 21, 80, 67, 154, 75, 140, 173, 14, 21, 30, 40, 145, 84, 59, 150, 16, 44]
Last-step probs (first 10): [0.006164961028844118, 0.004137567710131407, 0.0048055206425487995, 0.006171464454382658, 0.004117234144359827, 0.005092212464660406, 0.007607095409184694, 0.005709832068532705, 0.005305347964167595, 0.005209771450608969]
