In [11]:
import torch
import torch.nn as nn
import gymnasium as gym
import numpy as np
import cv2

# --- Settings ---
# Side length, in pixels, of the square frames handed to the network.
IMG_SIZE = 96

# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Checkpoint holding the trained policy parameters (path relative to the working directory).
WEIGHTS_PATH = "torch_weights/dqn_2000eps_full_rgb_alpha_01_gamma_09/dqn_policy_weights_ep2000.pth"

# --- Environment Setup ---
# Network input layout: 3 channels followed by the two spatial dimensions.
state_space = (3, IMG_SIZE, IMG_SIZE)

# continuous=False requests the discrete action set; render_mode="human" asks
# Gymnasium to display the episode on screen.
env = gym.make("CarRacing-v3", continuous=False, render_mode="human")

# Number of discrete actions, used as the width of the network's output layer.
action_space = env.action_space.n

# --- Preprocessing ---
def preprocess(image):
    """Turn a raw frame into the channel-first, scaled array the network consumes.

    The frame is resized to ``IMG_SIZE`` x ``IMG_SIZE``, divided by 255.0, and
    its third axis is moved to the front.

    Args:
        image: 3-D array-like frame accepted by ``cv2.resize``.
            Assumed to be an (H, W, C) 8-bit image as returned by the
            environment -- TODO confirm against the caller.

    Returns:
        Array of shape (C, IMG_SIZE, IMG_SIZE) holding the scaled pixel values
        (in [0, 1] when the input is 8-bit).
    """
    resized = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    scaled = resized / 255.0
    # (H, W, C) -> (C, H, W): PyTorch convolutions expect channels first.
    return scaled.transpose(2, 0, 1)

# --- CNN Model ---
class DQNCNN(nn.Module):
    """Convolutional Q-network: three conv layers followed by a two-layer MLP head.

    The submodule layout (``conv`` plus ``net`` whose first entry is the same
    ``conv`` object) determines the ``state_dict`` keys, so it must not be
    rearranged if previously saved weights are to remain loadable.
    """

    def __init__(self, input_shape, num_actions):
        """Build the network.

        Args:
            input_shape: ``(channels, height, width)`` of a single input frame.
            num_actions: Number of outputs (one Q-value per action).
        """
        super().__init__()
        channels, height, width = input_shape

        # Convolutional feature extractor.
        self.conv = nn.Sequential(
            nn.Conv2d(channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )

        # Push one all-zero frame through the extractor to learn how many
        # features it emits, instead of deriving the size by hand.
        with torch.no_grad():
            probe = torch.zeros(1, channels, height, width)
            flat_features = self.conv(probe).flatten(start_dim=1).shape[1]

        # Full pipeline: extractor -> flatten -> hidden layer -> Q-values.
        head = (
            nn.Flatten(),
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, num_actions),
        )
        self.net = nn.Sequential(self.conv, *head)

    def forward(self, x):
        """Return the Q-values for a batch ``x`` of shape (N, channels, height, width)."""
        q_values = self.net(x)
        return q_values

# --- Recreate Model and Load Weights ---
# Rebuild the architecture on the target device, then restore the trained parameters.
model = DQNCNN(state_space, action_space).to(DEVICE)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while being loaded. It also
# avoids the FutureWarning torch.load raises when the argument is left out.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(
    torch.load(WEIGHTS_PATH, map_location=DEVICE, weights_only=True)
)
# Put the module in evaluation mode for inference.
model.eval()

# --- Run a Single Episode with the Loaded Model ---
# Play one episode greedily (always the highest-Q action) and report the return.
total_reward = 0

try:
    obs, _ = env.reset()
    done = False

    # Preprocess initial state
    state = preprocess(obs)

    while not done:
        # Add a leading batch dimension: the network expects (N, C, H, W).
        state_tensor = torch.tensor(state, dtype=torch.float32, device=DEVICE).unsqueeze(0)
        with torch.no_grad():  # inference only, no autograd bookkeeping needed
            q_values = model(state_tensor)
        action = q_values.argmax(1).item()

        next_obs, reward, terminated, truncated, _ = env.step(action)
        # The episode ends on a terminal state or when the environment truncates it.
        done = terminated or truncated
        state = preprocess(next_obs)
        total_reward += reward
        # No explicit env.render() here: the environment was created with
        # render_mode="human", and in that mode Gymnasium renders inside step().
finally:
    # Always release the environment (and its window), even if the loop raised.
    env.close()

print(f"Episode finished with total reward: {total_reward}")

  model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=DEVICE))


Episode finished with total reward: -92.95774647887222
