In [1]:
import gymnasium as gym
import numpy as np
import torch
from torch import nn
import random
from collections import deque
import torch.optim as optim
import torch.nn.functional as F
from collections import namedtuple
import math
import highway_env
#highway_env.register_highway_envs()

In [2]:
env = gym.make("parking-v0", render_mode="rgb_array")

In [3]:
config = {
    "observation": {
        "type": "KinematicsGoal",
        "features": ['x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'],
        "scales": [100, 100, 5, 5, 1, 1],
        "normalize": False
    },
    "action": {
        "type": "ContinuousAction"
    },
    "simulation_frequency": 15,
    "policy_frequency": 5,
    "screen_width": 600,
    "screen_height": 300,
    "centering_position": [0.5, 0.5],
    "scaling": 7,
    "show_trajectories": False,
    "render_agent": True,
    "offscreen_rendering": False,
}

env.unwrapped.configure(config)
env.reset()

(OrderedDict([('observation',
               array([ 0.        ,  0.        , -0.        ,  0.        , -0.98415592,
                       0.17730518])),
              ('achieved_goal',
               array([ 0.        ,  0.        , -0.        ,  0.        , -0.98415592,
                       0.17730518])),
              ('desired_goal',
               array([-1.400000e-01, -1.400000e-01,  0.000000e+00,  0.000000e+00,
                       6.123234e-17, -1.000000e+00]))]),
 {'speed': 0,
  'crashed': False,
  'action': array([ 0.23642844, -0.5107486 ], dtype=float32),
  'is_success': False})

In [4]:
candidate_actions = []

for steering in np.linspace(-0.5, 0.5, 3):
    for acceleration in np.linspace(0.8, 0.4, 3):
        candidate_actions.append(torch.Tensor([acceleration, steering]))

In [5]:
class DQN(nn.Module):
    def __init__(self, input_size, hidden_size, action_size):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input tensor
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)


In [6]:
Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

In [7]:

class ReplayMemory (object) : 
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [8]:
# Hyperparamètres
BATCH_SIZE = 128
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
TARGET_UPDATE = 10
LEARNING_RATE = 0.001
num_episodes = 50

policy_net = DQN(1272, 128, 9)
target_net = DQN(1272, 128, 9)


# Initialisation de l'optimiseur
optimizer = torch.optim.Adam(policy_net.parameters(), lr=LEARNING_RATE)


import torch

def select_action(state_tensor, steps_done, action_size=2):
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)

    if sample > eps_threshold:
        with torch.no_grad():
            action = policy_net(state_tensor)
            action = torch.clamp(action, -1, 1)
            return action.squeeze()  # Squeeze the tensor to remove the singleton dimension
    else:
        action = torch.tensor([np.random.uniform(-1, 1, size=(action_size,))], dtype=torch.float)
        return action.squeeze()

# Fonction d'optimisation
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Ici, vous préparerez les données et exécuterez une étape d'optimisation.

steps_done = 0
memory = ReplayMemory(10000)

for i_episode in range(num_episodes):
    # Initialization of the environment and the state
    state, _ = env.reset()
    state_values = [state[key] for key in state.keys()]
    state_tensor = torch.tensor([state_values], dtype=torch.float)

    done = False
    while not done:
        # Select and execute an action
        action = select_action(state_tensor, steps_done)
        action_values = action.tolist()  # Convert tensor to a list of values
        next_state, reward, done, truncated, _ = env.step(action_values) # Extract the first element before converting to scalar

        # Storage of the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the new state
        state = next_state

        # Perform an optimization step on the current batch
        optimize_model()

    # Update the target network, copying all weights from the policy network
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

print('Training complete')

  state_tensor = torch.tensor([state_values], dtype=torch.float)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x18 and 1272x128)