In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import namedtuple, deque

class Car:
    """A car described by a name, a speed, a distance and a time.

    ``rank`` is initialised to -10 for every new car.
    """

    def __init__(self, name, speed, distance, time):
        self.name, self.speed = name, speed
        self.distance, self.time = distance, time
        self.rank = -10

    def _moving_message(self):
        # Single source for the sentence printed by both move() and detail().
        return f'The car {self.name!s} is moving at {self.speed!s} km/h'

    def move(self):
        """Print the car's name and current speed."""
        print(self._moving_message())

    def stop(self):
        """Print a notice that the car has stopped."""
        print(f'The car {self.name!s} has stopped')

    def detail(self):
        """Print the car's name and current speed (same text as ``move``)."""
        print(self._moving_message())


class DQN(nn.Module):
    """Fully connected network: input_dim -> 128 -> 64 -> output_dim.

    ReLU follows each of the two hidden layers; the output layer is linear.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which weights are initialised.
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        """Return the output-layer activations for input ``x``."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = torch.relu(hidden_layer(activations))
        return self.fc3(activations)


class DQNAgent:
    """Epsilon-greedy agent that trains a policy network against a target network.

    Args:
        input_dim: Length of the state vector fed to the networks.
        output_dim: Number of discrete actions (network outputs).
        learning_rate: Learning rate of the Adam optimizer.
        gamma: Discount factor applied to the next-state value.
        epsilon_start: Initial probability of picking a random action.
        epsilon_end: Lower bound for epsilon.
        epsilon_decay: Multiplicative factor applied by ``decay_epsilon``.
        memory_size: Maximum number of transitions kept in ``memory``.
        batch_size: Value stored in ``batch_size`` for callers that sample
            mini-batches from ``memory``.
    """

    def __init__(self, input_dim, output_dim, learning_rate=0.001, gamma=0.99, epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.995, memory_size=10000, batch_size=64):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.policy_net = DQN(input_dim, output_dim).to(self.device)
        self.target_net = DQN(input_dim, output_dim).to(self.device)
        # The target network starts as an exact copy and is never trained directly.
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.loss_fn = nn.SmoothL1Loss()
        self.gamma = gamma
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.memory = deque(maxlen=memory_size)
        self.batch_size = batch_size

    def _to_batch(self, items):
        # Stack per-sample states (NumPy arrays, sequences or tensors) into one
        # float32 tensor on the agent's device.
        return torch.stack([
            torch.as_tensor(item, dtype=torch.float32, device=self.device)
            for item in items
        ])

    def choose_action(self, state):
        """Return an action index: random with probability epsilon, else greedy.

        Args:
            state: A single state vector (array-like of length ``input_dim``).

        Returns:
            int: Index of the chosen action.
        """
        if np.random.rand() < self.epsilon:
            return int(np.random.randint(self.output_dim))
        with torch.no_grad():
            state_tensor = torch.as_tensor(
                state, dtype=torch.float32, device=self.device
            ).unsqueeze(0)
            return int(self.policy_net(state_tensor).argmax().item())

    def update_model(self, transitions):
        """Run one optimisation step of the policy network on a mini-batch.

        Each transition must expose ``state``, ``action``, ``next_state`` and
        ``reward`` attributes. A ``next_state`` of ``None`` marks a terminal
        transition, whose next-state value is taken to be zero.

        Args:
            transitions: Iterable of transition objects.

        Returns:
            float | None: The loss for this batch, or ``None`` when
            ``transitions`` is empty (no update is performed).
        """
        transitions = list(transitions)
        if not transitions:
            return None

        # Fields are read by attribute so any record type with these four
        # attributes works, whether or not it is iterable.
        state_batch = self._to_batch([t.state for t in transitions])
        action_batch = torch.tensor(
            [int(t.action) for t in transitions], dtype=torch.int64, device=self.device
        )
        reward_batch = torch.tensor(
            [float(t.reward) for t in transitions], dtype=torch.float32, device=self.device
        )

        # Value predicted by the policy network for the action actually taken.
        state_action_values = self.policy_net(state_batch).gather(1, action_batch.unsqueeze(1))

        non_final_mask = torch.tensor(
            [t.next_state is not None for t in transitions],
            dtype=torch.bool,
            device=self.device,
        )
        non_final_next_states = [t.next_state for t in transitions if t.next_state is not None]

        next_state_values = torch.zeros(len(transitions), device=self.device)
        # torch.stack fails on an empty list, so skip the target network when
        # every transition in the batch is terminal.
        if non_final_next_states:
            with torch.no_grad():
                next_batch = self._to_batch(non_final_next_states)
                next_state_values[non_final_mask] = self.target_net(next_batch).max(dim=1).values

        expected_state_action_values = reward_batch + self.gamma * next_state_values

        loss = self.loss_fn(state_action_values, expected_state_action_values.unsqueeze(1))

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def update_target_model(self):
        """Copy the policy network's weights into the target network."""
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def decay_epsilon(self):
        """Multiply epsilon by ``epsilon_decay``, never going below ``epsilon_end``."""
        self.epsilon = max(self.epsilon_end, self.epsilon * self.epsilon_decay)


class Transition(namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))):
    """One replay-memory record: ``(state, action, next_state, reward)``.

    Being a tuple, a record can be iterated and unpacked, so a list of
    records can be transposed field-wise with ``Transition(*zip(*records))``.
    Fields are also readable by name (``record.state`` and so on).
    """

    # No per-instance __dict__; the record holds exactly the four tuple fields.
    __slots__ = ()


def simulate_environment(cars, agent, timeout, speed_rate):
    """Adjust each following car's speed until it trails its leader by more than ``timeout``.

    Cars are processed in consecutive pairs (leader, follower). The follower
    first copies the leader's speed; then, while ``follower.time - leader.time``
    is at most ``timeout``, the agent picks an action. Action 0 lowers the
    follower's speed by ``speed_rate``; any other action leaves it unchanged.
    The follower's time is recomputed as ``distance / speed`` after each step,
    and every step is stored in ``agent.memory``. Once the memory holds more
    than ``agent.batch_size`` transitions, each step also trains the agent on
    a random mini-batch.

    Args:
        cars: Sequence of objects with mutable ``speed``, ``distance`` and
            ``time`` attributes, ordered from first to last.
        agent: Object providing ``choose_action``, ``memory``, ``batch_size``,
            ``update_model``, ``update_target_model`` and ``decay_epsilon``.
        timeout: Gap in ``time`` the follower must exceed over its leader.
        speed_rate: Amount subtracted from the speed on a decelerate action.
    """
    for car_A, car_B in zip(cars, cars[1:]):
        car_B.speed = car_A.speed
        while car_B.time - car_A.time <= timeout:
            state = np.array([car_B.distance, car_B.speed, car_B.time])
            action = agent.choose_action(state)

            new_speed = car_B.speed - speed_rate if action == 0 else car_B.speed  # 0 = decelerate
            if new_speed <= 0:
                # A non-positive speed would divide by zero or give a negative
                # time that never satisfies the loop condition; stop adjusting
                # this follower instead.
                break
            car_B.speed = new_speed
            car_B.time = car_B.distance / car_B.speed

            next_state = np.array([car_B.distance, car_B.speed, car_B.time])
            reward = -1 if action == 0 else 0  # Penalize deceleration
            agent.memory.append(Transition(state, action, next_state, reward))

            if len(agent.memory) > agent.batch_size:
                # A deque only accepts integer indices, so draw the indices
                # first and fetch the records one by one.
                picks = np.random.choice(len(agent.memory), agent.batch_size, replace=False)
                transitions = [agent.memory[int(i)] for i in picks]
                agent.update_model(transitions)
                # NOTE(review): the target network is re-synced after every
                # training step, so it always equals the policy network here.
                agent.update_target_model()
                agent.decay_epsilon()


# Four cars with identical starting values: speed 10, distance 100, time 10.
car1, car2, car3, car4 = (
    Car(label, 10, 100, 10) for label in ('BMW', 'Audi', 'Benz', 'Benz')
)
cars = [car1, car2, car3, car4]

# Simulation settings passed to simulate_environment.
timeout, speed_rate = 2, 0.1

# Network sizes: three state features in, two actions out.
input_dim, output_dim = 3, 2

agent = DQNAgent(input_dim, output_dim)
agent.batch_size = 64  # mini-batch size used when sampling from agent.memory

simulate_environment(cars, agent, timeout, speed_rate)

print("Final speeds after collision avoidance:")
for car in cars:
    car.detail()


Final speeds after collision avoidance:
The car BMW is moving at 10 km/h
The car Audi is moving at 8.300000000000006 km/h
The car Benz is moving at 7.10000000000001 km/h
The car Benz is moving at 6.2000000000000135 km/h
