In [1]:
import numpy as np
import pandas as pd
import math
import itertools
import matplotlib.pyplot as plt
import torch

In [15]:
class Square_Crossroads():
    """Multi-car crossroads environment on a 10 x 10 square.

    Every car is stored in ``self.states`` under its index as a four-item
    list ``[position, target, velocity, angle]``: ``position`` and ``target``
    are ``[x, y]`` points, ``velocity`` is the distance travelled per step and
    ``angle`` is the slot ``step`` overwrites with the heading in degrees.
    Cars enter through one of four entrances (the middle of each side) and
    are rewarded for getting close to their target exit.
    """

    def __init__(self, seed, dist_cars):
        """Create the environment and put it in its initial state.

        Args:
            seed: value passed to ``np.random.seed`` (global NumPy RNG).
            dist_cars: two cars at most this far apart count as a crash.
        """
        self.dist_cars = dist_cars

        # initial 4 cars
        self.cars = np.array([0, 1, 2, 3])

        # create 4 entrances, one in the middle of each side of the square
        self.state_a = [0, 5]
        self.state_b = [5, 0]
        self.state_c = [5, 10]
        self.state_d = [10, 5]
        self.exits = [self.state_a, self.state_b, self.state_c, self.state_d]

        self.set_seed(seed)
        self.reset()

    def step(self, action_space, sign):
        """Apply one action per car and return the resulting reward.

        Args:
            action_space: mapping ``car -> [acceleration, curvature]``. The
                acceleration is fed to ``velocity``; the curvature is scaled
                by 360 and used as the heading in degrees.
            sign: accepted for interface compatibility; not used.

        Returns:
            ``(states, reward, done)``. ``done`` is always ``False`` because
            a car that reaches its exit is respawned instead of ending the
            episode.
        """
        done = False
        reward_time = 40
        reward_boundary = 0
        # Kept in its own accumulator so it is part of the final sum; it used
        # to be written to ``reward`` and then overwritten.
        reward_velocity = 0

        for car in self.cars:
            car_state = self.states[car]
            v = velocity(car_state[2], action_space[car][0])

            # An action that would push the velocity outside [0, 4] is
            # rejected: the car keeps its state and a penalty is applied.
            if v < 0 or v > 4:
                reward_velocity -= 1000
                continue

            car_state[2] = v
            car_state[3] = action_space[car][1] * 360

            new_x, new_y = self.move_car(car_state[0], car_state[2], car_state[3])

            if new_x > 10 or new_x < 0 or new_y > 10 or new_y < 0:
                reward_boundary -= 5
                print("A car tried to go outside the boundaries")
            else:
                print("Moving the car: ", car)
                car_state[0] = [new_x, new_y]

        # Crash and static penalties describe the final configuration, so they
        # are evaluated once after all cars moved (also when every action was
        # rejected) instead of once per accepted car.
        reward_crash = self.check_crash(self.dist_cars)
        reward_static = self.check_static()
        reward_success = self.check_success()

        reward = (reward_crash + reward_static + reward_success
                  + reward_time + reward_boundary + reward_velocity)

        return self.states, reward, done

    def set_seed(self, seed):
        """Seed the global NumPy random generator."""
        np.random.seed(seed)

    def _random_target(self, spawn):
        """Return the index of a random exit other than ``spawn``."""
        target_indexes = [i for i in range(len(self.exits)) if i != spawn]
        return np.random.choice(target_indexes)

    def _random_route(self):
        """Draw a random spawn exit and a different target exit.

        Returns copies of the two points so that car states never alias the
        shared ``self.exits`` lists.
        """
        spawn = np.random.choice(len(self.exits))
        target = self._random_target(spawn)
        return list(self.exits[spawn]), list(self.exits[target])

    def new_car(self):
        """Add one more car with a random spawn and target, velocity 0."""
        new_car = self.cars[-1] + 1

        # add new car index to list of all cars
        self.cars = np.append(self.cars, new_car)

        spawn_point, target_point = self._random_route()

        # new car : [spawn exit, target exit, velocity, angle]
        self.states[new_car] = [spawn_point, target_point, 0, 0]

    def reset_successful_car(self):
        """Build a fresh state for a car that passed the crossroads.

        Returns:
            ``[spawn exit, target exit, 1, 1]`` with random, distinct exits.
        """
        spawn_point, target_point = self._random_route()
        return [spawn_point, target_point, 1, 1]

    def reset(self):
        """Reset every known car to an entrance with a random target.

        Car ``i`` spawns at entrance ``i % len(self.exits)``, so the four
        initial cars keep their dedicated entrances and cars added later via
        ``new_car`` wrap around instead of indexing past the exit list.

        Returns:
            The freshly built ``self.states`` dictionary.
        """
        print("Environment reset with param")

        self.states = dict.fromkeys(self.cars)

        for car in self.states:
            spawn = int(car) % len(self.exits)

            # choose a random target exit (spawn exit is excluded)
            rand_target = self._random_target(spawn)

            # car : [spawn exit, target exit, velocity, angle]
            self.states[car] = [list(self.exits[spawn]),
                                list(self.exits[rand_target]), 1, 1]
        return self.states

    def check_crash(self, dist_cars):
        """Return -10 for every pair of cars at most ``dist_cars`` apart."""
        reward_crash = 0

        all_points = [car_prop[0] for car_prop in self.states.values()]

        for p0, p1 in itertools.combinations(all_points, 2):
            if distance(p0, p1) <= dist_cars:
                reward_crash -= 10

        if reward_crash != 0:
            print("There was a car crash")

        return reward_crash

    def check_success(self):
        """Reward cars for being close to their target and respawn finishers.

        The bonuses are cumulative per car: +100 below distance 2, +500 below
        0.5, +1000 below 0.1 and +10000 at distance 0.01 or less. A car in the
        last tier counts as having exited and is replaced by a fresh state
        from ``reset_successful_car``.

        Returns:
            The summed bonus over all cars.
        """
        reward_success = 0

        # cars that reached their exit and must be respawned
        exited = []
        for car, car_prop in self.states.items():
            distance_success = distance(car_prop[0], car_prop[1])

            if distance_success < 2:
                print("We are at a distance: ", distance_success)
                reward_success += 100
            if distance_success < 0.5:
                reward_success += 500
            if distance_success < 0.1:
                reward_success += 1000
            # Threshold rather than exact equality: a float distance is
            # practically never exactly 0.01.
            if distance_success <= 0.01:
                reward_success += 10000
                print("There was a successful exit")
                exited.append(car)

        # respawn after the loop so the dict is not modified while iterating
        for c in exited:
            self.states[c] = self.reset_successful_car()

        return reward_success

    def check_static(self):
        """Return -10 for every car whose velocity is exactly 0."""
        reward_static = 0

        for car_prop in self.states.values():
            if car_prop[2] == 0:
                reward_static -= 10

        if reward_static != 0:
            print("There was a static car")

        return reward_static

    def move_car(self, states, distance, angle_degrees):
        """Return the point ``distance`` away from ``states`` along a heading.

        Args:
            states: current ``[x, y]`` position.
            distance: length of the move.
            angle_degrees: heading in degrees, counter-clockwise from +x.

        Returns:
            ``(new_x, new_y)``; bounds are not checked here.
        """
        angle_radians = angle_degrees * math.pi / 180
        new_x = states[0] + distance * math.cos(angle_radians)
        new_y = states[1] + distance * math.sin(angle_radians)
        return new_x, new_y
    

    
def distance(p0, p1):
    """Return the Euclidean distance between two ``(x, y)`` points."""
    delta_x = p0[0] - p1[0]
    delta_y = p0[1] - p1[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return math.sqrt(squared_length)

def velocity(v, a):
    """Return the new velocity: previous velocity ``v`` plus a third of the acceleration ``a``."""
    velocity_change = a / 3
    return v + velocity_change



In [3]:
# n_times_steps = 3
# seed = 10
# dist_cars = 0.5

# env = Square_Crossroads(seed, dist_cars)
# env.reset()
# print(env.states)
# action_space = {0:[1,1], 1:[0,1], 2:[1,1], 3:[0,1]}
# sign = torch.tensor([[0.3, 0.7], [0.2, 0.8], [0.68, 0.32], [0.56, 0.44]], dtype=torch.float32)
# total_reward = 0


In [4]:
# for a in range(20):
#     for key, value in action_space.items():
#         acceleration = np.random.beta(0.1, 3, size=None)
#         curvature = np.random.beta(0.1, 0.321, size=None)
#         action_space[key] = [acceleration, curvature]

#     states, reward, done = env.step(action_space, sign)
#     print(reward)
#     total_reward += reward
#     print(env.states)

In [5]:
# total_reward

In [6]:
# acceleration = np.random.beta(0.1, 0.321, size=None)
# curvature = np.random.beta(0.1, 0.321, size=None)

# starting point
# (x, y)
# (0, 5)

# velocity = v + acceleration


# x2 = x + acceleration * curvature
# y2 = y + acceleration * curvature


# (1.324231, 5.234355)


# acceleration, curvature

# x2, y2

In [7]:
# reward

In [8]:
# done