In [15]:
import numpy as np
import pandas as pd
import math
import itertools
import matplotlib.pyplot as plt

In [16]:
class Square_Crossroads():
    """Toy multi-car environment on a square crossroads with four entrances.

    Every active car is tracked in ``self.states`` as::

        car_id -> [position, target, velocity, heading_degrees]

    where ``position`` and ``target`` are ``[x, y]`` lists.  ``self.cars``
    holds the ids of the active cars and is kept in sync with the keys of
    ``self.states``.
    """

    def __init__(self, n_time_steps, seed, dist_cars):
        """Create the environment, seed the RNG and spawn the initial cars.

        Args:
            n_time_steps: stored but not used by any method yet.
            seed: seed passed to ``np.random.seed`` (global NumPy RNG).
            dist_cars: two cars at most this far apart count as crashed.
        """
        # n_time_steps, length and width are not used yet, but will probably be used later
        self.n_time_steps = n_time_steps
        self.length = 10
        self.width = 10

        self.dist_cars = dist_cars

        # the 4 entrances; initially one car spawns on each of them
        self.state_a = [0, 5]
        self.state_b = [5, 0]
        self.state_c = [5, 10]
        self.state_d = [10, 5]
        self.exits = [self.state_a, self.state_b, self.state_c, self.state_d]

        self.set_seed(seed)
        # reset() creates self.cars (one car per entrance) and self.states
        self.reset()

    def step(self, action_space):
        """Move every active car by one step and compute the reward.

        Args:
            action_space: mapping ``car_id -> [acceleration, curvature]``.
                The acceleration updates the car's velocity; the curvature
                is multiplied by 360 and stored as the heading in degrees.

        Returns:
            Tuple ``(states, reward, done)``.  ``reward`` is the sum of the
            crash, success and static rewards plus a -1 time penalty.

        Raises:
            KeyError: if an active car has no entry in ``action_space``.
        """
        # TODO: termination is not implemented yet (leaving the boundaries,
        # reaching n_time_steps), so ``done`` is always False.
        done = False
        time_reward = -1

        for car in self.cars:
            state = self.states[car]
            acceleration, curvature = action_space[car][0], action_space[car][1]

            state[2] = velocity(state[2], acceleration)
            state[3] = curvature * 360
            new_x, new_y = move_car(state[0], state[2], state[3])
            # Rebind (not mutate) the position list.
            state[0] = [new_x, new_y]

        print(self.states)

        # Order matters: cars that reached their target are removed by
        # check_success() before the static penalty is computed.
        reward_crash = self.check_crash(self.dist_cars)
        reward_success = self.check_success()
        reward_static = self.check_static()

        reward = reward_crash + reward_static + reward_success + time_reward
        return self.states, reward, done

    def set_seed(self, seed):
        """Seed NumPy's global random number generator."""
        np.random.seed(seed)

    def _spawn_state(self, spawn):
        """Build a fresh car state spawning at exit index ``spawn``.

        The target is drawn uniformly from the other exits.  Position and
        target are copies, so car states never alias ``self.exits``.
        """
        target_indexes = [i for i in range(len(self.exits)) if i != spawn]
        rand_target = np.random.choice(target_indexes)
        return [list(self.exits[spawn]), list(self.exits[rand_target]), 0, 0]

    def new_car(self):
        """Add one car at a random entrance with a random, different target."""
        # Ids are handed out in increasing order, so the last id is the
        # largest; start again from 0 when no car is left.
        new_car = self.cars[-1] + 1 if len(self.cars) else 0

        # add new car index to list of all cars
        self.cars = np.append(self.cars, new_car)

        # take a random spawn exit, then a random target among the others
        spawn = np.random.choice(len(self.exits))
        self.states[new_car] = self._spawn_state(spawn)

    def reset(self):
        """Reset the environment to one stationary car per entrance.

        Cars added by ``new_car`` or removed by ``check_success`` are
        discarded: the active cars are again ``0 .. len(self.exits) - 1``,
        car ``i`` sitting on ``self.exits[i]``.
        """
        print("Environment reset with param")

        self.cars = np.arange(len(self.exits))
        self.states = {}

        for car in self.cars:
            # car : [spawn exit, target exit, velocity, curvature]
            self.states[car] = self._spawn_state(car)

    def check_crash(self, dist_cars):
        """Return -20 for every pair of cars at most ``dist_cars`` apart."""
        reward_crash = 0

        all_points = [car_prop[0] for car_prop in self.states.values()]

        for p0, p1 in itertools.combinations(all_points, 2):
            if distance(p0, p1) <= dist_cars:
                reward_crash -= 20

        return reward_crash

    def check_success(self):
        """Remove cars that reached their target; return +100 per such car.

        NOTE(review): the position must equal the target exactly; with
        float positions this is unlikely to trigger -- a tolerance is
        probably wanted here.
        """
        # Collect first, delete afterwards: a dict must not change size
        # while it is being iterated.
        exited = [
            car for car, car_prop in self.states.items()
            if car_prop[0] == car_prop[1]
        ]

        for car in exited:
            del self.states[car]

        if exited:
            # Keep the id list in sync so step() does not look up removed cars.
            self.cars = self.cars[np.isin(self.cars, exited, invert=True)]

        return 100 * len(exited)

    def check_static(self):
        """Return -10 for every car whose velocity is exactly 0."""
        return sum(-10 for car_prop in self.states.values() if car_prop[2] == 0)
    
def distance(p0, p1):
    """Return the Euclidean distance between the 2-D points ``p0`` and ``p1``.

    Only the first two coordinates (index 0 and 1) of each point are used.
    """
    delta_x = p0[0] - p1[0]
    delta_y = p0[1] - p1[1]
    squared_length = delta_x**2 + delta_y**2
    return math.sqrt(squared_length)

def velocity(v, a):
    """Return the new velocity: ``v`` plus one tenth of the acceleration ``a``."""
    velocity_gain = a / 10
    return v + velocity_gain

def move_car(states, distance, angle_degrees):
    """Return the point reached from ``states`` after moving ``distance``.

    ``states`` is the current ``[x, y]`` position and ``angle_degrees`` the
    direction of travel in degrees.  The result is the tuple
    ``(new_x, new_y)``.
    """
    # math.cos / math.sin work in radians
    angle_radians = angle_degrees * math.pi / 180
    start_x, start_y = states[0], states[1]
    return (
        start_x + distance * math.cos(angle_radians),
        start_y + distance * math.sin(angle_radians),
    )

In [17]:
# Configuration of this run.
n_times_steps = 3
seed = 10
dist_cars = 0.5  # two cars at most this far apart count as a crash

env = Square_Crossroads(n_time_steps=n_times_steps, seed=seed, dist_cars=dist_cars)
print(env.states)

# Neutral [acceleration, curvature] action for each of the four initial cars.
action_space = {car: [0, 0] for car in range(4)}


Environment reset with param
{0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}


In [18]:

# Drive the environment with random actions drawn from beta distributions.
for iteration in range(10):
    for key in action_space:
        # Draw order (acceleration first, then curvature) matters for
        # reproducibility because both use the seeded global NumPy RNG.
        acceleration = np.random.beta(0.69, 0.321, size=None)
        curvature = np.random.beta(0.1, 0.321, size=None)
        action_space[key] = [acceleration, curvature]

        # NOTE(review): the environment is stepped once per car update, i.e.
        # len(action_space) times per outer iteration, while the other cars
        # still hold their previous actions -- confirm this is intended
        # rather than one step per outer iteration.
        states, reward, done = env.step(action_space)

{0: [[0.03527920876150809, 4.99949408764892], [5, 10], 0.035282836027522566, 359.1784210354735], 1: [[5.0, 0.0], [5, 10], 0.0, 0], 2: [[5.0, 10.0], [0, 5], 0.0, 0], 3: [[10.0, 5.0], [0, 5], 0.0, 0]}
{0: [[0.10583762628452428, 4.99848226294676], [5, 10], 0.07056567205504513, 359.1784210354735], 1: [[5.097422541586141, 0.00036255586803496257], [5, 10], 0.09742321620569147, 0.21322401203252736], 2: [[5.0, 10.0], [0, 5], 0.0, 0], 3: [[10.0, 5.0], [0, 5], 0.0, 0]}
{0: [[0.21167525256904857, 4.996964525893519], [5, 10], 0.1058485080825677, 359.1784210354735], 1: [[5.292267624758424, 0.0010876676041048877], [5, 10], 0.19484643241138294, 0.21322401203252736], 2: [[5.022780374270432, 10.000691412535092], [0, 5], 0.022790864467909462, 1.7384640828063829], 3: [[10.0, 5.0], [0, 5], 0.0, 0]}
{0: [[0.35279208761508096, 4.994940876489199], [5, 10], 0.14113134411009026, 359.1784210354735], 1: [[5.584535249516849, 0.0021753352082097754], [5, 10], 0.2922696486170744, 0.21322401203252736], 2: [[5.0683411

In [6]:
reward

-1

In [7]:
# acceleration = np.random.beta(0.1, 0.321, size=None)
# curvature = np.random.beta(0.1, 0.321, size=None)

# starting point
# (x, y)
# (0, 5)

# velocity = v + acceleration


# x2 = x + acceleration * curvature
# y2 = y + acceleration * curvature


# (1.324231, 5.234355)


# acceleration, curvature

# x2, y2

In [8]:
reward

-1

In [9]:
done

False

In [10]:
# env.new_car()
# print(env.states)

In [11]:
# new_x = x + distance * Math.Cos(angle_degrees * Math.Pi / 180)
# new_y = y + distance * Math.Sin(angle_degrees * Math.Pi / 180)


# # boundary conditions
# 0 <= x <= 10
# 0 <= y <= 10


# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[0, 5], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}

# # crash
# # delete crashed cars

# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# {0: [[0, 5], [5, 10], 0, 0], 1: [[0, 5], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}
# # crash, reset the crashed cars to their initial spawn place, major negative reward
# {0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [5, 10], 0, 0], 2: [[5, 10], [0, 5], 0, 0], 3: [[10, 5], [0, 5], 0, 0]}



# initialize_params, predict = stax.serial(
#                                         # common base layer
#                                         stax.serial(
#                                             ### fully connected DNN
#                                             Dense(128), # 128 hidden neurons
#                                             Relu, # ReLu activation
#                                             ),
#                                         # actor and critic output heads
#                                         FanOut(2), # split architecture pipeline into two heads using FanOut
#                                         stax.parallel(
#                                             # actor head
#                                             stax.serial(
#                                                 Dense([4_cars, alpha, beta]), # 2 output neurons (actor)
#                                             ),
#                                             # critic head
#                                             Dense(1), # 1 output neuron (critic), no activation
#                                         ),
#                                 )


# for each car:
#     car_1_alpha, car_1_beta
#     acceleration = numpy.random.beta(car_1_alpha, car_1_beta, size=None)
#     curvature = numpy.random.beta(car_1_alpha, car_1_beta, size=None)
#     0 - 1 -> acceleration
#     numbers between ? and ? -> curvature
