In [1]:
import numpy as np
import pandas as pd
import math
import itertools
import matplotlib.pyplot as plt

In [2]:
class Square_Crossroads():
    """Toy multi-car crossroads environment on a 10 x 10 square.

    Four entrances/exits sit at the middle of each side of the square.
    Every car is described by a 4-element list stored in ``self.states``::

        [position, target, velocity, heading_degrees]

    where ``position`` and ``target`` are ``[x, y]`` lists, ``velocity`` is
    the distance travelled per step and ``heading_degrees`` is the direction
    of travel in degrees.

    Per-step reward components (summed in :meth:`step`):

    * time:      -1 every step
    * boundary:  -5 for every car whose move would leave the square
    * crash:     -20 for every pair of cars closer than ``dist_cars``
    * success:   +100 for every car that reaches its target
    * static:    -10 for every car whose velocity is exactly 0

    Note: the class relies on the module-level helpers ``velocity`` and
    ``distance`` being defined in the same notebook/module.
    """

    # Side length of the square; moves that end outside [0, SIZE] are rejected.
    SIZE = 10

    def __init__(self, n_time_steps, seed, dist_cars, success_radius=0.5):
        """Create the environment and reset it.

        Parameters
        ----------
        n_time_steps : int
            Accepted for forward compatibility; not used yet.
        seed : int
            Seed passed to :meth:`set_seed` (seeds NumPy's global RNG).
        dist_cars : float
            Two cars at this distance or closer count as a crash.
        success_radius : float, optional
            A car within this distance of its target counts as having
            exited successfully.  Positions are floats produced by
            trigonometry and targets lie on the boundary (where overshooting
            moves are rejected), so an exact-equality test is practically
            never satisfied.  Pass ``0`` to require an exact match.
        """
        self.dist_cars = dist_cars
        self.success_radius = success_radius

        # create 4 entrances (they double as the possible target exits)
        self.state_a = [0, 5]
        self.state_b = [5, 0]
        self.state_c = [5, 10]
        self.state_d = [10, 5]
        self.exits = [self.state_a, self.state_b, self.state_c, self.state_d]

        self.set_seed(seed)
        # reset() creates self.cars (one car per entrance) and self.states
        self.reset()

    def step(self, action_space):
        """Advance every car by one step and compute the shared reward.

        Parameters
        ----------
        action_space : mapping
            ``action_space[car] == [acceleration, curvature]`` for every car
            id in ``self.cars`` (a missing id raises ``KeyError``).  The
            acceleration is fed to the module-level ``velocity`` helper; the
            curvature is multiplied by 360 to obtain the heading in degrees.

        Returns
        -------
        tuple
            ``(self.states, reward, done)``.
        """
        done = False
        reward_time = -1
        reward_boundary = 0

        for car in self.cars:
            car_state = self.states[car]
            acceleration, curvature = action_space[car][0], action_space[car][1]

            car_state[2] = velocity(car_state[2], acceleration)
            car_state[3] = curvature * 360
            new_x, new_y = self.move_car(car_state[0], car_state[2], car_state[3])

            if new_x > self.SIZE or new_x < 0 or new_y > self.SIZE or new_y < 0:
                # The move is rejected: the car stays where it was and is penalised.
                reward_boundary += -5
            else:
                car_state[0] = [new_x, new_y]

        # Order matters: a car that exits successfully is respawned with
        # velocity 0 and is therefore seen as static by check_static().
        reward_crash = self.check_crash(self.dist_cars)
        reward_success = self.check_success()
        reward_static = self.check_static()

        reward = reward_crash + reward_static + reward_success + reward_time + reward_boundary
        # NOTE(review): a single success yields at most 100 - 1 = 99, so the
        # episode only ends when two or more cars exit in the same step.
        # Kept as in the original; confirm whether this is intended.
        if reward > 100:
            done = True
        return self.states, reward, done

    def set_seed(self, seed):
        """Seed NumPy's global random generator (affects all np.random users)."""
        np.random.seed(seed)

    def _route_from(self, spawn):
        """Return a fresh car state starting at exit ``spawn`` with a random other target."""
        target_indexes = [i for i in range(len(self.exits)) if i != spawn]
        # choose a random target exit (spawn exit is already excluded)
        rand_target = np.random.choice(target_indexes)
        # Copy the coordinate lists so a car state never aliases self.exits.
        return [list(self.exits[spawn]), list(self.exits[rand_target]), 0, 0]

    def _random_route(self):
        """Return a fresh car state with a random spawn exit and a random other target."""
        spawn = np.random.choice(len(self.exits))
        return self._route_from(spawn)

    def new_car(self):
        """Add one more car with the next free id, a random spawn and a random target."""
        new_car = self.cars[-1] + 1

        # add new car index to list of all cars
        self.cars = np.append(self.cars, new_car)

        # new car id : [spawn exit, target exit, velocity, heading]
        self.states[new_car] = self._random_route()

    def reset_successful_car(self):
        """Return the state for a car that re-enters after a successful exit."""
        return self._random_route()

    def reset(self):
        """Reset the whole environment to one stationary car per entrance.

        Cars added with :meth:`new_car` are discarded.  Previously the
        existing car ids were reused as indices into ``self.exits``, which
        raised ``IndexError`` as soon as a fifth car existed.

        Returns
        -------
        dict
            The new ``self.states`` mapping ``car id -> state list``.
        """
        print("Environment reset with param")

        # Car i starts at entrance i.
        self.cars = np.arange(len(self.exits))
        self.states = {car: self._route_from(car) for car in self.cars}
        return self.states

    def check_crash(self, dist_cars):
        """Return -20 for every pair of cars at most ``dist_cars`` apart (0 if none)."""
        positions = [car_prop[0] for car_prop in self.states.values()]

        n_crashes = sum(
            1
            for p0, p1 in itertools.combinations(positions, 2)
            if distance(p0, p1) <= dist_cars
        )
        reward_crash = -20 * n_crashes

        if reward_crash != 0: print("There was a car crash")

        return reward_crash

    def check_success(self):
        """Reward and respawn every car that has reached its target.

        A car counts as arrived when it is within ``self.success_radius`` of
        its target.  Each arrival is worth +100 and the car is replaced by a
        freshly spawned one (see :meth:`reset_successful_car`).
        """
        arrived = [
            car
            for car, car_prop in self.states.items()
            if math.hypot(car_prop[0][0] - car_prop[1][0],
                          car_prop[0][1] - car_prop[1][1]) <= self.success_radius
        ]

        # Respawn after the scan so the dict is not modified while iterating.
        for car in arrived:
            self.states[car] = self.reset_successful_car()

        reward_success = 100 * len(arrived)

        if reward_success != 0: print("There was successful exit")

        return reward_success

    def check_static(self):
        """Return -10 for every car whose velocity is exactly 0 (0 if none)."""
        n_static = sum(1 for car_prop in self.states.values() if car_prop[2] == 0)
        reward_static = -10 * n_static

        if reward_static != 0: print("There was а static car")

        return reward_static

    def move_car(self, states, distance, angle_degrees):
        """Return the point reached from ``states`` after travelling ``distance``.

        Parameters
        ----------
        states : sequence
            Current position ``[x, y]`` (despite the plural name).
        distance : float
            Length of the move.
        angle_degrees : float
            Direction of travel in degrees, measured from the positive x axis.

        Returns
        -------
        tuple
            ``(new_x, new_y)``; no bounds checking is done here.
        """
        angle_radians = angle_degrees * math.pi / 180
        new_x = states[0] + distance * math.cos(angle_radians)
        new_y = states[1] + distance * math.sin(angle_radians)
        return new_x, new_y
    

    
def distance(p0, p1):
    """Return the Euclidean distance between two 2-D points.

    Both arguments are indexable as ``[x, y]``; only the first two
    components are read.
    """
    delta_x = p0[0] - p1[0]
    delta_y = p0[1] - p1[1]
    squared_length = delta_x**2 + delta_y**2
    return math.sqrt(squared_length)

def velocity(v, a):
    """Return the updated velocity: previous velocity ``v`` plus a tenth of acceleration ``a``."""
    velocity_change = a / 10
    return v + velocity_change



In [3]:
# Experiment configuration: step budget, RNG seed and crash distance.
n_times_steps, seed, dist_cars = 3, 10, 0.5

# The constructor already resets the environment once; the explicit reset()
# below resets it a second time (hence two "Environment reset" lines).
env = Square_Crossroads(n_times_steps, seed, dist_cars)
env.reset()
print(env.states)

# Start with a no-op action ([0, 0]) for each of the four initial cars.
action_space = {car_id: [0, 0] for car_id in range(4)}
total_reward = 0


Environment reset with param
Environment reset with param
{0: [[0, 5], [5, 10], 0, 0], 1: [[5, 0], [0, 5], 0, 0], 2: [[5, 10], [5, 0], 0, 0], 3: [[10, 5], [5, 0], 0, 0]}


In [5]:
# for a in range(10):
#     for key, value in action_space.items():
#         acceleration = np.random.beta(0.69, 0.321, size=None)
#         curvature = np.random.beta(0.1, 0.321, size=None)
#         action_space[key] = [acceleration, curvature]

#     states, reward, done = env.step(action_space)
#     print(reward)
#     total_reward += reward

A car tried to go outside the boundaries
A car tried to go outside the boundaries
A car tried to go outside the boundaries
-16
A car tried to go outside the boundaries
A car tried to go outside the boundaries
-11
A car tried to go outside the boundaries
A car tried to go outside the boundaries
-11
A car tried to go outside the boundaries
A car tried to go outside the boundaries
A car tried to go outside the boundaries
-16
A car tried to go outside the boundaries
-6
A car tried to go outside the boundaries
-6
A car tried to go outside the boundaries
-6
A car tried to go outside the boundaries
A car tried to go outside the boundaries
-11
-1
-1


In [6]:
# total_reward

-85

In [7]:
# acceleration = np.random.beta(0.1, 0.321, size=None)
# curvature = np.random.beta(0.1, 0.321, size=None)

# starting point
# (x, y)
# (0, 5)

# velocity = v + acceleration


# x2 = x + acceleration * curvature
# y2 = y + acceleration * curvature


# (1.324231, 5.234355)


# acceleration, curvature

# x2, y2

In [10]:
# reward

In [9]:
# done

False