## Coding the learning without Ray using really simple functions

The idea is simple:
At every iteration, each vehicle observes which path of the network was the best one during the last iteration, and then switches to that best path with a given probability.

In [1]:
import numpy as np
import Networks

In [2]:
class autonomous_vehicle:
    """Vehicle that holds one path index and may randomly switch to another.

    The current path is stored privately and read through
    ``get_path_choice``; ``path_choice`` is the only method that changes it.
    """

    def __init__(self, path):
        """Store ``path`` as the vehicle's initial path."""
        self.__path = path

    @staticmethod
    def _path_cost(path, network, soc_fac):
        """Return travel time plus ``soc_fac`` times marginal cost of ``path``."""
        index = int(path)
        return network.travel_time(index) + soc_fac * network.marginal_cost(index)

    def path_choice(self, path, network, soc_fac):
        """Switch to ``path`` with a probability given by the relative cost gap.

        The switching probability is ``|cost_self - cost_path| / cost_self``,
        where each cost is ``travel_time + soc_fac * marginal_cost`` as
        reported by ``network``.

        Args:
            path: Candidate path index (converted with ``int``).
            network: Object exposing ``travel_time(index)`` and
                ``marginal_cost(index)``.
            soc_fac: Weight applied to the marginal cost.

        Returns:
            None. The stored path is updated in place.
        """
        # Original author's note: this rule makes the system converge toward
        # a Nash equilibrium; change it to obtain a faster convergence.
        cost_self = self._path_cost(self.__path, network, soc_fac)
        cost_path = self._path_cost(path, network, soc_fac)
        gap = abs(cost_self - cost_path)
        if cost_self == 0:
            # The relative gap is undefined for a zero current cost. Use the
            # limit of the formula instead of dividing by zero: never switch
            # when both costs are equal, always switch when they differ.
            p = 0.0 if gap == 0 else 1.0
        else:
            p = gap / cost_self
        # Exactly one random draw per call, whatever the branch taken above.
        if np.random.rand() < p:
            self.__path = path

    def get_path_choice(self):
        """Return the path currently held by the vehicle."""
        return self.__path

In [3]:
# Experiment configuration shared by the code below.
# Name handed to Networks.network to pick the network.
network_name = 'Braess'
# Number of vehicles; also passed to the network constructor.
nb_veh = 4
soc_fac = 1 # weight applied to the marginal cost in each path's cost; to be changed
network = Networks.network(network_name, nb_veh)
# Number of paths, as reported by the network object.
nb_paths = network.nb_paths

def reward_calculator(soc_fac):
    """Return one reward per path: the negated cost of that path.

    The cost of a path is its travel time plus ``soc_fac`` times its
    marginal cost, both read from the module-level ``network``. The result
    is a NumPy array of length ``nb_paths``.
    """
    rewards = np.zeros(nb_paths)
    for path_index in range(nb_paths):
        path_cost = (
            network.travel_time(path_index)
            + soc_fac * network.marginal_cost(path_index)
        )
        # A lower cost means a higher reward.
        rewards[path_index] = -path_cost
    return rewards
# Apply the actions of every agent at the same time

# One vehicle object per car, keyed 'car_<index>'; every car starts on path 0.
cars_dict = {
    'car_' + str(veh_index): autonomous_vehicle(0)
    for veh_index in range(nb_veh)
}

# Number of iterations of the simulation loop.
nb_iter = 20

# Main loop: at each iteration, count the vehicles on every path, push those
# flows to the network, then let every vehicle react to the same best path.
for i in range(nb_iter):
    # Number of vehicles currently on each path, keyed by integer path index.
    paths_flow_dict = {}
    for cars_veh in cars_dict.values():
        # Convert once so the lookup and the store always use the same key.
        rl_action = int(cars_veh.get_path_choice())
        paths_flow_dict[rl_action] = paths_flow_dict.get(rl_action, 0) + 1

    print(paths_flow_dict)
    # Update the path travel times of the network given the path flows.
    network.update_flow_from_dict(paths_flow_dict)

    # Per-path travel time and marginal cost after the flow update.
    # NOTE(review): these two dicts are not read again inside this loop; they
    # are kept in case code outside this view inspects them.
    travel_time = {}
    marginal_cost = {}

    for path in range(nb_paths):
        travel_time[path] = network.travel_time(path)
        marginal_cost[path] = network.marginal_cost(path)

    # The best path is the one with the highest reward.
    rew_array = reward_calculator(soc_fac)
    best_path = np.argmax(rew_array)
    # Every vehicle is offered the same best path, computed from the flows of
    # this iteration, so all agents act on the same information.
    for cars_veh in cars_dict.values():
        cars_veh.path_choice(best_path, network, soc_fac)
    # TODO: update the action_dict based on the ... (original note is truncated)

{0: 4}
1
{0: 3, 1: 1}
2
{0: 2, 2: 2}
1
{0: 2, 2: 1, 1: 1}
1
{0: 2, 2: 1, 1: 1}
1
{0: 2, 2: 1, 1: 1}
1
{0: 2, 2: 1, 1: 1}
1
{0: 1, 2: 1, 1: 2}
2
{0: 1, 2: 2, 1: 1}
1
{0: 1, 2: 2, 1: 1}
1
{1: 3, 2: 1}
2
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
{1: 2, 2: 2}
1
