## Coding the learning without Ray using really simple functions

The idea is simple:
Every vehicle sees which path of the network was the best one in the last iteration, and it switches to that best path with a given probability.

In [1]:
import numpy as np
import Networks

In [2]:
class autonomous_vehicle:
    """Vehicle that remembers one path and may switch to a proposed path.

    The current path is kept in a private attribute; read it with
    get_path_choice().
    """

    def __init__(self, path):
        """Start the vehicle on ``path`` (a path index of the network)."""
        self.__path = path

    def path_choice(self, path, network, soc_fac):
        """Switch to ``path`` with a probability equal to the relative cost gap.

        The cost of a path ``p`` is
        ``network.travel_time(p) + soc_fac * network.marginal_cost(p)``.
        The vehicle moves to ``path`` with probability
        ``abs(cost_self - cost_path) / cost_self`` and keeps its current path
        otherwise.

        Args:
            path: index of the proposed path.
            network: object providing ``travel_time(index)`` and
                ``marginal_cost(index)``.
            soc_fac: weight applied to the marginal cost in the path cost.
        """
        # Original author's note: this switching rule makes the system
        # converge toward Nash; change it to get a faster convergence.
        current = int(self.__path)
        candidate = int(path)
        cost_self = network.travel_time(current) + soc_fac * network.marginal_cost(current)
        cost_path = network.travel_time(candidate) + soc_fac * network.marginal_cost(candidate)

        if cost_self == 0:
            # The relative gap is undefined for a zero current cost. Keep the
            # current path instead of dividing by zero.
            return

        p = abs(cost_self - cost_path) / cost_self
        if np.random.rand() < p:
            self.__path = path

    def get_path_choice(self):
        """Return the path the vehicle is currently on."""
        return self.__path

In [7]:
# Experiment parameters.
network_name, nb_veh = 'Braess', 4
soc_fac = 0  # weight of the marginal cost in the path cost; to be changed

# Build the network for that many vehicles and read its number of paths.
network = Networks.network(network_name, nb_veh)
nb_paths = network.nb_paths

# CSV writer that receives the per-step path choices and travel times.
path_choice_filename, flow_filename = 'path_choice.csv', 'flow_data.csv'
wr = CSVGenerator(path_choice_filename, flow_filename)

def reward_calculator(soc_fac):
    """Return the reward of every path as a NumPy float array.

    The reward of path ``p`` is the negated path cost
    ``-(travel_time(p) + soc_fac * marginal_cost(p))``, read from the
    module-level ``network``. The array has one entry per path index in
    ``range(nb_paths)``.
    """
    def negated_cost(path_index):
        return -(network.travel_time(path_index)
                 + soc_fac * network.marginal_cost(path_index))

    return np.fromiter(
        (negated_cost(path_index) for path_index in range(nb_paths)),
        dtype=float,
    )
# Apply the actions of every agent at the same time

# One vehicle per name 'car_<k>'; every vehicle starts on path 0.
cars_dict = {}
for i in range(nb_veh):
    cars_dict[f"car_{i}"] = autonomous_vehicle(0)

nb_iter = 20

for i in range(nb_iter):
    print(f"step {i}")

    # Record the current path of every vehicle and count vehicles per path.
    car_path_dict = {}
    paths_flow_dict = {}
    for name_car, cars_veh in cars_dict.items():
        rl_action = cars_veh.get_path_choice()
        car_path_dict[name_car] = rl_action
        paths_flow_dict[int(rl_action)] = paths_flow_dict.get(int(rl_action), 0) + 1

    # Push the path flows to the network so its travel times are updated.
    network.update_flow_from_dict(paths_flow_dict)

    # Per-path summaries for this step; paths that nobody uses get a flow of 0.
    travel_time, marginal_cost = {}, {}
    path_flow_dict_full, travel_time_to_print = {}, {}
    for path in range(nb_paths):
        path_flow_dict_full[f"path_{path}"] = paths_flow_dict.get(path, 0)
        travel_time[path] = network.travel_time(path)
        travel_time_to_print[f"travel_time_path_{path}"] = travel_time[path]
        marginal_cost[path] = network.marginal_cost(path)

    print(car_path_dict, path_flow_dict_full, travel_time_to_print, sep="\n")

    # Every vehicle is offered the same best path, evaluated on the same
    # network state, so all agents act at the same time.
    rew_array = reward_calculator(soc_fac)
    best_path = rew_array.argmax()
    for cars_veh in cars_dict.values():
        cars_veh.path_choice(best_path, network, soc_fac)

    # TODO: the original note here was left unfinished
    # ("update the action_dict based on the ...").
    wr.generateCSV(car_path_dict, travel_time_to_print)


step 0
{'car_0': 0, 'car_1': 0, 'car_2': 0, 'car_3': 0}
{'path_0': 4, 'path_1': 0, 'path_2': 0}
{'travel_time_path_0': 4.25, 'travel_time_path_1': 4.0, 'travel_time_path_2': 4.0}
step 1
{'car_0': 0, 'car_1': 0, 'car_2': 0, 'car_3': 0}
{'path_0': 4, 'path_1': 0, 'path_2': 0}
{'travel_time_path_0': 4.25, 'travel_time_path_1': 4.0, 'travel_time_path_2': 4.0}
step 2
{'car_0': 0, 'car_1': 0, 'car_2': 0, 'car_3': 0}
{'path_0': 4, 'path_1': 0, 'path_2': 0}
{'travel_time_path_0': 4.25, 'travel_time_path_1': 4.0, 'travel_time_path_2': 4.0}
step 3
{'car_0': 0, 'car_1': 0, 'car_2': 0, 'car_3': 0}
{'path_0': 4, 'path_1': 0, 'path_2': 0}
{'travel_time_path_0': 4.25, 'travel_time_path_1': 4.0, 'travel_time_path_2': 4.0}
step 4
{'car_0': 1, 'car_1': 0, 'car_2': 0, 'car_3': 0}
{'path_0': 3, 'path_1': 1, 'path_2': 0}
{'travel_time_path_0': 4.0, 'travel_time_path_1': 4.0, 'travel_time_path_2': 3.75}
step 5
{'car_0': 1, 'car_1': 0, 'car_2': 0, 'car_3': 2}
{'path_0': 2, 'path_1': 1, 'path_2': 1}
{'travel_

In [4]:
import csv
import os.path

class CSVGenerator:
    """Append path-choice rows and flow-data rows to two CSV files.

    Initialise with the path-choice file name and the flow file name,
    for example:

        path_choice_filename = 'path_choice.csv'
        flow_filename = 'flow_data.csv'
        wr = CSVGenerator(path_choice_filename, flow_filename)

    Both files are opened in append mode. A header row is written before the
    first data row only when the file was missing or empty at construction
    time, so re-running against an existing file does not put a second
    header in the middle of the data. Every row is flushed as soon as it is
    written. Call close() when done, or use the object in a ``with`` block.
    """

    def __init__(self, path_choice_filename, flow_filename):
        # The header flags must be computed before open(): append mode
        # creates a missing file, which would hide the "new file" case.
        self.pathChoiceHeader = self._is_missing_or_empty(path_choice_filename)
        self.pathChoiceCsv = open(path_choice_filename, 'a')
        try:
            self.flowDataHeader = self._is_missing_or_empty(flow_filename)
            self.flowDataCSV = open(flow_filename, 'a')
        except Exception:
            # Do not leak the first handle when the second file cannot be opened.
            self.pathChoiceCsv.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    @staticmethod
    def _is_missing_or_empty(filename):
        """Return True when ``filename`` does not exist or has no content."""
        return not os.path.exists(filename) or os.path.getsize(filename) == 0

    @staticmethod
    def _write_row(csv_file, row, write_header):
        """Write ``row`` (a dict) to ``csv_file``, preceded by a header if asked."""
        writer = csv.DictWriter(csv_file, fieldnames=list(row), lineterminator='\n')
        if write_header:
            writer.writeheader()
        writer.writerow(row)
        # Flush so the row reaches the file even if close() is never called.
        csv_file.flush()

    def writePathChoice(self, path_choice):
        """
        Writes path choices in a file
        path_choice must be a dictionary

        For example:
        {"A": 0, "B": 2, "C": 2, "D": 1}
        """
        write_header = self.pathChoiceHeader
        self.pathChoiceHeader = False
        self._write_row(self.pathChoiceCsv, path_choice, write_header)

    def writeFlowData(self, flow_data):
        """
        Writes flow data in a file
        flow_data must be a dictionary

        For example:
        {"path_a": 4.00, "path_b": 4.00, "path_c": 4.75}
        """
        write_header = self.flowDataHeader
        self.flowDataHeader = False
        self._write_row(self.flowDataCSV, flow_data, write_header)

    def generateCSV(self, path_choice, flow_data):
        """Write one path-choice row and one flow-data row."""
        self.writePathChoice(path_choice)
        self.writeFlowData(flow_data)

    def close(self):
        """Close both files; calling it more than once is harmless."""
        self.pathChoiceCsv.close()
        self.flowDataCSV.close()
