In [1]:
import numpy as np
import time

### Define vehicles and tasks sets

In [2]:
vehicles = [0,1,2,3]
tasks = [0,1,2]

### Create random utility matrix

We use a matrix to store the utilities of the vehicles for the different possible allocations.
The advantage of using a matrix compared to storing all the possible combinations in a dictionary, a hashmap or similar, is that we save a lot of space because we don't have to store the keys, and the acces time is the same. The other advantage is that we can loop easily on the matrix and use optimized numpy functions over this matrix. Using a matrix could also be interesting to move the computations on GPU to speed it up for high dimentionnal instances.

In [3]:
# Clean version :

def getShape(nb_vehicles, nb_tasks):
    """ Compute the shape for this settings
    
    Parameters :
        nb_vehicles : int, the number of vehicles
        nb_tasks : int, the number of tasks
        
    Returns :
        a tuple containing the shape (tasks^vehicles * vehicles),
        that is the shape for the utility matrix.
        
    """
    list_dim = [nb_tasks]*nb_vehicles +[nb_vehicles]# matrix of shape tasks^vehicles * vehicles
    return tuple(list_dim)

shapes = getShape(len(vehicles),len(tasks))
shapes

(3, 3, 3, 3, 4)

In [4]:
ut = np.random.randint(0,10,shapes) # create utility matrix with random utilities

In [5]:
ut[0][0][0][0] # get utility of all agents when all do task 0

array([5, 4, 3, 2])

In [6]:
ind = tuple([0,0,0,0]) # accessing the matrix from array as index
ut[ind]

array([5, 4, 3, 2])

In [157]:
ut[0][1][2][2] # get utility of all agents when allocation for vehicles to task is = 0,1,2,2 
# (means vehicule 0 do task 0, vehicule 1 do task 1, vehicule 2 do task 2, vehicule 3 do task 2)

array([9, 9, 5, 8])

### Defining usefull functions

In [7]:
def replaceAlloc(allocation, v, t):
    """ Compute the new allocation with task t asigned to vehicle v
    
    Parameters :
        allocation : List(int) the list of tasks allocades to each vehicle (in order)
        v : int, the vehicle id
        t : int, the task id
    
    Returns :
        List(int), the new allocation
    
    """
    return list(allocation[:v])+[t]+list(allocation[v+1:])

In [8]:
def is_EN(utilities, allocation, vehicles, tasks):
    """ Check if the current allocation is a Nash Equilibrium or not
    
    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the list of tasks allocades to each vehicle (in order)
        vehicle : List(int) the list of vehicle ids
        tasks : List(int) the list of task ids
    
    Returns :
        Tuple(boolean, int)
        A tuple containing a boolean (True if this allocation is a Nash Equilibrium, else False)
        and an integer that is the id of a vehicle that can increase its utility
        by changing unilateraly its allocation (if not EN, -1)
    
    """
    for v in range(len(vehicles)) : # for each vehicle
        current_task = allocation[v]
        current_utility = utilities[tuple(allocation)][v]
        for t in range(len(tasks)) :
            if t != current_task : # check all other tasks
                temp_ind = replaceAlloc(allocation, v, t) # allocating task t to vehicle v
                utility = utilities[tuple(temp_ind)][v]
                if utility > current_utility : # changing to another task gives more utility -> Not NE
                    return (False, v)
    return (True, -1)

We also return the id of one vehicle that can increase its utility by changing its allocation, if the solution is not a Nash Equilibrium. 
It didn't increase the computation cost and avoids looping another time later on the utility table to find one in the Best Response Dynamics, it's all benefits.

##### Nash Equilibrium test example on small dimension

In [9]:
# Create setup : 2 vehicles, 3 tasks
v = [0,1] # don't change, it's 2D example
t = [0,1,2]
ut_test = np.random.randint(0,10,(len(t),len(t),2))
ut_test

array([[[8, 0],
        [8, 2],
        [2, 2]],

       [[8, 0],
        [0, 4],
        [8, 2]],

       [[5, 7],
        [0, 3],
        [7, 3]]])

In [10]:
# Check EN for allocation (0,0)
alloc = [0,0] # set allocation to check (0,1) -> first vehicle do task 0 and second do task 1
is_EN(ut_test, alloc, v, t) # random example

(False, 1)

In [11]:
alloc = [0,1]
ut_test[tuple(alloc)] = [0,0] # set allocation (0,1) to the lowest value for each vehicle
ut_test[2,1] = [1,0] # set another allocation to a better score for one vehicle (in case matrix is full zero)
is_EN(ut_test,alloc , v, t) # -> Is necessarly not an EN (result must be False)

(False, 0)

In [12]:
alloc = [0,1]
ut_test[tuple(alloc)] = [10,10] # set allocation (0,1) to the highest value for each vehicle
is_EN(ut_test, alloc, v, t) # -> Is necessarly  an EN (result must be True)

(True, -1)

In [13]:
ut_test_z = np.zeros((len(t),len(t),2)) # set all matrix to 0 (same value everywhere)
ut_test_z

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [14]:
alloc = [2,1] # when Zero everywhere, no solution is strictly better than the current
is_EN(ut_test_z, alloc, v, t) # -> Is necessarly an EN (result must be True)

(True, -1)

### Best Response Dynamics

In [15]:
def getBestTask(utilities, allocation, v, tasks):
    """ Compute the best task for vehicle v
    
    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the list of tasks allocades to each vehicle (in order)
        v : int, the vehicle id
        tasks : List(int) the list of task ids
        
    Returns : 
        int, the best task for vehicle v
    """
    best = np.argmax([utilities[tuple(replaceAlloc(allocation, v, t))][v] for t in range(len(tasks))])
    return best

In [16]:
def bestResponseDynamic(utilities, vehicles, tasks, maxsteps):
    """ Try to compute a Nash Equilibrium allofaction using Best Response Dynamics
    
    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxteps : int, the steps limit of the algorithm
        
    Returns : 
        List(int), a Nash Equilibrium allocation if one was found (no guarantee)
        
    """
    allocation = np.random.randint(0,len(tasks),len(vehicles)) # initial random allocation
    end, id_change = is_EN(utilities, allocation, vehicles, tasks)
    steps = 0
    while not(end) and steps < maxsteps:
        # vehicle id_change has interest to change to a better allocation
        best = getBestTask(utilities, allocation, id_change, tasks) # get its best unilateral allocation
        allocation = replaceAlloc(allocation, id_change, best) # set next allocation for id_change
        end, id_change = is_EN(utilities, allocation, vehicles, tasks)
        steps += 1
    if not(end) and steps >= maxsteps: # cut the exection if maxsteps reached
        print("Execution stopped : maximum step overflowed, no EN found.")
    return list(allocation)

##### Best Response Dynamics test example on the initial matrix

In [17]:
ut[2,2,1,1] = [10,10,10,10] # we create a global optimal affectation -> At least one EN in utilities table

In [18]:
bestResponseDynamic(ut, vehicles, tasks, 1000) # the global optimum is reached (NE)

[2, 2, 1, 1]

In [19]:
bestResponseDynamic(ut, vehicles, tasks, 1000) # another Nash Equilibrium is sometimes reached

Execution stopped : maximum step overflowed, no EN found.


[2, 1, 0, 0]

### Fictitious Play

In [20]:
shapes = getShape(len(vehicles),len(tasks))
print(shapes)
ut = np.random.randint(0,10,shapes) # create utility matrix with random utilities

(3, 3, 3, 3, 4)


In [21]:
def computeFrequency(proposals, tasks):
    """ Compute the empirical frequency of each proposal
    
    Parameters :
        proposals : List(int), the allocation proposed
        tasks : List(int) the list of task ids
        
    Returns : 
        List(float), the empirical frequency of each proposal
    
    """
    return [np.count_nonzero(np.array(proposals) == t)/len(proposals) for t in range(len(tasks))]

In [22]:
# testing frequencies computation
proposals = [0,1,2,1,1,2,0,0,1,2,2,1,2,0,1,1,2,1,2,0,1] # proposals sequence made by an agent
taks = [0,1,2] # define tasks

In [23]:
computeFrequency(proposals,tasks) # compute frequency of each task in proposals

[0.23809523809523808, 0.42857142857142855, 0.3333333333333333]

In [24]:
def computeFullFrequencyMatrix(frequencies, vehicles, tasks): # not used
    """
        Compute the joined frequency matrix of proposed allocation for each vehicle
    """
    shapes = getShape(len(vehicles),len(tasks))
    fMat = np.zeros(shapes)
    allAlloc = [x for x,_ in np.ndenumerate(ut)]
    for alloc in allAlloc :
        fMat[alloc] = np.prod([frequencies[v][alloc[v]] for v in range(len(vehicles))])
    return fMat

In [25]:
# testing frequency matrix computation
frequencies = np.random.random((4,10)) # random frequencies

In [26]:
frequencies

array([[0.77218333, 0.56594313, 0.40350632, 0.90114795, 0.70722221,
        0.06672194, 0.93242844, 0.80821439, 0.9235687 , 0.53211887],
       [0.45635387, 0.60894687, 0.05908517, 0.49949698, 0.13064605,
        0.95352202, 0.93259118, 0.94362164, 0.61045116, 0.97096061],
       [0.08030695, 0.01369158, 0.71459327, 0.82379057, 0.18958987,
        0.00731452, 0.96149911, 0.18237181, 0.14040539, 0.32068531],
       [0.97604976, 0.58967468, 0.64995107, 0.48134723, 0.4866512 ,
        0.18965693, 0.83336898, 0.39972483, 0.13940649, 0.8186635 ]])

In [27]:
computeFullFrequencyMatrix(frequencies, vehicles, tasks)

array([[[[[2.76214986e-02, 2.76214986e-02, 2.76214986e-02,
           2.76214986e-02],
          [1.66873649e-02, 1.66873649e-02, 1.66873649e-02,
           1.66873649e-02],
          [1.83931428e-02, 1.83931428e-02, 1.83931428e-02,
           1.83931428e-02]],

         [[4.70920442e-03, 4.70920442e-03, 4.70920442e-03,
           4.70920442e-03],
          [2.84503797e-03, 2.84503797e-03, 2.84503797e-03,
           2.84503797e-03],
          [3.13585698e-03, 3.13585698e-03, 3.13585698e-03,
           3.13585698e-03]],

         [[2.45783680e-01, 2.45783680e-01, 2.45783680e-01,
           2.45783680e-01],
          [1.48488755e-01, 1.48488755e-01, 1.48488755e-01,
           1.48488755e-01],
          [1.63667235e-01, 1.63667235e-01, 1.63667235e-01,
           1.63667235e-01]]],


        [[[3.68574175e-02, 3.68574175e-02, 3.68574175e-02,
           3.68574175e-02],
          [2.22671906e-02, 2.22671906e-02, 2.22671906e-02,
           2.22671906e-02],
          [2.45433367e-02, 2.454333

In [28]:
def computePartialFrequencyMatrix(utilities, frequencies, vehicles, tasks, v):
    """
        Compute the expected utility for vehicle v foreach task, considering for each 
        other vehicle a random allocation choice with probability equal to empirical frequency observed 
        
    """
    temp_vehicles = vehicles[:v] + vehicles[v+1:] # create a list of index without vehicle v
    temp_alloc = np.zeros([len(tasks)]*(len(vehicles)-1)) # create a 0 array of shape (vehicles-1)^task
    allAlloc = [x for x,_ in np.ndenumerate(temp_alloc)] # enumerate all possible allocations for these vehicles
    expectations = []
    for t in range(len(tasks)):
        expected = 0
        for alloc in allAlloc : # for each possible allocation for vehicles without v
            # compute proba for these vehicles to do this allocation : 
            proba = np.prod([frequencies[temp_vehicles[v]][alloc[v]] for v in range(len(temp_vehicles))])
            # recreating full allocation with v :
            index = [0]*len(vehicles)
            for i in range(len(temp_vehicles)) :
                index[temp_vehicles[i]] = alloc[i]
            index[v] = t
            # get the utility for v if it do task t with this allocation for the other vehicles
            utility = utilities[tuple(index)][v]
            expected+= utility * proba # add proba time the utility of v to the expectation
        expectations.append(expected)
    return expectations

In [29]:
# testing expectation computation
frequencies = np.random.random((4,10)) # random frequencies (note that sum != 1, it's just for example)
frequencies

array([[0.4142567 , 0.50331641, 0.79156941, 0.33243127, 0.19619193,
        0.87384948, 0.44064743, 0.77407008, 0.30313702, 0.59035715],
       [0.4939105 , 0.29034932, 0.64376743, 0.13747091, 0.13375759,
        0.50306303, 0.06126503, 0.83430303, 0.25725455, 0.58932974],
       [0.0181441 , 0.62627871, 0.49613792, 0.82825344, 0.5310992 ,
        0.22601194, 0.87393285, 0.38175756, 0.12310519, 0.71181585],
       [0.58922353, 0.75358145, 0.84801749, 0.24807983, 0.94206083,
        0.55569962, 0.47079939, 0.66734483, 0.50304327, 0.94048099]])

In [30]:
for v in vehicles : # get the task with the highest utility expectation for each vehicle
    expect = computePartialFrequencyMatrix(ut, frequencies, vehicles, tasks, v)
    print(str(expect)+" --> task "+str(np.argmax(expect)))

[17.374747842579325, 16.55283501957941, 13.655112722786956] --> task 0
[19.794703891116352, 20.715119884412186, 19.330134663280347] --> task 1
[21.1377895178693, 22.05775842277379, 22.12597270220654] --> task 2
[11.297063250130083, 12.785203365158171, 18.442974504311557] --> task 2


In [31]:
def fictitiousPlay(utilities, vehicles, tasks, maxsteps):
    """ Play Fictitious Play until maxsteps
    
    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxteps : int, the steps limit of the algorithm
        
    Returns : 
        List(int), the found allocation after maxsteps steps (no optimality guarantee)
        
    """
    passedPropositions = [[t for t in tasks] for v in vehicles]
    steps = 0
    while steps < maxsteps:
        for v in vehicles :
            f = [computeFrequency(passedPropositions[v],tasks) for v in vehicles]
            expect = computePartialFrequencyMatrix(utilities, f, vehicles, tasks, v)
            bestTask = np.argmax(expect)
            passedPropositions[v].append(bestTask)
        #print([passedPropositions[v][-1] for v in vehicles])
        steps += 1
    return list([passedPropositions[v][-1] for v in vehicles])

In [32]:
# testing FP compared to BRD on some examples :

In [33]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

Execution stopped : maximum step overflowed, no EN found.


[1, 0, 1, 1]

In [34]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

Execution stopped : maximum step overflowed, no EN found.


[1, 1, 0, 1]

In [35]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

Execution stopped : maximum step overflowed, no EN found.


[1, 1, 1, 1]

In [36]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[0, 2, 1, 2]

In [37]:
fictitiousPlay(ut, vehicles, tasks, 1000) # this is a better solution than BRD's NE in term of social utility

[0, 2, 1, 2]

In [38]:
# (if cells before have been re-run, it's no more the same ut matrix so no more the same results)
# obtained with fictitious play :  
# [2,1,1,0]  (maxsteps 500)
# [2,1,2,0]  (maxsteps 1500)
print("FP : "+str(ut[2,1,1,0])+" -> mean "+str(np.mean(ut[2,1,1,0])))
print("FP : "+str(ut[2,1,2,0])+" -> mean "+str(np.mean(ut[2,1,2,0])))

# obtained with best response dynamic : 
# [1,2,0,0]  (maxsteps 1000)
# [2,0,1,2]  (maxsteps 1000)
# [0,0,2,1]  (maxsteps 1000)

print("BRD : "+str(ut[1,2,0,0])+" -> mean "+str(np.mean(ut[1,2,0,0])))
print("BRD : "+str(ut[2,0,1,2])+" -> mean "+str(np.mean(ut[2,0,1,2])))
print("BRD : "+str(ut[0,0,2,1])+" -> mean "+str(np.mean(ut[0,0,2,1])))

FP : [4 7 2 7] -> mean 5.0
FP : [9 6 7 3] -> mean 6.25
BRD : [3 1 4 2] -> mean 2.5
BRD : [4 4 8 8] -> mean 6.0
BRD : [6 0 0 2] -> mean 2.0


### Regret Matching

In [39]:
def computeR(R, u, k, tasks, alloc, v): # not used
    """ Update the average regret table of vehicle v for not proposing each task
    
    Parameters:
        R : np.array(float) the regret table for each action at each step k (size: steps*tasks)
        u : the utility matrix
        k : int, the current time step
        tasks : List(int), the list of tasks id
        alloc : List(int), the current allocation
        v : int, the vehicle
    
    """
    if k == 0:
        return
    for t in tasks :
        loss = u[tuple(replaceAlloc(alloc, v, t))][v] - u[alloc][v]
        R[k+1,t] = ((k-1)/k)*R[k,t] + (1/k) * loss

def getNextR(lastR, u, k, tasks, alloc, v):
    """ Compute the average regret vector of vehicle v for not proposing each task, based on last regret vector
    
    Parameters:
        lastR : np.array(float) the regret vactor for each task at last step k-1
        u : the utility matrix
        k : int, the current time step
        tasks : List(int), the list of tasks id
        alloc : List(int), the current allocation
        v : int, the vehicle
    Return:
        np.array(float) the regret vactor for each action at current step k
    """
    if k == 0:
        return np.zeros(len(tasks))
    else :
        return np.array([((k-1)/k)*lastR[t] + (1/k) * (u[tuple(replaceAlloc(alloc, v, t))][v] - u[alloc][v]) for t in tasks])
        

In [40]:
# computing regret during random play using a regret table
v=0
total_steps = 10
R = np.zeros((total_steps, len(tasks)))
for i in range(9) : # computing regret during random play
    computeR(R, ut, i, tasks, tuple(np.random.randint(0,len(tasks),len(vehicles))),v)

In [41]:
R # regret at each step k

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -5.        ],
       [ 0.        ,  1.5       , -4.        ],
       [ 0.        ,  1.        , -4.33333333],
       [ 0.        ,  2.25      , -2.5       ],
       [-0.4       ,  1.8       , -3.4       ],
       [ 0.5       ,  1.83333333, -2.83333333],
       [ 1.        ,  1.28571429, -2.42857143],
       [ 0.875     ,  1.875     , -2.125     ]])

In [42]:
# computing it during random play with last regret only (no regret table)
v=0
lastR = np.zeros(len(tasks)) 
for k in range(9) : # computing regret during random play
    lastR = getNextR(lastR, ut, k, tasks, tuple(np.random.randint(0,len(tasks),len(vehicles))),v)
    print(lastR)

[0. 0. 0.]
[-9. -2.  0.]
[-4.5 -1.  -0.5]
[-3.         -2.66666667 -2.33333333]
[-3.75 -2.   -2.75]
[-3.  -1.6 -2.2]
[-3.         -0.83333333 -1.83333333]
[-2.57142857 -1.85714286 -2.28571429]
[-2.25  -0.875 -1.875]


In [43]:
def probaFromRegret(R):
    """ Compute the probability to propose each task depending on the regret vector
    
    Parameters:
        R : np.array(float) the regret vector for each action at current step k     
    Return:
        np.array(float) the probability vector for each action at current step k
    
    """
    Rn = np.array(R)
    positive_reg = np.where(Rn > 0, Rn, 0)
    if np.sum(positive_reg) == 0:
        proba = np.ones(len(R))/len(R)
    else :
        proba = positive_reg/np.sum(positive_reg)
    return proba

In [44]:
print("regret vector for v : "+str(lastR))
print("proba : ")
print(probaFromRegret(lastR)) # getting proba based on lastR regret vector
print("proba when all regrets <=0 : ")
print(probaFromRegret([0,0,0])) # if all <=0 : same proba for all

regret vector for v : [-2.25  -0.875 -1.875]
proba : 
[0.33333333 0.33333333 0.33333333]
proba when all regrets <=0 : 
[0.33333333 0.33333333 0.33333333]


In [45]:
def RegretMatching(utilities, vehicles, tasks, maxsteps):
    """ Play Regret Matching until maxsteps
    
    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxteps : int, the steps limit of the algorithm
        
    Returns : 
        List(int), the found allocation after maxsteps steps (no optimality guarantee)
        
    """
    k = 0
    lastR = np.zeros((len(vehicles), len(tasks)))
    cur_alloc = np.random.randint(0,len(tasks), len(vehicles))
    next_alloc = np.random.randint(0,len(tasks), len(vehicles))
    while k < maxsteps:
        for v in vehicles:
            lastRv = lastR[v]
            nextRv = getNextR(lastRv, ut, k, tasks, tuple(cur_alloc), v)
            proba = probaFromRegret(nextRv)
            task = np.random.choice(tasks, p=proba)
            next_alloc[v] = task
            lastR[v] = nextRv # update the regret of v
        #print(next_alloc)
        cur_alloc = next_alloc # update the allocation
        k += 1
    return list(cur_alloc)

In [46]:
# some runs of the 3 methods on the same ut matrix :

In [47]:
RegretMatching(ut, vehicles, tasks, 1000) 

[0, 2, 1, 2]

In [48]:
RegretMatching(ut, vehicles, tasks, 1000)

[0, 2, 1, 2]

In [21]:
RegretMatching(ut, vehicles, tasks, 1000)

[0, 1, 2, 1]

In [23]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[1, 1, 2, 0]

In [24]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[1, 1, 2, 0]

In [25]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[1, 1, 2, 0]

In [26]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

Execution stopped : maximum step overflowed, no EN found.


[0, 1, 1, 2]

In [35]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

Execution stopped : maximum step overflowed, no EN found.


[1, 2, 1, 2]

In [28]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

Execution stopped : maximum step overflowed, no EN found.


[0, 1, 1, 2]

### Spatial Adaptative Play

In [30]:
allocation = [0, 1, 1, 2]

In [31]:
replaceAlloc(allocation, 1, 2)

[0, 2, 1, 2]

In [37]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [142]:
def computeSAPproba(utilities, alloc, vehicle, tasks, ro = 0.1):
    Uv = np.array([utilities[tuple(replaceAlloc(alloc, vehicle, t))][vehicle] for t in tasks])
    #print("utilities for each task : "+str(Uv))
    proba = softmax(Uv/ro) # use softmax or logit function
    return proba

In [68]:
computeSAPproba(ut, allocation, 1, tasks, ro = 0.1) # very small ro value : probas are very unbalanced (no random)

utilities for each task : [2 5 4]


array([9.35719815e-14, 9.99954602e-01, 4.53978687e-05])

In [69]:
computeSAPproba(ut, allocation, 1, tasks, ro = 1) # small ro value : probas are unbalanced (very small random)

utilities for each task : [2 5 4]


array([0.03511903, 0.70538451, 0.25949646])

In [70]:
computeSAPproba(ut, allocation, 1, tasks, ro = 1000) # very high ro value : probas are ~ uniform (a lot of random)

utilities for each task : [2 5 4]


array([0.33277798, 0.33377781, 0.3334442 ])

In [143]:
def SpatialAdaptativePlay(utilities, vehicles, tasks, maxsteps, ro):
    k = 0
    cur_alloc = np.random.randint(0,len(tasks), len(vehicles))
    while k < maxsteps:
        v = np.random.choice(vehicles) # select uniformly the vehicle which is given the chance to update its task
        proba = computeSAPproba(ut, cur_alloc, v, tasks, ro = 1) # compute proba to propose each task
        t = np.random.choice(tasks, p=proba) # choose the task to propose
        cur_alloc = replaceAlloc(cur_alloc, v, t) # update the allocation
        k += 1
    return list(cur_alloc)

In [146]:
# spatial adaptative play
SpatialAdaptativePlay(ut, vehicles, tasks, 1000, ro=1)

[2, 1, 1, 0]

### Comparing the different negociation methods :

In [None]:
# compare utility sum and execution time of the 3 methods
score_DMR = []
time_DMR = []
score_FP = []
time_FP = []
score_RM = []
time_RM = []
score_SAP = []
time_SAP = []
for step in range(10):# 10 different utility matrix
    ut = np.random.randint(0,10,shapes) # create utility matrix with random utilities
    for repeat in range(10): # 10 repeats on each utility matrix
        # best response dynamic
        start = time.time()
        alloc = tuple(bestResponseDynamic(ut, vehicles, tasks, 1000))
        duration = time.time() - start
        print("DMR : "+str(alloc))
        #print(ut[alloc])
        score_DMR.append(np.sum(ut[alloc]))
        time_DMR.append(duration)

        # fictitious play
        start = time.time()
        alloc = tuple(fictitiousPlay(ut, vehicles, tasks, 1000))
        duration = time.time() - start
        print("FP : "+str(alloc))
        #print(ut[alloc])
        score_FP.append(np.sum(ut[alloc]))
        time_FP.append(duration)

        # regret matching
        start = time.time()
        alloc = tuple(RegretMatching(ut, vehicles, tasks, 1000))
        duration = time.time() - start
        print("RM : "+str(alloc))
        #print(ut[alloc])
        score_RM.append(np.sum(ut[alloc]))
        time_RM.append(duration)

        # spatial adaptative play
        start = time.time()
        alloc = tuple(SpatialAdaptativePlay(ut, vehicles, tasks, 1000, ro=1))
        duration = time.time() - start
        print("SAP : "+str(alloc))
        #print(ut[alloc])
        score_SAP.append(np.sum(ut[alloc]))
        time_SAP.append(duration)

Execution stopped : maximum step overflowed, no EN found.
DMR : (2, 0, 0, 1)


In [141]:
# on a first utility matrix :
print("mean DMR : "+str(np.mean(score_DMR))+" average time : "+str(np.mean(time_DMR)))
print("mean FP : "+str(np.mean(score_FP))+" average time : "+str(np.mean(time_FP)))
print("mean RM : "+str(np.mean(score_RM))+" average time : "+str(np.mean(time_RM)))
print("mean SAP : "+str(np.mean(score_SAP))+" average time : "+str(np.mean(time_SAP)))

mean DMR : 22.16 average time : 0.02802384853363037
mean FP : 23.3 average time : 11.998116133213044
mean RM : 23.94 average time : 0.6288598060607911
mean SAP : 18.68 average time : 0.1347698974609375
