In [2]:
import numpy as np
import time

### Define vehicles and tasks sets

In [3]:
vehicles = [0,1,2,3]
tasks = [0,1,2]

### Create random utility matrix

We use a matrix to store the utilities of the vehicles for the different possible allocations.
The advantage of using a matrix rather than storing all the possible combinations in a dictionary, a hash map or similar, is that we save a lot of space because we don't have to store the keys, while the access time is the same. Another advantage is that we can easily loop over the matrix and apply optimized numpy functions to it. Using a matrix could also make it possible to move the computations to the GPU to speed them up for high-dimensional instances.

In [4]:
# Clean version :

def getShape(nb_vehicles, nb_tasks):
    """ Build the shape of the utility matrix for a given problem size

    Parameters :
        nb_vehicles : int, the number of vehicles
        nb_tasks : int, the number of tasks

    Returns :
        a tuple made of nb_vehicles axes of length nb_tasks (one axis per
        vehicle, indexed by the task it performs) followed by one last axis
        of length nb_vehicles (one utility value per vehicle).

    """
    task_axes = (nb_tasks,) * nb_vehicles  # one axis per vehicle
    return task_axes + (nb_vehicles,)      # last axis : utility of each vehicle

shapes = getShape(len(vehicles),len(tasks))
shapes

(3, 3, 3, 3, 4)

In [5]:
ut = np.random.randint(0,10,shapes) # create utility matrix with random utilities

In [6]:
ut[0][0][0][0] # get utility of all agents when all do task 0

array([3, 9, 5, 3])

In [7]:
ind = tuple([0,0,0,0]) # accessing the matrix from array as index
ut[ind]

array([3, 9, 5, 3])

In [8]:
ut[0][1][2][2] # get utility of all agents when allocation for vehicles to task is = 0,1,2,2 
# (means vehicule 0 do task 0, vehicule 1 do task 1, vehicule 2 do task 2, vehicule 3 do task 2)

array([3, 1, 8, 8])

### Defining useful functions

In [9]:
def replaceAlloc(allocation, v, t):
    """ Build a copy of an allocation in which vehicle v performs task t

    Parameters :
        allocation : sequence of int, the task allocated to each vehicle (in order)
        v : int, the vehicle id (position in the allocation)
        t : int, the task id

    Returns :
        List(int), the new allocation (the input is left untouched)

    """
    updated = list(allocation[:v])     # tasks of the vehicles before v
    updated.append(t)                  # new task of vehicle v
    updated.extend(allocation[v+1:])   # tasks of the vehicles after v
    return updated

In [10]:
def is_EN(utilities, allocation, vehicles, tasks):
    """ Tell whether an allocation is a Nash Equilibrium

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)
        vehicles : List(int) the list of vehicle ids
        tasks : List(int) the list of task ids

    Returns :
        Tuple(boolean, int)
        (True, -1) when no vehicle can strictly increase its own utility by
        switching task on its own; otherwise (False, v) where v is the first
        vehicle found that has such a profitable unilateral deviation.

    """
    nb_tasks = len(tasks)
    for vehicle in range(len(vehicles)):
        own_task = allocation[vehicle]
        own_utility = utilities[tuple(allocation)][vehicle]
        # every task except the one the vehicle currently performs
        alternatives = (task for task in range(nb_tasks) if task != own_task)
        for task in alternatives:
            deviation = tuple(replaceAlloc(allocation, vehicle, task))
            if utilities[deviation][vehicle] > own_utility:
                # a strictly better unilateral move exists -> not a NE
                return (False, vehicle)
    return (True, -1)

We also return the id of one vehicle that can increase its utility by changing its allocation, if the solution is not a Nash Equilibrium. 
This does not increase the computation cost and avoids looping over the utility table a second time to find such a vehicle in the Best Response Dynamics, so it comes for free.

##### Nash Equilibrium test example on small dimension

In [11]:
# Create setup : 2 vehicles, 3 tasks
v = [0,1] # don't change, it's 2D example
t = [0,1,2]
ut_test = np.random.randint(0,10,(len(t),len(t),2))
ut_test

array([[[3, 3],
        [2, 5],
        [1, 1]],

       [[6, 2],
        [1, 6],
        [3, 2]],

       [[9, 8],
        [8, 5],
        [0, 9]]])

In [12]:
# Check EN for allocation (0,0)
alloc = [0,0] # allocation to check : (0,0) -> both vehicles do task 0
is_EN(ut_test, alloc, v, t) # random example

(False, 0)

In [13]:
alloc = [0,1]
ut_test[tuple(alloc)] = [0,0] # set allocation (0,1) to the lowest value for each vehicle
ut_test[2,1] = [1,0] # set another allocation to a better score for one vehicle (in case matrix is full zero)
is_EN(ut_test,alloc , v, t) # -> Is necessarly not an EN (result must be False)

(False, 0)

In [14]:
alloc = [0,1]
ut_test[tuple(alloc)] = [10,10] # set allocation (0,1) to the highest value for each vehicle
is_EN(ut_test, alloc, v, t) # -> Is necessarly  an EN (result must be True)

(True, -1)

In [15]:
ut_test_z = np.zeros((len(t),len(t),2)) # set all matrix to 0 (same value everywhere)
ut_test_z

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [16]:
alloc = [2,1] # when Zero everywhere, no solution is strictly better than the current
is_EN(ut_test_z, alloc, v, t) # -> Is necessarly an EN (result must be True)

(True, -1)

### Best Response Dynamics

In [17]:
def getBestTask(utilities, allocation, v, tasks):
    """ Find the task that maximises the utility of vehicle v, the others being fixed

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)
        v : int, the vehicle id
        tasks : List(int) the list of task ids

    Returns :
        int, the index of the best task for vehicle v
        (the lowest index when several tasks tie, as np.argmax does)
    """
    candidate_utilities = []
    for t in range(len(tasks)):
        trial = tuple(replaceAlloc(allocation, v, t))  # v switches to t, others unchanged
        candidate_utilities.append(utilities[trial][v])
    return np.argmax(candidate_utilities)

In [18]:
def bestResponseDynamic(utilities, vehicles, tasks, maxsteps):
    """ Search for a Nash Equilibrium allocation with Best Response Dynamics

    Starting from a random allocation, one vehicle that can improve its
    utility is repeatedly moved to its best task until no vehicle can improve
    or the step budget is exhausted.

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxsteps : int, the maximum number of best-response moves

    Returns :
        List(int), the last allocation reached; it is a Nash Equilibrium
        unless the step limit was hit (a message is printed in that case)

    """
    allocation = np.random.randint(0,len(tasks),len(vehicles)) # initial random allocation
    steps = 0
    while True:
        end, id_change = is_EN(utilities, allocation, vehicles, tasks)
        if end or steps >= maxsteps:
            break
        # id_change can improve : move it to its best unilateral answer
        best = getBestTask(utilities, allocation, id_change, tasks)
        allocation = replaceAlloc(allocation, id_change, best)
        steps += 1
    if not end: # the loop can only stop without a NE when the budget is spent
        print("Execution stopped : maximum step overflowed, no EN found.")
    return list(allocation)

##### Best Response Dynamics test example on the initial matrix

In [19]:
ut[2,2,1,1] = [10,10,10,10] # random utilities are drawn in [0, 9], so this makes (2,2,1,1) a global optimum -> at least one NE in the utility table

In [20]:
bestResponseDynamic(ut, vehicles, tasks, 1000) # the NE reached depends on the random initial allocation; it is not necessarily the global optimum

[1, 0, 2, 0]

In [21]:
bestResponseDynamic(ut, vehicles, tasks, 1000) # another Nash Equilibrium is sometimes reached

[1, 0, 2, 0]

### Fictitious Play

In [22]:
shapes = getShape(len(vehicles),len(tasks))
print(shapes)
ut = np.random.randint(0,10,shapes) # create utility matrix with random utilities

(3, 3, 3, 3, 4)


In [23]:
def computeFrequency(proposals, tasks):
    """ Compute how often each task appears in a sequence of proposals

    Parameters :
        proposals : List(int), the tasks proposed so far by one vehicle
        tasks : List(int) the list of task ids

    Returns :
        List(float), for each task index the share of proposals equal to it

    """
    history = np.array(proposals)
    total = len(proposals)
    frequencies = []
    for t in range(len(tasks)):
        occurrences = np.count_nonzero(history == t)
        frequencies.append(occurrences/total)
    return frequencies

In [24]:
# testing frequencies computation
proposals = [0,1,2,1,1,2,0,0,1,2,2,1,2,0,1,1,2,1,2,0,1] # proposals sequence made by an agent
tasks = [0,1,2] # define tasks (was misspelled `taks`, so the next cell silently relied on the global `tasks`)

In [25]:
computeFrequency(proposals,tasks) # compute frequency of each task in proposals

[0.23809523809523808, 0.42857142857142855, 0.3333333333333333]

In [26]:
def computeFullFrequencyMatrix(frequencies, vehicles, tasks): # not used
    """ Compute the joint frequency matrix of the proposed allocations

    Parameters :
        frequencies : frequencies[v][t] is the empirical frequency with which
            vehicle v proposed task t
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids

    Returns :
        np.array of the same shape as the utility matrix (see getShape).
        For every allocation, all entries along the last axis hold the
        product over the vehicles of the frequency of the task they are given.

    """
    shapes = getShape(len(vehicles),len(tasks))
    fMat = np.zeros(shapes)
    # Enumerate the allocations from the matrix's own shape instead of the
    # global `ut`, and only over the task axes : the value does not depend on
    # the last (vehicle) axis, so it is broadcast along it in one assignment.
    for alloc in np.ndindex(*shapes[:-1]):
        fMat[alloc] = np.prod([frequencies[v][alloc[v]] for v in range(len(vehicles))])
    return fMat

In [27]:
# testing frequency matrix computation
frequencies = np.random.random((4,10)) # random frequencies

In [28]:
frequencies

array([[0.32193921, 0.50573386, 0.56258006, 0.6082882 , 0.50994466,
        0.93448077, 0.81113034, 0.87882467, 0.459991  , 0.37306147],
       [0.629901  , 0.14593952, 0.97715627, 0.49844585, 0.79883556,
        0.18165107, 0.13796542, 0.46748834, 0.05091305, 0.63970936],
       [0.83144323, 0.47846886, 0.38528909, 0.55619991, 0.84183759,
        0.4369402 , 0.35481573, 0.58292629, 0.88554318, 0.17547886],
       [0.12027081, 0.24424001, 0.94442692, 0.5705087 , 0.34420209,
        0.92411436, 0.93871641, 0.29881403, 0.00667194, 0.93237734]])

In [29]:
computeFullFrequencyMatrix(frequencies, vehicles, tasks)

array([[[[[0.02027865, 0.02027865, 0.02027865, 0.02027865],
          [0.04118088, 0.04118088, 0.04118088, 0.04118088],
          [0.15923815, 0.15923815, 0.15923815, 0.15923815]],

         [[0.01166971, 0.01166971, 0.01166971, 0.01166971],
          [0.02369827, 0.02369827, 0.02369827, 0.02369827],
          [0.09163644, 0.09163644, 0.09163644, 0.09163644]],

         [[0.00939708, 0.00939708, 0.00939708, 0.00939708],
          [0.01908313, 0.01908313, 0.01908313, 0.01908313],
          [0.07379063, 0.07379063, 0.07379063, 0.07379063]]],


        [[[0.00469829, 0.00469829, 0.00469829, 0.00469829],
          [0.00954105, 0.00954105, 0.00954105, 0.00954105],
          [0.03689332, 0.03689332, 0.03689332, 0.03689332]],

         [[0.00270371, 0.00270371, 0.00270371, 0.00270371],
          [0.00549057, 0.00549057, 0.00549057, 0.00549057],
          [0.02123092, 0.02123092, 0.02123092, 0.02123092]],

         [[0.00217718, 0.00217718, 0.00217718, 0.00217718],
          [0.0044213 , 0.004

In [30]:
def computePartialFrequencyMatrix(utilities, frequencies, vehicles, tasks, v):
    """ Compute the expected utility of vehicle v for each task

    Every other vehicle is assumed to pick its task independently at random,
    with probability equal to its observed empirical frequency.

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        frequencies : frequencies[o][t] is the empirical frequency of task t for vehicle o
        vehicles : List(int), the list of vehicle ids (used as positions in an allocation)
        tasks : List(int) the list of task ids
        v : int, the vehicle whose expected utilities are computed

    Returns :
        List(float), the expected utility of v for each task index

    """
    # ids of all vehicles but v (list() so tuples / arrays are concatenated too)
    others = list(vehicles[:v]) + list(vehicles[v+1:])
    nb_tasks = len(tasks)

    # The joint probability of a profile of the other vehicles does not depend
    # on the task of v : compute it once per profile instead of once per task.
    weighted_profiles = []
    for profile in np.ndindex(*[nb_tasks]*len(others)):
        proba = np.prod([frequencies[other][profile[i]] for i, other in enumerate(others)])
        weighted_profiles.append((profile, proba))

    expectations = []
    for t in range(nb_tasks):
        expected = 0
        for profile, proba in weighted_profiles:
            # rebuild the full allocation : others follow the profile, v does t
            index = [0]*len(vehicles)
            for i, other in enumerate(others):
                index[other] = profile[i]
            index[v] = t
            # utility of v for task t under this profile, weighted by its probability
            expected += utilities[tuple(index)][v] * proba
        expectations.append(expected)
    return expectations

In [31]:
# testing expectation computation
frequencies = np.random.random((4,10)) # random frequencies (note that sum != 1, it's just for example)
frequencies

array([[0.38302148, 0.71522696, 0.10777215, 0.71475331, 0.61052984,
        0.63302887, 0.51312582, 0.3735807 , 0.11669038, 0.35616393],
       [0.73397093, 0.3190796 , 0.76547603, 0.71248524, 0.21849877,
        0.26928172, 0.42461487, 0.63032684, 0.14282733, 0.77065972],
       [0.63541185, 0.96559973, 0.72287083, 0.69430997, 0.62170353,
        0.62501284, 0.86054594, 0.77531624, 0.52590642, 0.79562915],
       [0.48667382, 0.05293033, 0.3584864 , 0.61691046, 0.89833664,
        0.6847168 , 0.32395912, 0.56953913, 0.90911629, 0.63547034]])

In [32]:
for v in vehicles : # get the task with the highest utility expectation for each vehicle
    expect = computePartialFrequencyMatrix(ut, frequencies, vehicles, tasks, v)
    print(str(expect)+" --> task "+str(np.argmax(expect)))

[17.75291563942251, 18.597496790360818, 18.006667441505297] --> task 1
[9.500301015334431, 11.754893919789275, 13.605769461766732] --> task 2
[10.457010094711231, 8.375872295951698, 8.442472471117897] --> task 0
[27.1075335869325, 22.260251535991923, 17.523112119257966] --> task 0


In [33]:
def fictitiousPlay(utilities, vehicles, tasks, maxsteps):
    """ Run Fictitious Play for maxsteps rounds

    In each round every vehicle, in turn, proposes the task with the highest
    expected utility against the empirical frequencies of all past proposals.

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxsteps : int, the number of rounds to play

    Returns :
        List(int), the last task proposed by each vehicle (no optimality guarantee)

    """
    # every history starts with one proposal of each task
    history = [list(tasks) for _ in vehicles]
    steps = 0
    while steps < maxsteps:
        for v in vehicles :
            # frequencies are refreshed for each vehicle, so it already sees
            # the proposals made earlier in the same round
            frequencies = []
            for other in vehicles:
                frequencies.append(computeFrequency(history[other], tasks))
            expected = computePartialFrequencyMatrix(utilities, frequencies, vehicles, tasks, v)
            history[v].append(np.argmax(expected))
        steps += 1
    return [history[v][-1] for v in vehicles]

In [34]:
# testing FP compared to BRD on some examples :

In [35]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

[0, 1, 2, 2]

In [36]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

[0, 1, 2, 2]

In [37]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

[0, 1, 2, 2]

In [38]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[2, 2, 2, 0]

In [39]:
fictitiousPlay(ut, vehicles, tasks, 1000) # this is a better solution than BRD's NE in term of social utility

[2, 2, 2, 0]

In [40]:
# (if cells before have been re-run, it's no more the same ut matrix so no more the same results)
# obtained with fictitious play :  
# [2,1,1,0]  (maxsteps 500)
# [2,1,2,0]  (maxsteps 1500)
print("FP : "+str(ut[2,1,1,0])+" -> mean "+str(np.mean(ut[2,1,1,0])))
print("FP : "+str(ut[2,1,2,0])+" -> mean "+str(np.mean(ut[2,1,2,0])))

# obtained with best response dynamic : 
# [1,2,0,0]  (maxsteps 1000)
# [2,0,1,2]  (maxsteps 1000)
# [0,0,2,1]  (maxsteps 1000)

print("BRD : "+str(ut[1,2,0,0])+" -> mean "+str(np.mean(ut[1,2,0,0])))
print("BRD : "+str(ut[2,0,1,2])+" -> mean "+str(np.mean(ut[2,0,1,2])))
print("BRD : "+str(ut[0,0,2,1])+" -> mean "+str(np.mean(ut[0,0,2,1])))

FP : [5 8 6 4] -> mean 5.75
FP : [9 8 3 7] -> mean 6.75
BRD : [8 7 9 3] -> mean 6.75
BRD : [9 0 2 4] -> mean 3.75
BRD : [8 7 2 6] -> mean 5.75


### Regret Matching

In [41]:
def computeR(R, u, k, tasks, alloc, v): # not used
    """ Update in place the average regret table of vehicle v for not proposing each task

    Row k+1 of R is written from row k, so R needs at least k+2 rows.
    Nothing is done when k == 0.

    Parameters:
        R : np.array(float) the regret table, one row per step and one column per task
        u : the utility matrix
        k : int, the current time step
        tasks : List(int), the list of tasks id (also used as column indices of R)
        alloc : sequence of int, the current allocation
        v : int, the vehicle

    """
    if k == 0:
        return
    # Index with a tuple : indexing the matrix with a list would trigger
    # numpy fancy indexing on the first axis instead of selecting one cell.
    alloc = tuple(alloc)
    current = u[alloc][v]  # utility actually obtained, identical for every task
    for t in tasks :
        loss = u[tuple(replaceAlloc(alloc, v, t))][v] - current
        R[k+1,t] = ((k-1)/k)*R[k,t] + (1/k) * loss

def getNextR(lastR, u, k, tasks, alloc, v):
    """ Compute the average regret vector of vehicle v for not proposing each task

    Parameters:
        lastR : np.array(float) the regret vector for each task at the previous step
        u : the utility matrix
        k : int, the current time step
        tasks : List(int), the list of tasks id (also used as indices of lastR)
        alloc : sequence of int, the current allocation
        v : int, the vehicle
    Return:
        np.array(float) the regret vector for each task at step k
        (all zeros when k == 0)
    """
    if k == 0:
        return np.zeros(len(tasks))
    # Index with a tuple : indexing the matrix with a list would trigger
    # numpy fancy indexing on the first axis instead of selecting one cell.
    alloc = tuple(alloc)
    current = u[alloc][v]  # utility actually obtained, identical for every task
    return np.array([((k-1)/k)*lastR[t] + (1/k) * (u[tuple(replaceAlloc(alloc, v, t))][v] - current) for t in tasks])
        

In [42]:
# computing regret during random play using a regret table
v=0
total_steps = 10
R = np.zeros((total_steps, len(tasks)))
for i in range(total_steps - 1) : # computeR writes row i+1, so the last step that fits in R is total_steps-2
    computeR(R, ut, i, tasks, tuple(np.random.randint(0,len(tasks),len(vehicles))),v)

In [43]:
R # regret at each step k

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 2.        ,  0.        , -1.        ],
       [ 1.        ,  4.        ,  3.5       ],
       [ 2.        ,  2.66666667,  2.33333333],
       [ 1.5       ,  2.75      ,  2.        ],
       [ 2.        ,  2.2       ,  1.6       ],
       [ 1.66666667,  1.33333333,  1.5       ],
       [ 1.42857143,  1.57142857,  1.42857143],
       [ 1.5       ,  1.375     ,  1.625     ]])

In [44]:
# computing it during random play with last regret only (no regret table)
v=0
lastR = np.zeros(len(tasks)) 
for k in range(9) : # computing regret during random play
    lastR = getNextR(lastR, ut, k, tasks, tuple(np.random.randint(0,len(tasks),len(vehicles))),v)
    print(lastR)

[0. 0. 0.]
[4. 3. 0.]
[ 2.   0.5 -1. ]
[ 0.          0.33333333 -0.66666667]
[ 0.   -0.5  -0.25]
[ 0.  -0.8 -0.8]
[ 0.  -1.  -0.5]
[ 0.         -0.42857143 -0.28571429]
[-0.25 -0.75 -0.25]


In [45]:
def probaFromRegret(R):
    """ Turn a regret vector into a probability of proposing each task

    Parameters:
        R : np.array(float) the regret vector for each action at current step k
    Return:
        np.array(float) the probability vector : proportional to the positive
        part of each regret, or uniform when no regret is strictly positive

    """
    regrets = np.array(R)
    clipped = np.where(regrets > 0, regrets, 0)  # negative regrets count as 0
    total = np.sum(clipped)
    if total == 0:
        return np.ones(len(R))/len(R)
    return clipped/total

In [46]:
print("regret vector for v : "+str(lastR))
print("proba : ")
print(probaFromRegret(lastR)) # getting proba based on lastR regret vector
print("proba when all regrets <=0 : ")
print(probaFromRegret([0,0,0])) # if all <=0 : same proba for all

regret vector for v : [-0.25 -0.75 -0.25]
proba : 
[0.33333333 0.33333333 0.33333333]
proba when all regrets <=0 : 
[0.33333333 0.33333333 0.33333333]


In [47]:
def RegretMatching(utilities, vehicles, tasks, maxsteps):
    """ Play Regret Matching until maxsteps

    At each step every vehicle updates its average regret against the current
    allocation and draws its next task with probability proportional to its
    positive regrets. All vehicles move simultaneously.

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxsteps : int, the steps limit of the algorithm

    Returns :
        List(int), the allocation reached after maxsteps steps (no optimality guarantee)

    """
    k = 0
    lastR = np.zeros((len(vehicles), len(tasks)))
    cur_alloc = np.random.randint(0,len(tasks), len(vehicles))
    while k < maxsteps:
        # Fresh array at every step : assigning `cur_alloc = next_alloc` on a
        # single shared array would alias both names, and later vehicles would
        # see the moves made earlier in the same step.
        next_alloc = cur_alloc.copy()
        for v in vehicles:
            # use the matrix passed as argument, not the notebook-level `ut`
            nextRv = getNextR(lastR[v], utilities, k, tasks, tuple(cur_alloc), v)
            proba = probaFromRegret(nextRv)
            next_alloc[v] = np.random.choice(tasks, p=proba)
            lastR[v] = nextRv # update the regret of v
        cur_alloc = next_alloc # update the allocation
        k += 1
    return list(cur_alloc)

In [48]:
# some runs of the 3 methods on the same ut matrix :

In [49]:
RegretMatching(ut, vehicles, tasks, 1000) 

[0, 1, 2, 2]

In [50]:
RegretMatching(ut, vehicles, tasks, 1000)

[0, 1, 2, 2]

In [51]:
RegretMatching(ut, vehicles, tasks, 1000)

[0, 0, 0, 2]

In [52]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[2, 2, 2, 0]

In [53]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[2, 2, 2, 0]

In [54]:
fictitiousPlay(ut, vehicles, tasks, 1000)

[2, 2, 2, 0]

In [55]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

[0, 1, 2, 2]

In [56]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

[2, 2, 2, 0]

In [57]:
bestResponseDynamic(ut, vehicles, tasks, 1000)

[0, 1, 2, 2]

### Spatial Adaptive Play

In [58]:
allocation = [0, 1, 1, 2]

In [59]:
replaceAlloc(allocation, 1, 2)

[0, 2, 1, 2]

In [60]:
def softmax(x):
    """ Compute the softmax of x along its first axis

    Parameters :
        x : array-like of numbers

    Returns :
        np.array(float) of the same shape as x, non-negative and summing to 1
        along axis 0

    """
    z = np.asarray(x, dtype=float)
    if z.size:
        # Subtracting the maximum leaves the result unchanged mathematically
        # but keeps np.exp from overflowing to inf (and the ratio to nan).
        z = z - np.max(z, axis=0)
    e = np.exp(z)
    return e / np.sum(e, axis=0)

In [61]:
def computeSAPproba(utilities, alloc, vehicle, tasks, ro = 0.1):
    """ Compute the probability for a vehicle to propose each task

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        alloc : List(int) the current allocation
        vehicle : int, the vehicle that may change its task
        tasks : List(int) the list of task ids
        ro : float, the temperature dividing the utilities before the softmax

    Returns :
        np.array(float), the softmax of the vehicle's utilities for each task,
        the other vehicles keeping their current task

    """
    payoffs = []
    for t in tasks:
        trial = tuple(replaceAlloc(alloc, vehicle, t))  # vehicle switches to t
        payoffs.append(utilities[trial][vehicle])
    return softmax(np.array(payoffs)/ro)

In [62]:
computeSAPproba(ut, allocation, 1, tasks, ro = 0.1) # very small ro value : probas are very unbalanced (no random)

array([9.99999998e-01, 2.06115362e-09, 1.92874984e-22])

In [63]:
computeSAPproba(ut, allocation, 1, tasks, ro = 1) # small ro value : probas are unbalanced (very small random)

array([0.8756006 , 0.11849965, 0.00589975])

In [64]:
computeSAPproba(ut, allocation, 1, tasks, ro = 1000) # very high ro value : probas are ~ uniform (a lot of random)

array([0.33411131, 0.33344376, 0.33244493])

In [65]:
def SpatialAdaptativePlay(utilities, vehicles, tasks, maxsteps, ro):
    """ Play Spatial Adaptive Play until maxsteps

    At each step one vehicle is drawn uniformly and picks a new task at
    random, with the probabilities returned by computeSAPproba.

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        vehicles : List(int), the list of vehicle ids
        tasks : List(int) the list of task ids
        maxsteps : int, the number of steps to play
        ro : float, the temperature forwarded to computeSAPproba

    Returns :
        List(int), the allocation reached after maxsteps steps (no optimality guarantee)

    """
    k = 0
    cur_alloc = np.random.randint(0,len(tasks), len(vehicles))
    while k < maxsteps:
        v = np.random.choice(vehicles) # select uniformly the vehicle which is given the chance to update its task
        # use the arguments : the notebook-level `ut` and a hard-coded ro = 1
        # made both `utilities` and `ro` silently ignored
        proba = computeSAPproba(utilities, cur_alloc, v, tasks, ro = ro) # compute proba to propose each task
        t = np.random.choice(tasks, p=proba) # choose the task to propose
        cur_alloc = replaceAlloc(cur_alloc, v, t) # update the allocation
        k += 1
    return list(cur_alloc)

In [66]:
# spatial adaptative play
SpatialAdaptativePlay(ut, vehicles, tasks, 1000, ro=1)

[2, 1, 1, 2]

### Comparing the different negotiation methods :

In [66]:
# compare utility sum and execution time of the 4 methods
score_DMR, time_DMR = [], []
score_FP, time_FP = [], []
score_RM, time_RM = [], []
score_SAP, time_SAP = [], []

# (label, solver taking the utility matrix, score list, duration list)
benchmarks = [
    ("DMR", lambda u: bestResponseDynamic(u, vehicles, tasks, 1000), score_DMR, time_DMR), # best response dynamic
    ("FP", lambda u: fictitiousPlay(u, vehicles, tasks, 1000), score_FP, time_FP), # fictitious play
    ("RM", lambda u: RegretMatching(u, vehicles, tasks, 1000), score_RM, time_RM), # regret matching
    ("SAP", lambda u: SpatialAdaptativePlay(u, vehicles, tasks, 1000, ro=1), score_SAP, time_SAP), # spatial adaptative play
]

for step in range(10):# 10 different utility matrix
    ut = np.random.randint(0,10,shapes) # create utility matrix with random utilities
    for repeat in range(10): # 10 repeats on each utility matrix
        for label, solve, scores, durations in benchmarks:
            # perf_counter is a monotonic, high-resolution clock meant for measuring intervals
            start = time.perf_counter()
            alloc = tuple(solve(ut))
            duration = time.perf_counter() - start
            print(label+" : "+str(alloc))
            scores.append(np.sum(ut[alloc])) # social utility of the allocation found
            durations.append(duration)

DMR : (0, 2, 0, 0)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (0, 2, 0, 0)
DMR : (0, 2, 0, 0)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (1, 0, 2, 2)
DMR : (0, 1, 2, 1)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (2, 2, 2, 2)
DMR : (2, 2, 1, 1)
FP : (0, 1, 2, 1)
RM : (0, 1, 0, 1)
SAP : (2, 1, 1, 1)
DMR : (0, 2, 0, 0)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (1, 2, 2, 1)
DMR : (2, 2, 1, 1)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (0, 1, 2, 1)
DMR : (1, 0, 2, 0)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (2, 0, 2, 2)
DMR : (0, 1, 2, 1)
FP : (0, 1, 2, 1)
RM : (0, 1, 2, 1)
SAP : (2, 1, 2, 1)
DMR : (2, 2, 1, 1)
FP : (0, 1, 2, 1)
RM : (2, 1, 1, 2)
SAP : (2, 2, 1, 1)
DMR : (0, 1, 2, 1)
FP : (0, 1, 2, 1)
RM : (2, 0, 1, 0)
SAP : (1, 1, 2, 0)
DMR : (1, 0, 2, 2)
FP : (1, 0, 2, 2)
RM : (1, 0, 2, 2)
SAP : (1, 0, 0, 2)
DMR : (1, 0, 2, 2)
FP : (1, 0, 2, 2)
RM : (1, 0, 2, 2)
SAP : (1, 1, 0, 2)
DMR : (1, 0, 2, 2)
FP : (1, 0, 2, 2)
RM : (0, 2, 1, 0)
SAP : (1, 0, 2, 2)
DMR : (1, 0, 2, 2)
FP : (1, 0, 2, 2)
R

FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (1, 2, 2, 1)
DMR : (0, 1, 1, 0)
FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (0, 1, 1, 0)
Execution stopped : maximum step overflowed, no EN found.
DMR : (2, 0, 2, 2)
FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (2, 2, 0, 0)
DMR : (1, 2, 2, 1)
FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (2, 0, 2, 2)
DMR : (0, 1, 1, 0)
FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (0, 0, 1, 0)
DMR : (1, 2, 2, 1)
FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (1, 2, 2, 1)
DMR : (1, 2, 2, 1)
FP : (0, 1, 1, 0)
RM : (0, 1, 1, 0)
SAP : (0, 1, 1, 0)
DMR : (0, 1, 1, 0)
FP : (0, 1, 1, 0)
RM : (0, 1, 1, 0)
SAP : (0, 2, 1, 2)
DMR : (0, 1, 1, 0)
FP : (0, 1, 1, 0)
RM : (1, 2, 2, 1)
SAP : (1, 2, 2, 0)
DMR : (0, 1, 0, 0)
FP : (1, 0, 2, 0)
RM : (1, 0, 2, 0)
SAP : (2, 1, 0, 0)
DMR : (1, 0, 1, 1)
FP : (1, 0, 2, 0)
RM : (1, 0, 2, 0)
SAP : (1, 0, 1, 1)
DMR : (1, 0, 2, 0)
FP : (1, 0, 2, 0)
RM : (1, 0, 2, 0)
SAP : (0, 2, 2, 0)
Execution stopped : maximum step overflowed, no EN found.
DMR : (1, 2, 0,

In [67]:
# mean utility sum and mean execution time of each method,
# aggregated over all the runs of the comparison cell (10 utility matrices x 10 repeats)
print("mean DMR : "+str(np.mean(score_DMR))+" average time : "+str(np.mean(time_DMR)))
print("mean FP : "+str(np.mean(score_FP))+" average time : "+str(np.mean(time_FP)))
print("mean RM : "+str(np.mean(score_RM))+" average time : "+str(np.mean(time_RM)))
print("mean SAP : "+str(np.mean(score_SAP))+" average time : "+str(np.mean(time_SAP)))

mean DMR : 24.42 average time : 0.005681312084197998
mean FP : 27.6 average time : 4.558608031272888
mean RM : 27.31 average time : 0.24934000492095948
mean SAP : 23.48 average time : 0.057477707862854006


In [67]:
def getNashProduct(utilities, allocation):
    """ Compute the Nash social welfare (product of utilities) of an allocation

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)

    Returns :
        The product of the utilities of all vehicles for this allocation,
        as a Python number (exact for integer utilities, whatever its size)

    """

    nsw = 1
    for u in utilities[tuple(allocation)]:
        # numpy fixed-width integers wrap around silently on overflow :
        # multiply Python scalars instead, which have arbitrary precision
        nsw *= u.item() if isinstance(u, np.generic) else u

    return nsw

In [68]:
ut_test = np.random.randint(0,10,shapes)
allocation_res = fictitiousPlay(ut_test, vehicles, tasks, 1000)
print("allocation res =", allocation_res)
print("utility values for the allocation =", ut_test[tuple(allocation_res)])
print("Nash product associated =", getNashProduct(ut_test, allocation_res))

allocation res = [1, 2, 0, 0]
utility values for the allocation = [7 4 4 8]
Nash product associated = 896


In [69]:
def getSumUt(utilities, allocation):
    """ Compute the utilitarian welfare (sum of utilities) of an allocation

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)

    Returns :
        The sum of the utilities of all vehicles for this allocation

    """
    per_vehicle = utilities[tuple(allocation)]
    # builtin sum starts from 0 and adds the values in order
    return sum(per_vehicle)

In [70]:
print("Ut sum associated =", getSumUt(ut_test, allocation_res))

Ut sum associated = 23


In [71]:
def getEgalitarienSW(utilities, allocation):
    """ Compute the egalitarian social welfare (lowest utility) of an allocation

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)

    Returns :
        The smallest utility obtained by a vehicle for this allocation

    """
    per_vehicle = utilities[tuple(allocation)]
    lowest = per_vehicle[0]
    for value in per_vehicle[1:]:
        lowest = value if value < lowest else lowest
    return lowest

In [72]:
print("ESW associated =", getEgalitarienSW(ut_test, allocation_res))

ESW associated = 4


In [73]:
def getProportionality(utilities, allocation, vehicles):
    """ Tell whether an allocation satisfies the proportionality test used here

    The test compares every vehicle's utility with the sum of the utilities
    of the allocation divided by the number of vehicles.

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)
        vehicles : List(int) the list of vehicle ids

    Returns :
        Bool
        False as soon as one vehicle gets strictly less than that share,
        True otherwise

    """
    fair_share = getSumUt(utilities, allocation)/len(vehicles)
    return all(not (u < fair_share) for u in utilities[tuple(allocation)])

In [74]:
print("Prop associated =", getProportionality(ut_test, allocation_res, vehicles))

Prop associated = False


In [75]:
def computeEnvies(utilities, allocation, vehicles):
    """ Compute the pairwise envy between vehicles for an allocation

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task allocated to each vehicle (in order)
        vehicles : List(int) the list of vehicle ids

    Returns :
        Matrix(Int) of dimension nb_vehicles*nb_vehicles
        Entry [i,j] is what vehicle i would gain by taking the task of
        vehicle j (the others keeping theirs); the diagonal is 0.

    """
    nb_agents = len(vehicles)
    envies = np.zeros((nb_agents,nb_agents), dtype=int)

    # all ordered pairs of distinct vehicles
    pairs = ((a, b) for a in range(nb_agents) for b in range(nb_agents) if a != b)
    for i, j in pairs:
        swapped = tuple(replaceAlloc(allocation, i, allocation[j]))  # i does j's task
        envies[i,j] = utilities[swapped][i] - utilities[tuple(allocation)][i]

    return envies

def getMaxEnvy(utilities, allocation, vehicles):
    """ Give the largest envy value found in an allocation

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task given to each vehicle (in vehicle order)
        vehicles : List(int) the list of vehicle ids

    Returns :
        Int
        The highest strictly positive entry of the envy matrix, or 0 when
        no entry is positive
        (Utilities are supposed to be expressed as int)

    """
    envy_matrix = computeEnvies(utilities, allocation, vehicles)
    agent_ids = range(len(vehicles))

    # Only strictly positive envies can beat the baseline of 0.
    positive_envies = [
        envy_matrix[envier, envied]
        for envier in agent_ids
        for envied in agent_ids
        if envy_matrix[envier, envied] > 0
    ]

    return max(positive_envies, default=0)



def getEnvy(utilities, allocation, vehicles):
    """ Tell whether an allocation is envy-free (EF)

    Parameters :
        utilities : Matrix(int) the utility matrix of dimension nb_tasks^nb_vehicles * nb_vehicles
        allocation : List(int) the task given to each vehicle (in vehicle order)
        vehicles : List(int) the list of vehicle ids

    Returns :
        Bool
        True when the maximum envy is not positive (EF), False otherwise
        (Utilities are supposed to be expressed as int)

    """
    # Envy-free exactly when the largest envy is not strictly positive.
    return not (getMaxEnvy(utilities, allocation, vehicles) > 0)


In [76]:
# Envy matrix of allocation_res: entry [i, j] is vehicle i's utility change if it took vehicle j's task.
print("Envies associated =", computeEnvies(ut_test, allocation_res, vehicles))

Envies associated = [[ 0 -2 -5 -5]
 [-2  0 -1 -1]
 [ 0 -4  0  0]
 [-2  0  0  0]]


In [77]:
# Envy-free means the largest entry of the envy matrix is not strictly positive.
print("Envyfreeness associated =", getEnvy(ut_test, allocation_res, vehicles))

Envyfreeness associated = True


In [78]:
def getRandomUt(nb_vehicles, nb_tasks, ut_range):
    """ Draw a random utility matrix that includes a null task

    Task index 0 is the null task: a vehicle assigned to it always gets utility 0.
    All the other utilities are drawn with np.random.randint over ut_range.

    Parameters :
        nb_vehicles : int, the number of vehicles
        nb_tasks : int, the number of tasks (the null task is added on top of them)
        ut_range : Tuple(int) the (low, high) bounds given to np.random.randint

    Returns :
        Matrix(Int)
        The utility matrix produced

    """
    low, high = ut_range
    task_count = nb_tasks + 1  # real tasks plus the null task at index 0

    utilities = np.random.randint(low, high, getShape(nb_vehicles, task_count))

    # Visit every allocation (one task index per vehicle).
    for alloc in np.ndindex(*([task_count] * nb_vehicles)):
        per_vehicle = utilities[alloc]  # view on the utilities of this allocation
        for vehicle, task in enumerate(alloc):
            if task == 0:
                per_vehicle[vehicle] = 0

    return utilities
   

In [79]:
def getEqualySharedUt(nb_vehicles, nb_tasks, ut_range):
    """ Compute a random utility matrix, with a null task, where the vehicles
    sharing the same task share its utility equally

    For every allocation, the randomly drawn utilities of the vehicles assigned
    to the same task are averaged and each of these vehicles receives that
    average. Task index 0 is the null task: a vehicle assigned to it gets 0.

    Parameters :
        nb_vehicles : int, the number of vehicles
        nb_tasks : int, the number of tasks (the null task is added on top of them)
        ut_range : Tuple(int) the (low, high) bounds given to np.random.randint

    Returns :
        Matrix(Int)
        The utility matrix produced (averages are truncated when stored,
        because the matrix has an integer dtype)

    """
    min_ut, max_ut = ut_range
    task_count = nb_tasks + 1  # real tasks plus the null task at index 0

    utilities = np.random.randint(min_ut, max_ut, getShape(nb_vehicles, task_count))

    # Visit every allocation (one task index per vehicle).
    for alloc in np.ndindex(*([task_count] * nb_vehicles)):
        per_vehicle = utilities[alloc]  # view on the utilities of this allocation

        # First pass: total drawn utility and head-count of every task in use.
        # It must finish before any value is overwritten below.
        totals = {}
        counts = {}
        for vehicle, task in enumerate(alloc):
            totals[task] = totals.get(task, 0) + per_vehicle[vehicle]
            counts[task] = counts.get(task, 0) + 1

        # Second pass: each vehicle receives the average of ITS OWN task.
        # (The previous code indexed with the stale loop variable of the first
        # pass, so everyone received the average of the last vehicle's task.)
        for vehicle, task in enumerate(alloc):
            if task == 0:
                per_vehicle[vehicle] = 0
            else:
                per_vehicle[vehicle] = totals[task] / counts[task]

    return utilities
    
    

In [87]:
# Sanity check: look for NaN entries in a freshly generated shared-utility matrix.
# The matrix is produced by np.random.randint, so it has an integer dtype.
test = np.isnan(getEqualySharedUt(4, 3, (0,10)))
# `test == True` is elementwise, so this prints the full boolean array.
print(test == True)

[[[[[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]]


  [[[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]]


  [[[False False Fal

In [85]:
# np.nan is a plain Python float, not an array, so it has no .all() method (AttributeError).
np.nan.all()

AttributeError: 'float' object has no attribute 'all'

In [110]:
# .all() on an all-zero array is False, and False == 0 evaluates to True.
np.zeros(1).all() == 0

True

In [109]:
# ndarray.fill() works in place and returns None, so `a` is bound to None here,
# not to a NaN-filled array.
a = np.empty((3,3)).fill(np.nan)
print(a == None)

True


In [95]:
# NOTE(review): the recorded TypeError is consistent with `a` being None rather
# than a numeric array when this cell ran — confirm how `a` was assigned.
np.isnan(a)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [100]:
# Elementwise == against np.nan is False everywhere (NaN never compares equal to
# anything), so this cannot detect NaN values; np.isnan is the tool for that.
print(getEqualySharedUt(4, 3, (0,10)) == np.nan)

[[[[[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]]


  [[[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]

   [[False False False False]
    [False False False False]
    [False False False False]
    [False False False False]]]


  [[[False False Fal

In [103]:
# NaN is not equal to itself, hence False.
np.nan == np.nan

False

In [None]:
# NOTE(review): unexecuted scratch cell. `np` is the numpy module (see the notebook
# imports) and a module is not callable, so this would raise TypeError — candidate for removal.
np()