In [117]:
using LinearAlgebra
using SparseArrays
using PyCall
using Plots
using IterTools
using Distributions
using ProgressMeter

┌ Info: Precompiling ProgressMeter [92933f4c-e287-5a05-a399-4b506db050ca]
└ @ Base loading.jl:1278


In [4]:
py"""
def allActions(nAct, nPlayers):
    import itertools
    idX = list(itertools.product(list(range(1, nAct + 1)), repeat=nPlayers)) * nPlayers
    return idX
"""
allActions(nAct, nPlayers) = py"allActions"(nAct, nPlayers)

allActions (generic function with 1 method)

In [12]:
function generateCorrespondences(gameParams)
    nSim, nPlayers, nActions = gameParams
    nElements = nActions^nPlayers
    Actions = allActions(nActions, nPlayers)
    allCombos = [[findall(x -> x==tuple(circshift([j for j in Actions[i]], -1*(p))...), Actions)[p+1] for p=0:nPlayers-1] for i=1:nElements]
    
    return Actions, allCombos
end

generateCorrespondences (generic function with 1 method)

In [6]:
function generateGames(Gamma, nSim, allCombos, nActions, nPlayers)
    """
    Create Covariance Matrix from Gamma
    
    Return Random Payoff Matrices: nPlayers*nElements x nSim
    
    """
    nElements = nActions^nPlayers
    cov = Matrix(1.0I, nPlayers * nElements, nPlayers * nElements)
    
    """ Cycle through each of the combinations and assign them the appropriate Gamma """
    for c in allCombos
        cov[c[1], c[2:end]] .= Gamma/(nPlayers-1)
        cov[c[2:end], c[1]] .= Gamma/(nPlayers-1)
    end
        
    """ draw from a Gaussian using this covariance matrix """

    return rand(MvNormal(zeros(nPlayers * nElements), cov), nSim)

end

generateGames (generic function with 1 method)

In [7]:
function getActionProbs(qValues, agentParams)
    """
    qValues: nPlayer x nActions x nSim
    return: nPlayer x nActions x nSim
    """
    alpha, tau, gamma = agentParams
    
    return exp.(tau * qValues)./sum(exp.(tau * qValues), dims=2)
end

getActionProbs (generic function with 1 method)

In [8]:
function chooseActions(actionProbs, gameParams)
    """
    arg: actionProbs: nPlayer x nActions x nSim
    return: nPlayer x nSim
    """
    
    nSim, nPlayers, nActions = gameParams
    
    return [[rand(Bernoulli(actionProbs[p, 1, s])) + 1 for p=1:nPlayers] for s=1:nSim]
end

chooseActions (generic function with 1 method)

In [9]:
function findPayoffs(choices, gameParams, corr, actions, payoffs)
    choiceIdx = [corr[findall(x -> x==tuple(choices[s]...), actions)[1]] for s=1:nSim]
    return [payoffs[choiceIdx[s], s] for s=1:nSim]
end

findPayoffs (generic function with 1 method)

In [10]:
function qUpdate!(qValues, payoffs, gameParams, agentParams, correlations, actions)
    actionProbs = getActionProbs(qValues, agentParams)
    bChoice = chooseActions(actionProbs, gameParams)
    rewards = findPayoffs(bChoice, gameParams, correlations, actions, payoffs)

    update = [rewards[s] - diag(qValues[:, bChoice[s], s]) + gamma * findmax(qValues[:, :, s], dims=2)[1] for s=1:nSim]
    
    for s = 1:nSim
        for p = 1:nPlayers
            qValues[p, bChoice[s][p], s] += alpha * update[s][p]
        end
    end
end

qUpdate! (generic function with 1 method)

In [95]:
function checkminMix(actionTracker, nSim, tol)
    T = hcat([reshape(actionTracker[i], nPlayers * nSim * nActions) for i=1:length(actionTracker)]...)
    relDiff = (findmax(T, dims=2)[1] - findmin(T, dims=2)[1])./findmin(T, dims=2)[1]

    part = [relDiff[i*(nActions*nPlayers)+1:(i+1)*(nActions*nPlayers)] for i=0:nSim-1]
    bRemove = [length(findall(x->x<0.01, p))>=length(p) for p in part]
    
    return bRemove
end

checkminMix (generic function with 1 method)

In [105]:
alpha = 0.03
tau = 0.05
gamma = 0.1

initnSim = 10
nSim = 2
nPlayers = 4
nActions = 2

Gamma = -1

gameParams = (nSim, nPlayers, nActions)
agentParams = (alpha, tau, gamma)

t0 = 5000
actions, corr = generateCorrespondences(gameParams)

([(1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 2, 1), (1, 1, 2, 2), (1, 2, 1, 1), (1, 2, 1, 2), (1, 2, 2, 1), (1, 2, 2, 2), (2, 1, 1, 1), (2, 1, 1, 2)  …  (1, 2, 2, 1), (1, 2, 2, 2), (2, 1, 1, 1), (2, 1, 1, 2), (2, 1, 2, 1), (2, 1, 2, 2), (2, 2, 1, 1), (2, 2, 1, 2), (2, 2, 2, 1), (2, 2, 2, 2)], [[1, 17, 33, 49], [2, 19, 37, 57], [3, 21, 41, 50], [4, 23, 45, 58], [5, 25, 34, 51], [6, 27, 38, 59], [7, 29, 42, 52], [8, 31, 46, 60], [9, 18, 35, 53], [10, 20, 39, 61], [11, 22, 43, 54], [12, 24, 47, 62], [13, 26, 36, 55], [14, 28, 40, 63], [15, 30, 44, 56], [16, 32, 48, 64]])

In [None]:
qValues = rand(nPlayers, nActions, nSim)
payoffs = generateGames(Gamma, nSim, corr, nActions, nPlayers)

actionTracker = []

nSim = initnSim
converged = 0

p = Progress(15000)

for cIter = 1:15000

    if cIter==t0
        actionTracker = []
    end    
    
    if cIter%t0==0 && cIter!=0 && cIter!=t0
        bRemove = checkminMix(actionTracker, nSim, 0.01)
        removeIdx = findall(bRemove)
        if length(removeIdx) > 0
            qValues = qValues[:, :, 1:end .∉ removeIdx]
            payoffs = payoffs[:, 1:end .∉ removeIdx]
            nSim -= 1
            converged += 1
        end
        actionTracker = [] 
    end
    
    if nSim <= 0
        break
    end
        gameParams = (nSim, nPlayers, nActions)
        agentParams = (alpha, tau, gamma)

        qUpdate!(qValues, payoffs, gameParams, agentParams, corr, actions)
        append!(actionTracker, [getActionProbs(qValues, agentParams)])
    
    sleep(0.5)
    ProgressMeter.next!(p; showvalues = [(:iter,cIter)])

end

│  - To prevent this behaviour, do `ProgressMeter.ijulia_behavior(:append)`. 
└ @ ProgressMeter /Users/aamalhussain/.julia/packages/ProgressMeter/GhAId/src/ProgressMeter.jl:463
[32mProgress:   0%|▏                                        |  ETA: 2:07:03[39m
[34m  iter:  61[39m

In [104]:
T = hcat([reshape(actionTracker[i], nPlayers * nSim * nActions) for i=1:length(actionTracker)]...)
relDiff = (findmax(T, dims=2)[1] - findmin(T, dims=2)[1])./findmin(T, dims=2)[1]

part = [relDiff[i*(nActions*nPlayers)+1:(i+1)*(nActions*nPlayers)] for i=0:nSim-1]
bRemove = [length(findall(x->x<0.01, p))>=length(p) for p in part]

bRemove[1] = 1
removeIdx = findall(bRemove)


2

In [116]:
nSim

10

In [14]:
qValues = rand(nPlayers, nActions, nSim)
payoffs = generateGames(Gamma, nSim, corr, nActions, nPlayers)

probs = getActionProbs(qValues, (0.05, 1, 1))
bChoice = [[rand(Bernoulli(probs[p, 1, s])) + 1 for p=1:nPlayers] for s=1:nSim]

choiceIdx = [corr[findall(x -> x==tuple(bChoice[s]...), actions)[1]] for s=1:nSim]

rewards = [payoffs[choiceIdx[s], s] for s=1:nSim]

update = [rewards[s] - diag(qValues[:, bChoice[s], s]) + gamma * findmax(qValues[:, :, s], dims=2)[1] for s=1:nSim]
# [i for i in bChoice[1]]
for s = 1:nSim
    for p = 1:nPlayers
        qValues[p, bChoice[s][p], s] += alpha * update[s][p]
    end
end

allCombos = [[1, 17, 33, 49], [2, 19, 37, 57], [3, 21, 41, 50], [4, 23, 45, 58], [5, 25, 34, 51], [6, 27, 38, 59], [7, 29, 42, 52], [8, 31, 46, 60], [9, 18, 35, 53], [10, 20, 39, 61], [11, 22, 43, 54], [12, 24, 47, 62], [13, 26, 36, 55], [14, 28, 40, 63], [15, 30, 44, 56], [16, 32, 48, 64]]


In [None]:
LinRange(1e-2, 5e-2, 10)

In [22]:
append!(a, 1)

2-element Array{Any,1}:
 1
 1