In [23]:
using CSV
using DataFrames
using Statistics
using StatsBase
using Random
using LinearAlgebra
using SparseArrays
using Distributions
using Bootstrap
import Bootstrap:draw!
import Base:copy, length

In [24]:
BLAS.set_num_threads(1)

In [25]:
"""
    make_strats(num_strats)

Enumerate the joint (pure) strategy space of a game in which player `k` has
`num_strats[k]` strategies.

# Returns
A 4-tuple `(strats_tuples, idx2strat, strat2idx, alt_strats)`:
- `strats_tuples`: `K`-dimensional array holding every joint strategy as a tuple
  (the first player's strategy varies fastest).
- `idx2strat`: linear index of a joint strategy => its tuple.
- `strat2idx`: tuple => linear index (inverse of `idx2strat`).
- `alt_strats[k][s]`: linear indices of the joint strategies obtained from joint
  strategy `s` when only player `k` changes its strategy (`s` itself is included).
"""
function make_strats(num_strats)
    K = length(num_strats)
    strats = [1:n for n in num_strats]
    # Materialise the product once; the lookup tables are filled from it instead of
    # splatting every pair into a varargs `Dict` constructor.
    strats_tuples = collect(Iterators.product(strats...))
    idx2strat = Dict{Int64,NTuple{K,Int64}}()
    strat2idx = Dict{NTuple{K,Int64},Int64}()
    sizehint!(idx2strat, length(strats_tuples))
    sizehint!(strat2idx, length(strats_tuples))
    for (n, s) in enumerate(strats_tuples)
        idx2strat[n] = s
        strat2idx[s] = n
    end

    alt_strats = Vector{Vector{Vector{Int64}}}()
    for k in 1:K
        palts = Vector{Vector{Int64}}()
        for sidx in 1:length(strats_tuples)
            s = idx2strat[sidx]
            # Replace only player k's component, keeping everyone else's choice fixed.
            psalts = Int64[
                strat2idx[ntuple(j -> (j == k ? i : s[j]), K)] for i in 1:num_strats[k]
            ]
            push!(palts, psalts)
        end
        push!(alt_strats, palts)
    end

    return strats_tuples, idx2strat, strat2idx, alt_strats
end

"""
    compute_eta(num_strats)

Return the reciprocal of `Σ_k (num_strats[k] - 1)`, i.e. one over the total number of
single-player deviations available from any joint strategy.
"""
function compute_eta(num_strats)
    deviations = sum(num_strats) - length(num_strats)
    return 1.0 / deviations
end

compute_eta (generic function with 1 method)

In [26]:
"""
    makeCinfinite_dense!(C, strats, alts, players, payoffs, η, m)

Fill the dense matrix `C` in place with the transition probabilities between joint
strategies and return it.

For every joint strategy `i`, player `k` and alternative `j in alts[k][i]` (`j != i`):
- `C[i, j] = η` when `payoffs(k, j) > payoffs(k, i)`,
- `C[i, j] = η / m` when the two payoffs are approximately equal (`isapprox`),
- `C[i, j] = 0` otherwise.
The diagonal entry is then set so that each row sums to one.

# Arguments
- `C`: square matrix to overwrite; any previous contents are discarded.
- `strats`: collection of joint strategies (only its length is used).
- `alts`: `alts[k][i]` lists the indices reachable from `i` when player `k` deviates.
- `players`: number of players.
- `payoffs`: callable `payoffs(k, s)` returning player `k`'s payoff in joint strategy `s`.
- `η`: probability assigned to a strictly improving deviation.
- `m`: divisor applied to `η` for ties.
"""
function makeCinfinite_dense!(C, strats, alts, players, payoffs, η, m)
    numS = length(strats)
    tie = η / m
    # Start from a clean slate: non-improving moves are never written below, so stale
    # entries in a reused buffer would otherwise leak into the result.
    fill!(C, zero(eltype(C)))
    for i in 1:numS
        for k in 1:players
            f_cur = payoffs(k, i)
            for j in alts[k][i]
                j == i && continue
                f_alt = payoffs(k, j)
                if f_alt > f_cur
                    C[i, j] = η
                elseif isapprox(f_alt, f_cur)
                    C[i, j] = tie
                end
            end
        end
    end
    # The diagonal is still zero here, so the row sum is the total outgoing probability.
    for s in 1:numS
        C[s, s] = 1.0 - sum(view(C, s, :))
    end
    return C
end



"""
    makeCinfinite_bounds(strats, alts, players, payoffs, η, m)

Build sparse lower/upper bounds on the transition matrix between joint strategies when
payoffs are only known up to an interval.

`payoffs(k, s, :left)` / `payoffs(k, s, :right)` give the lower / upper end of player
`k`'s payoff interval in joint strategy `s`. For a deviation `i -> j`:
- the alternative's interval lies strictly above the current one: both bounds are `η`;
- it lies strictly below: both bounds are `0`;
- both interval ends are approximately equal: both bounds are `η / m`;
- otherwise the edge is uncertain and bounded by `[0, η]`.

# Returns
`(Clow, Chigh, Eu, num_uncertain)` where `Eu[i]` holds `i` followed by the targets of
the uncertain edges leaving `i`, and `num_uncertain` counts all uncertain edges. The
diagonal of `Clow` is `1 - sum` of the off-diagonal upper bounds of its row, and the
diagonal of `Chigh` is `1 - sum` of the off-diagonal lower bounds.
"""
function makeCinfinite_bounds(strats, alts, players, payoffs, η, m)
    numS = length(strats)
    tie = η / m
    rows = Int64[]
    cols = Int64[]
    lo_vals = Float64[]
    hi_vals = Float64[]
    Eu = Vector{Vector{Int64}}()
    num_uncertain = 0

    for i in 1:numS
        # Every state lists itself first: the self-loop absorbs the leftover mass.
        uncertain = Int64[i]
        for k in 1:players, j in alts[k][i]
            j == i && continue
            f1_low = payoffs(k, j, :left)
            f1_high = payoffs(k, j, :right)
            f2_low = payoffs(k, i, :left)
            f2_high = payoffs(k, i, :right)
            push!(rows, i)
            push!(cols, j)
            lo, hi = if f1_low > f2_high
                (η, η)
            elseif f2_low > f1_high
                (0.0, 0.0)
            elseif isapprox(f1_low, f2_low) && isapprox(f1_high, f2_high)
                (tie, tie)
            else
                push!(uncertain, j)
                num_uncertain += 1
                (0.0, η)
            end
            push!(lo_vals, lo)
            push!(hi_vals, hi)
        end
        push!(Eu, uncertain)
    end

    Clow = dropzeros!(sparse(rows, cols, lo_vals, numS, numS))
    Chigh = dropzeros!(sparse(rows, cols, hi_vals, numS, numS))
    for s in 1:numS
        # Compute both complements before touching either diagonal entry.
        slack_from_low = 1.0 - sum(Clow[s, :])
        slack_from_high = 1.0 - sum(Chigh[s, :])
        Clow[s, s] = slack_from_high
        Chigh[s, s] = slack_from_low
    end
    return Clow, Chigh, Eu, num_uncertain
end


makeCinfinite_bounds (generic function with 1 method)

In [27]:
"""
    valueiteration_rowsp!(Pi, Pi_low, Pi_high, Ri, v, γ, numS, p0, rpert; Eu)

Overwrite the entries `Pi[Eu]` of one transition row with the assignment that favours
the highest-scoring targets, subject to `Pi_low[j] <= Pi[j] <= Pi_high[j]`.

The entries start at their lower bounds. Targets are then visited in decreasing order
of `Ri + γ * v[j] + rpert[j]`, and each one receives as much extra probability as its
upper bound allows until the running total (starting at `p0`) reaches one. `numS` is
accepted but not used. Returns `nothing`.
"""
function valueiteration_rowsp!(Pi, Pi_low, Pi_high, Ri, v, γ, numS, p0, rpert; Eu)
    Pi[Eu] .= Pi_low[Eu]
    isempty(Eu) && return nothing

    scores = zeros(length(Eu))
    @. scores = Ri + γ * v[Eu] + rpert[Eu]

    mass = p0
    # Best-scoring target first.
    for pos in reverse(sortperm(scores))
        j = Eu[pos]
        lo = Pi_low[j]
        room = min(Pi_high[j] - lo, 1.0 - mass)
        Pi[j] = lo + room
        mass += room
    end
    return nothing
end

# Error bound `2ϵγ / (1 - γ)` computed from the last value-function change `ϵ` and the
# discount factor `γ`.
function vierror_bound(ϵ, γ)
    numerator = 2 * ϵ * γ
    return numerator / (1 - γ)
end

"""
    valueiteration_contagg(P_low, P_high, Eu, Rs, numS, direction; γ=(1.0 - 1e-8))

Optimise the discounted value of a Markov chain whose transition matrix is only known
to lie between `P_low` and `P_high`, and return `(1 - γ) * mean(abs.(v))` for the final
value function `v`.

Each sweep re-assigns, for every state `s`, the probabilities on its uncertain edges
`Eu[s]` with `valueiteration_rowsp!` (greedy with respect to the previous value
function plus a tiny random tie-breaking perturbation), then re-evaluates `v` by solving
`(I - γP) v = R`. Iteration stops when no uncertain entry moved by `1e-8` or more, when
`(1 - γ) * vierror_bound(ϵ, γ) < 1e-7`, or after 400 sweeps (a message is printed).

# Arguments
- `P_low`, `P_high`: `numS × numS` element-wise bounds on the transition matrix.
- `Eu`: `Eu[s]` lists the column indices of row `s` that may be adjusted.
- `Rs`: per-state reward vector.
- `direction`: `:max` uses `Rs`, `:min` uses `-Rs`. Any other value prints an error
  message and returns `NaN`.
- `γ`: discount factor.
"""
function valueiteration_contagg(P_low, P_high, Eu, Rs, numS, direction; γ=(1.0 - 1e-8))
    R = zeros(size(Rs))
    if direction == :max
        R .= Rs
    elseif direction == :min
        R .= -Rs
    else
        println("ERROR unrecognized symbol: ", direction)
        return NaN
    end
    v = zeros(numS)
    vold = zeros(numS)
    P = zeros((numS, numS))
    Ptmps = zeros((numS, numS))
    Ptmps .= P_low
    P .= P_low
    # Probability mass already committed by the lower bounds of each row.
    P0 = [sum(P_low[i, :]) for i in 1:numS]
    # Tiny random perturbation used only to break ties between equally good targets.
    rper = rand(Float64, numS) .* (1e-10 / (1 - γ))
    iteration = 0
    changed = true
    while changed
        iteration += 1
        if iteration > 400
            ϵ = norm(v .- vold, Inf)
            bound = vierror_bound(ϵ, γ) # bound on max distance to optimal value function for any state
            bound = (1.0 - γ) * bound # bound on max error to bound on alpha rank (do this to converge regardless of gamma choice)
            println("hit iteration limit, epsilon = $(ϵ), bound = $(bound)")
            break
        end
        vold .= v
        changed = false
        for s in 1:numS
            edges = Eu[s]

            valueiteration_rowsp!(view(Ptmps, s, :), view(P_low, s, :), view(P_high, s, :), R[s], vold, γ, numS, P0[s], rper, Eu=edges)
            if any(abs.(view(Ptmps, s, edges) .- view(P, s, edges)) .>= 1e-8)
                changed = true
            end
            P[s, edges] .= view(Ptmps, s, edges)
        end
        # Policy evaluation: solve the linear system directly instead of forming the
        # explicit inverse (cheaper and numerically better conditioned).
        v .= (I - γ .* P) \ R
        ϵ = norm(v .- vold, Inf)
        bound = vierror_bound(ϵ, γ) # bound on max distance to optimal value function for any state
        bound = (1.0 - γ) * bound # bound on max error to bound on alpha rank (do this to converge regardless of gamma choice)
        if bound < 1e-7
            changed = false
        end
    end
    sol = (1.0 - γ) * mean(abs.(v))
    return sol
end

valueiteration_contagg (generic function with 1 method)

In [28]:
# Container for the empirical performance distributions of a set of algorithms on a
# set of environments.
struct PerfDists
    # dists[alg][env] is the empirical CDF of the performance samples of `alg` on `env`
    dists::Dict{String,Dict{String,ECDF{Array{Float64,1}}}}
    # algorithm names (outer keys of `dists`)
    algs::Array{String,1}
    # environment names (inner keys of `dists`)
    envs::Array{String,1}
    # bounds[env] is a pair of Float64 values per environment; make_perfs_bound unpacks
    # it as `a, b` and passes it on as the lower/upper bound of the performance support
    bounds::Dict{String,Tuple{Float64,Float64}}
end

# Bootstrap resampling hook (extends `Bootstrap.draw!`): for every algorithm/environment
# pair, resample the samples of `x` with replacement (same sample size) and store the
# resulting empirical CDF in `o`.
function draw!(x::PerfDists, o::PerfDists)
    for alg in x.algs, env in x.envs
        samples = x.dists[alg][env].sorted_values
        n = length(samples)
        picks = sample(1:n, n)
        o.dists[alg][env] = ecdf(samples[picks])
    end
end

# Copying a PerfDists duplicates the nested dictionaries as well, so that `draw!` can
# overwrite the copy without touching the original.
copy(x::PerfDists) = deepcopy(x)

# Total number of performance samples stored across all algorithm/environment pairs.
function length(x::PerfDists)
    total = 0
    for alg in x.algs, env in x.envs
        total += length(x.dists[alg][env].sorted_values)
    end
    return total
end

length (generic function with 171 methods)

In [29]:
# Evaluate the empirical CDF at `x` together with a confidence band of half-width
# ϵ = sqrt(log(2/δ) / (2n)), where n is the number of samples.
#
# Arguments:
#   x      - point at which to evaluate the CDF
#   δ      - failure probability used in the half-width ϵ
#   tail   - :both returns the tuple (lower, upper); :left returns only the lower
#            bound; :right returns only the upper bound
#   method - accepted for API symmetry; not used by this method
#
# Bounds are clipped to [0, 1]. For `x = NaN` the result is NaN in the same shape as
# the regular result (a tuple of NaNs for :both), so callers can always destructure it.
# Throws ArgumentError for an unknown `tail`.
function (F::ECDF)(x::Real, δ, tail=:both, method=:DKW)
    if isnan(x)
        return tail == :both ? (NaN, NaN) : NaN
    end
    p = F(x)
    n = length(F.sorted_values)
    ϵ = √(log(2.0 / δ) / 2n)
    if tail == :both
        return (max(p - ϵ, 0.0), min(p + ϵ, 1.0))
    elseif tail == :left
        return max(p - ϵ, 0.0)
    elseif tail == :right
        return min(p + ϵ, 1.0)
    else
        throw(ArgumentError("tail=$(tail) is invalid"))
    end
end

# Same confidence band as the 4-argument method, for a distribution known to be
# supported on [a, b]: below `a` both bounds are exactly 0, at or above `b` both bounds
# are exactly 1, and in between the band p ± ϵ with ϵ = sqrt(log(2/δ) / (2n)) is
# clipped to [0, 1].
#
# `tail` selects the return value: :both -> (lower, upper), :left -> lower,
# :right -> upper; anything else throws ArgumentError. `method` is not used.
# For `x = NaN` the result is NaN in the same shape as the regular result (a tuple of
# NaNs for :both).
function (F::ECDF)(x::Real, δ, a::Real, b::Real, tail=:both, method=:DKW)
    if isnan(x)
        return tail == :both ? (NaN, NaN) : NaN
    end
    n = length(F.sorted_values)
    ϵ = √(log(2.0 / δ) / 2n)
    pl = 0.0
    ph = 0.0
    if x < a
        pl = 0.0
        ph = 0.0
    elseif x ≥ b
        pl = 1.0
        ph = 1.0
    else
        p = F(x)
        pl = max(p - ϵ, 0.0)
        ph = min(p + ϵ, 1.0)
    end

    if tail == :both
        return pl, ph
    elseif tail == :left
        return pl
    elseif tail == :right
        return ph
    else
        throw(ArgumentError("tail=$(tail) is invalid"))
    end
end

In [30]:
# CI for mean of a bounded random variable
"""
    andersons_mean(D, δ, a, b, tail=:both)

Confidence bounds on the mean of a random variable supported on `[a, b]`, computed
from the sorted sample `D` by shifting the empirical CDF by
`ϵ = sqrt(log(2/δ) / (2N))`.

# Arguments
- `D`: samples, assumed sorted in increasing order.
- `δ`: failure probability entering `ϵ`.
- `a`, `b`: lower and upper bound of the data.
- `tail`: `:both` returns `(lower, upper)`, `:left` only `lower`, `:right` only `upper`.

# Throws
`ArgumentError` for any other `tail`.
"""
function andersons_mean(D, δ, a, b, tail=:both)
    N = length(D)
    ϵ = √(log(2.0 / δ) / (2 * N))

    # Upper bound: CDF shifted down by ϵ (clipped at 0), data extended with b.
    function upper_bound()
        Z = vcat(D, b)
        weights = clamp.(collect(Float64, 1:N) ./ N .- ϵ, 0.0, Inf)
        return b - sum(diff(Z) .* weights)
    end

    # Lower bound: CDF shifted up by ϵ (clipped at 1), data extended with a.
    function lower_bound()
        Z = vcat(a, D)
        weights = clamp.(collect(Float64, 0:N-1) ./ N .+ ϵ, -Inf, 1.0)
        return D[end] - sum(diff(Z) .* weights)
    end

    tail == :left && return lower_bound()
    tail == :right && return upper_bound()
    tail == :both || throw(ArgumentError("tail=$(tail) is invalid"))
    upper = upper_bound()
    lower = lower_bound()
    return lower, upper
end

"""
    andersons_meangx(D, g, δ, a, b, tail=:both)

Confidence bounds on the mean of `g(X)` for a random variable `X` supported on `[a, b]`,
computed from the sorted sample `D` the same way as `andersons_mean` but on the
transformed values `g.(Z)`.

# Arguments
- `D`: samples, assumed sorted in increasing order.
- `g`: function applied element-wise to the (extended) sample.
- `δ`: failure probability entering `ϵ = sqrt(log(2/δ) / (2N))`.
- `a`, `b`: lower and upper bound of the data.
- `tail`: `:both` returns `(lower, upper)`, `:left` only `lower`, `:right` only `upper`.

# Throws
`ArgumentError` for any other `tail`.
"""
function andersons_meangx(D, g, δ, a, b, tail=:both)
    N = length(D)
    ϵ = √(log(2.0 / δ) / (2 * N))

    upper_weights() = clamp.(collect(Float64, 1:N) ./ N .- ϵ, 0.0, Inf)
    lower_weights() = clamp.(collect(Float64, 0:N-1) ./ N .+ ϵ, -Inf, 1.0)
    # Last transformed value minus the weighted sum of successive increments.
    bound(gvals, weights) = gvals[end] - sum(diff(gvals) .* weights)

    if tail == :left
        return bound(g.(vcat(a, D)), lower_weights())
    elseif tail == :right
        return bound(g.(vcat(D, b)), upper_weights())
    elseif tail != :both
        throw(ArgumentError("tail=$(tail) is invalid"))
    end

    Z = vcat(D, b)
    gZ = g.(Z)
    upper = bound(gZ, upper_weights())

    # Reuse both buffers for the lower bound (they have the same length N + 1).
    copyto!(Z, vcat(a, D))
    map!(g, gZ, Z)
    lower = bound(gZ, lower_weights())
    return lower, upper
end

"""
    cdfnormalized_mean(Fx, Fy, δx, δy, a, b, tail=:both)

Confidence bounds on the mean of `Fx(Y)`, i.e. the samples of `Fy` normalised by the
distribution `Fx`.

# Arguments
- `Fx`: normalising distribution; called as `Fx(x, δx, a, b, tail, :DKW)` to obtain a
  lower (`:left`) or upper (`:right`) confidence bound on its CDF at `x`.
- `Fy`: empirical CDF of the input variable (its `sorted_values` are used).
- `δx`: the CDF band of `Fx` is requested with failure probability `δx`.
- `δy`: the mean bound over the samples of `Fy` is requested with failure probability `δy`.
- `a`, `b`: lower and upper bound of the distributions of X and Y.
- `tail`: `:both` returns `(lower, upper)`, `:left` only `lower`, `:right` only `upper`.

# Throws
`ArgumentError` for any other `tail`.
"""
function cdfnormalized_mean(Fx, Fy, δx, δy, a, b, tail=:both)
    y = Fy.sorted_values
    # Anonymous functions on purpose: named local functions defined in several branches
    # of one function share a single definition, so a later `g(x) = ...` would silently
    # replace an earlier one.
    cdf_lower = x -> Fx(x, δx, a, b, :left, :DKW)
    cdf_upper = x -> Fx(x, δx, a, b, :right, :DKW)
    if tail == :both
        lower = andersons_meangx(y, cdf_lower, δy, a, b, :left)
        upper = andersons_meangx(y, cdf_upper, δy, a, b, :right)
        return lower, upper
    elseif tail == :left
        return andersons_meangx(y, cdf_lower, δy, a, b, :left)
    elseif tail == :right
        return andersons_meangx(y, cdf_upper, δy, a, b, :right)
    else
        throw(ArgumentError("tail=$(tail) is invalid"))
    end
end

cdfnormalized_mean (generic function with 2 methods)

In [31]:
"""
    comp_rankings(scores)

Rank the rows of `scores`, whose columns are `(score, lower bound, upper bound)`.
Rank 1 is the highest score.

# Returns
`(rank, rank_low, rank_high)`:
- `rank[i]`: `N + 1` minus the number of rows whose score is `<=` row `i`'s score.
- `rank_low[i]`: `N` minus the number of rows whose upper bound is strictly below row
  `i`'s lower bound.
- `rank_high[i]`: `N + 1` minus the number of rows whose lower bound is strictly below
  row `i`'s upper bound.
"""
function comp_rankings(scores)
    N = Base.size(scores)[1]
    rows = 1:N
    rank = Int64[(N + 1) - count(j -> scores[i, 1] >= scores[j, 1], rows) for i in rows]
    rank_low = Int64[
        max((N + 1) - count(j -> scores[i, 2] > scores[j, 3], rows), 1) - 1 for i in rows
    ]
    rank_high = Int64[(N + 1) - count(j -> scores[i, 3] > scores[j, 2], rows) for i in rows]
    return rank, rank_low, rank_high
end

comp_rankings (generic function with 1 method)

In [32]:
"""
    make_perfs_nobound(D::PerfDists)

Build the `n × m × n` array `A` of normalised performances (`n` algorithms, `m`
environments): `A[i, j, k]` is the mean of the empirical CDF of algorithm `k` on
environment `j`, evaluated at the samples of algorithm `i` on the same environment.

Entries are fixed to `0.5` when `i == k`, and also when the environment name contains
"gw" or "chain" and the two algorithm names share one of the substrings "ac", "sarsa"
or "qlambda" while one contains "scaled" and the other "normal".
"""
function make_perfs_nobound(D::PerfDists)
    n, m = length(D.algs), length(D.envs)
    A = zeros(Float64, (n, m, n))
    families = ("ac", "sarsa", "qlambda")
    for (i, alg) in enumerate(D.algs)
        for (j, env) in enumerate(D.envs)
            merges_variants = occursin("gw", env) || occursin("chain", env)
            for (k, algnorm) in enumerate(D.algs)
                samedef = merges_variants &&
                    any(f -> occursin(f, alg) && occursin(f, algnorm), families) &&
                    ((occursin("scaled", alg) && occursin("normal", algnorm)) ||
                     (occursin("normal", alg) && occursin("scaled", algnorm)))
                if i == k || samedef
                    A[i, j, k] = 0.5
                else
                    x = D.dists[alg][env].sorted_values
                    A[i, j, k] = mean(D.dists[algnorm][env].(x))
                end
            end
        end
    end
    return A
end

"""
    make_perfs_bound(D::PerfDists, AB, δ, bound=:DKW)

Build the `n × m × n` array of normalised performances (see `make_perfs_nobound`)
together with element-wise lower and upper confidence bounds.

# Arguments
- `D`: performance distributions.
- `AB`: `AB[env]` gives the pair `(a, b)` bounding the performance on `env`.
- `δ`: total failure probability; each entry uses `δ′ = δ / (n * m)`.
- `bound`: `:PBP` uses `cdfnormalized_mean` with `δ′` for both distributions;
  `:PBPt` uses a two-sided Student-t interval around the mean. Any other value
  (including the default `:DKW`) computes no interval: a warning is logged and
  `Alow`/`Ahigh` stay at zero for every entry that is not fixed to `0.5`.

# Returns
`(A, Alow, Ahigh)`.
"""
function make_perfs_bound(D::PerfDists, AB, δ, bound=:DKW)
    if !(bound in (:PBP, :PBPt))
        @warn "make_perfs_bound: bound=$(bound) is not one of :PBP, :PBPt; lower/upper bounds are left at zero"
    end
    n, m = length(D.algs), length(D.envs)
    δ′ = δ / (n * m)
    A = zeros(Float64, (n, m, n))
    Alow = zeros(Float64, (n, m, n))
    Ahigh = zeros(Float64, (n, m, n))
    families = ("ac", "sarsa", "qlambda")

    for (i, alg) in enumerate(D.algs)
        for (j, env) in enumerate(D.envs)
            a, b = AB[env]
            merges_variants = occursin("gw", env) || occursin("chain", env)
            for (k, algnorm) in enumerate(D.algs)
                samedef = merges_variants &&
                    any(f -> occursin(f, alg) && occursin(f, algnorm), families) &&
                    ((occursin("scaled", alg) && occursin("normal", algnorm)) ||
                     (occursin("normal", alg) && occursin("scaled", algnorm)))
                if i == k || samedef
                    A[i, j, k] = 0.5
                    Alow[i, j, k] = 0.5
                    Ahigh[i, j, k] = 0.5
                else
                    x = D.dists[alg][env].sorted_values
                    N = length(x)
                    pts = D.dists[algnorm][env].(x)
                    p = mean(pts)
                    A[i, j, k] = p
                    if bound == :PBP
                        pl, pu = cdfnormalized_mean(D.dists[algnorm][env], D.dists[alg][env], δ′, δ′, a, b, :both)
                        Alow[i, j, k] = pl
                        Ahigh[i, j, k] = pu
                    elseif bound == :PBPt
                        tstar = quantile(TDist(N - 1), 1 - δ′ / 2)
                        μ, σ = mean(pts), std(pts)
                        Alow[i, j, k] = μ - (σ / √N) * tstar
                        Ahigh[i, j, k] = μ + (σ / √N) * tstar
                    end
                end
            end
        end
    end
    return A, Alow, Ahigh
end

make_perfs_bound (generic function with 2 methods)

In [33]:
"""
    compute_aggregate(perfs::PerfDists, moves, strats, idx2strat, strat2idx, alts)

Compute one aggregate score per algorithm from the normalised performance array
`A = make_perfs_nobound(perfs)`.

A two-player game is set up in which player 1's payoff in joint strategy `s` is the
linear-indexed entry `A[s]` and player 2's payoff is `-A[s]`. Its transition matrix `C`
is built with `makeCinfinite_dense!` (tie divisor `m = 50`). For each algorithm `i`
the reward of joint strategy `s` is `A[i, env, alg]`, where `(env, alg)` is decoded
from the second component of `idx2strat[s]`, and the score is
`(1 - γ) * mean(abs.((I - γC)^-1 * R))` with `γ = (numS - 1) / numS`.

`moves`, `strats`, `idx2strat` and `alts` are the outputs/inputs of `make_strats`;
`strat2idx` is accepted but not used. Returns a vector with one score per algorithm.
"""
function compute_aggregate(perfs::PerfDists, moves, strats, idx2strat, strat2idx, alts)
    A = make_perfs_nobound(perfs)
    numA = length(perfs.algs)
    numE = length(perfs.envs)
    # Decode player 2's move into (normalising algorithm, environment).
    aidx(i) = (i - 1) ÷ numE + 1
    eidx(i) = (i - 1) % numE + 1

    function payoffs(n, s::Int, tail=:none)
        return n == 1 ? A[s] : -A[s]
    end

    players = length(moves)
    η = compute_eta(moves)
    m = 50
    numS = length(strats)
    C = zeros((numS, numS))
    makeCinfinite_dense!(C, strats, alts, players, payoffs, η, m)
    γ = (numS - 1.0) / Float64(numS)
    invC = inv(I - γ .* C)

    # These lookups do not depend on the algorithm being scored, so do them once.
    opp_env = [eidx(idx2strat[s][2]) for s in 1:numS]
    opp_alg = [aidx(idx2strat[s][2]) for s in 1:numS]

    R = zeros(numS)
    y = zeros(Float64, numA)
    for i in 1:numA
        for s in 1:numS
            R[s] = A[i, opp_env[s], opp_alg[s]]
        end
        v = invC * R
        y[i] = (1 - γ) * mean(abs.(v))
    end
    return y
end

"""
    compute_aggbound(perfs, moves, strats, idx2strat, strat2idx, alts, δ, bound=:DKW)

Compute the aggregate score of every algorithm together with lower and upper bounds.

The point estimate is computed as in `compute_aggregate` from the array `A` returned by
`make_perfs_bound(perfs, perfs.bounds, δ, bound)`. The bounds use the interval
transition matrices from `makeCinfinite_bounds` and `valueiteration_contagg`: the lower
bound uses rewards taken from `Alow` with direction `:min`, the upper bound rewards
taken from `Ahigh` with direction `:max`.

`strat2idx` is accepted but not used.

# Returns
A `numA × 3` matrix with columns `(score, lower bound, upper bound)`.
"""
function compute_aggbound(perfs, moves, strats, idx2strat, strat2idx, alts, δ, bound=:DKW)
    A, Alow, Ahigh = make_perfs_bound(perfs, perfs.bounds, δ, bound)
    numA = length(perfs.algs)
    numE = length(perfs.envs)
    # Decode player 2's move into (normalising algorithm, environment).
    aidx(i) = (i - 1) ÷ numE + 1
    eidx(i) = (i - 1) % numE + 1

    # Payoff of player `n` in joint strategy `s`; player 2's payoff is the negation of
    # player 1's, so its interval ends are swapped.
    function payoffs(n, s::Int, tail=:none)
        if tail == :none
            return n == 1 ? A[s] : -A[s]
        elseif tail == :left
            return n == 1 ? Alow[s] : -Ahigh[s]
        elseif tail == :right
            return n == 1 ? Ahigh[s] : -Alow[s]
        elseif tail == :both
            return n == 1 ? (Alow[s], Ahigh[s]) : (-Ahigh[s], -Alow[s])
        else
            throw(ArgumentError("tail=$(tail) is invalid"))
        end
    end

    players = length(moves)
    η = compute_eta(moves)
    m = 50
    numS = length(strats)
    C = zeros((numS, numS))
    makeCinfinite_dense!(C, strats, alts, players, payoffs, η, m)
    γ = (numS - 1.0) / Float64(numS)

    # These lookups do not depend on the algorithm being scored, so do them once.
    opp_env = [eidx(idx2strat[s][2]) for s in 1:numS]
    opp_alg = [aidx(idx2strat[s][2]) for s in 1:numS]
    R = zeros(numS)
    # Load into R the rewards of algorithm i taken from the array M.
    function rewards!(M, i)
        for s in 1:numS
            R[s] = M[i, opp_env[s], opp_alg[s]]
        end
        return R
    end

    y = zeros(Float64, numA)
    ylow = zeros(Float64, numA)
    yhigh = zeros(Float64, numA)
    invC = inv(I - γ .* C)
    for i in 1:numA
        v = invC * rewards!(A, i)
        y[i] = (1 - γ) * mean(abs.(v))
    end

    Clow, Chigh, Eu, num_uncertain = makeCinfinite_bounds(strats, alts, players, payoffs, η, m)

    for i in 1:numA
        ylow[i] = valueiteration_contagg(Clow, Chigh, Eu, rewards!(Alow, i), numS, :min, γ=γ)
        yhigh[i] = valueiteration_contagg(Clow, Chigh, Eu, rewards!(Ahigh, i), numS, :max, γ=γ)
    end
    return hcat(y, ylow, yhigh)
end

compute_aggbound (generic function with 2 methods)

In [18]:
# Algorithms and environments covered by the experiment.
algs = ["ac-normal", "ac-scaled", "ac-parl2",
        "sarsa-normal", "sarsa-scaled", "sarsa-parl2",
        "qlambda-normal", "qlambda-scaled", "q-parl2",
        "nactd", "ppo"
]
envs = ["acrobot_d", "cartpole_d", "mntcar_d",
        "chain10d", "chain10s", 
        "chain50d", "chain50s", 
        "gw10d", "gw10s", "gw5d", "gw5s", 
        "pbbox", "pbempt", "pbmed", "pbsingle",
]

base_dir = "./data/samples";

# Column names of each algorithm's result files (the files are read without a header row).
acn_cols = ["dorder", "iorder", "full", "gamma", "lam", "palpha", "valpha", "logp", "life", "end"]
acs_cols = ["dorder", "iorder", "full", "gamma", "lam", "palpha", "valpha", "logp", "life", "end"]
acp_cols = ["dorder", "iorder", "full", "gamma", "lam", "palpha", "logp", "life", "end"]
qln_cols = ["dorder", "iorder", "full", "gamma", "lam", "eps", "qalpha", "logp", "life", "end"]
qls_cols = ["dorder", "iorder", "full", "gamma", "lam", "eps", "qalpha", "logp", "life", "end"]
qlp_cols = ["dorder", "iorder", "full", "gamma", "lam", "eps", "logp", "life", "end"]
sln_cols = ["dorder", "iorder", "full", "gamma", "lam", "eps", "qalpha", "logp", "life", "end"]
sls_cols = ["dorder", "iorder", "full", "gamma", "lam", "eps", "qalpha", "logp", "life", "end"]
slp_cols = ["dorder", "iorder", "full", "gamma", "lam", "eps", "logp", "life", "end"]
ntd_cols = ["dorder", "iorder", "full", "gamma", "lam", "palpha", "walpha", "valpha", "normw", "logp", "life", "end"]
ppo_cols = ["dorder", "iorder", "full", "gamma", "lam", "clip", "entropy", "steps", "epochs", "batch_size", "adam-eps", "alpha", "logp", "life", "end"]
# Same order as `algs`.
alg_cols = [acn_cols, acs_cols, acp_cols,
            sln_cols, sls_cols, slp_cols,
            qln_cols, qls_cols, qlp_cols,
            ntd_cols, ppo_cols];

# allD[alg][env]: empirical CDF of the "life" column; allab[env]: (min, max) of that
# column over all algorithms.
allD = Dict{String,Dict{String,ECDF{Array{Float64,1}}}}()
allab = Dict{String,Tuple{Float64,Float64}}(env => (Inf, -Inf) for env in envs)
for (alg, colnames) in zip(algs, alg_cols)
    algD = Dict{String,ECDF{Array{Float64,1}}}()
    for env in envs
        path = string(base_dir, "/", env, "/", "allres_", alg, ".csv")
        # Files for the "gw" and "chain" environments lack the first three columns.
        short_header = occursin("gw", env) || occursin("chain", env)
        header = short_header ? colnames[4:end] : colnames
        df = DataFrame!(CSV.File(path, header=header))
        algD[env] = ecdf(df[:, :life])
        sorted_life = algD[env].sorted_values
        env_min, env_max = allab[env]
        allab[env] = (min(env_min, first(sorted_life)), max(env_max, last(sorted_life)))
    end
    allD[alg] = algD
end

In [19]:
# Bundle the loaded distributions, the algorithm/environment names and the per-environment
# sample ranges into one PerfDists value.
perfs0 = PerfDists(allD,algs,envs, allab);

In [20]:
# Two-player game: player 1 picks one of the algorithms, player 2 picks one of
# (#algorithms * #environments) moves.
moves0 = [length(perfs0.algs), length(perfs0.algs)*length(perfs0.envs)]
strats0, idx2strat0, strat2idx0, alts0 = make_strats(moves0);
# Dense numS x numS zero matrix; not referenced by the cells shown below.
C0 = zeros((length(strats0), length(strats0)));

In [21]:
# Point estimates of the aggregate score, one per entry of `algs`.
compute_aggregate(perfs0, moves0, strats0, idx2strat0, strat2idx0, alts0)

11-element Array{Float64,1}:
 0.07846770516460083
 0.08513668390499511
 0.15780912545402412
 0.0830895078100108
 0.09301333710469792
 0.4623020814324715
 0.06401653790639289
 0.06886919926774149
 0.4523187959071107
 0.05157417461270635
 0.05080829207380103

In [42]:
# Aggregate scores with :PBP bounds at δ = 0.05, the rankings derived from them, and a
# table sorted by rank.
pbp_aggscores = compute_aggbound(perfs0, moves0, strats0, idx2strat0, strat2idx0, alts0, 0.05, :PBP)
pbp_ranks = comp_rankings(pbp_aggscores)
pbp_aggdf = DataFrame(aname=algs, 
    score=pbp_aggscores[:, 1], score_low=pbp_aggscores[:, 2], score_high=pbp_aggscores[:, 3], 
    rank=pbp_ranks[1], rank_low=pbp_ranks[2], rank_high=pbp_ranks[3])
pbp_aggdf[sortperm(pbp_aggdf.rank), :]

Unnamed: 0_level_0,aname,score,score_low,score_high,rank,rank_low,rank_high
Unnamed: 0_level_1,String,Float64,Float64,Float64,Int64,Int64,Int64
1,sarsa-parl2,0.462302,0.390445,0.553741,1,2,1
2,q-parl2,0.452319,0.378233,0.563269,2,2,1
3,ac-parl2,0.157809,0.0765444,0.312941,3,11,3
4,sarsa-scaled,0.0930133,0.0337251,0.227599,4,11,3
5,ac-scaled,0.0851367,0.0305087,0.21459,5,11,3
6,sarsa-normal,0.0830895,0.0289775,0.201945,6,11,3
7,ac-normal,0.0784677,0.0275055,0.203338,7,11,3
8,qlambda-scaled,0.0688692,0.0237353,0.197328,8,11,3
9,qlambda-normal,0.0640165,0.0214481,0.178041,9,11,3
10,nactd,0.0515742,0.0180244,0.163611,10,11,3


In [35]:
# Same table as above, with the Student-t based :PBPt bounds at δ = 0.05.
pbpt_aggscores = compute_aggbound(perfs0, moves0, strats0, idx2strat0, strat2idx0, alts0, 0.05, :PBPt)
pbpt_ranks = comp_rankings(pbpt_aggscores)
pbpt_aggdf = DataFrame(aname=algs, 
    score=pbpt_aggscores[:, 1], score_low=pbpt_aggscores[:, 2], score_high=pbpt_aggscores[:, 3], 
    rank=pbpt_ranks[1], rank_low=pbpt_ranks[2], rank_high=pbpt_ranks[3])
pbpt_aggdf[sortperm(pbpt_aggdf.rank), :]

Unnamed: 0_level_0,aname,score,score_low,score_high,rank,rank_low,rank_high
Unnamed: 0_level_1,String,Float64,Float64,Float64,Int64,Int64,Int64
1,sarsa-parl2,0.462302,0.448929,0.474436,1,2,1
2,q-parl2,0.452319,0.435036,0.483923,2,2,1
3,ac-parl2,0.157809,0.127526,0.177693,3,3,3
4,sarsa-scaled,0.0930133,0.0776577,0.105602,4,8,4
5,ac-scaled,0.0851367,0.0716439,0.0964559,5,9,4
6,sarsa-normal,0.0830895,0.0692397,0.0949889,6,9,4
7,ac-normal,0.0784677,0.0661703,0.0895025,7,9,4
8,qlambda-scaled,0.0688692,0.0584231,0.0790668,8,11,4
9,qlambda-normal,0.0640165,0.0541105,0.0734685,9,11,5
10,nactd,0.0515742,0.0438004,0.060905,10,11,8


In [43]:
# Persist both rank-sorted tables as CSV files.
CSV.write("./data/agg_pbp.csv", pbp_aggdf[sortperm(pbp_aggdf.rank), :])
CSV.write("./data/agg_pbpt.csv", pbpt_aggdf[sortperm(pbpt_aggdf.rank), :]);

In [40]:
"""
    bootstrap_aggbound(perfs, moves, strats, idx2strat, strat2idx, alts, δ, num_boot)

Percentile-bootstrap confidence intervals for the aggregate scores returned by
`compute_aggregate`.

`perfs` is resampled `num_boot` times with `BasicSampling`, and the interval level is
`1 - δ / (numA * numE)`, where `numA` and `numE` are the numbers of algorithms and
environments.

# Returns
A `numA × 3` matrix whose columns are the first, second and third component of each
algorithm's confidence-interval tuple as returned by `confint`.
"""
function bootstrap_aggbound(perfs, moves, strats, idx2strat, strat2idx, alts, δ, num_boot)
    statistic = x -> compute_aggregate(x, moves, strats, idx2strat, strat2idx, alts)
    bs = bootstrap(statistic, perfs, BasicSampling(num_boot))

    numA = length(perfs.algs)
    numE = length(perfs.envs)
    level = 1 - (δ / (numA * numE))
    cfbs = confint(bs, PercentileConfInt(level))

    component(c) = [cfbs[i][c] for i in 1:numA]
    Y = hcat(component(1), component(2), component(3))
    return Y
end

bootstrap_aggbound (generic function with 1 method)

In [79]:
# Bootstrap intervals for the aggregate scores: δ = 0.05, 10000 resamples.
aggres_bs = bootstrap_aggbound(perfs0, moves0, strats0, idx2strat0, strat2idx0, alts0, 0.05, 10000)

11×3 Array{Float64,2}:
 0.0784677  0.0738912  0.0815669
 0.0851367  0.0800206  0.0881374
 0.157809   0.146321   0.162593 
 0.0830895  0.0775537  0.0863444
 0.0930133  0.0873844  0.0962578
 0.462302   0.456154   0.465939 
 0.0640165  0.0604276  0.0664806
 0.0688692  0.0651498  0.0711615
 0.452319   0.446313   0.463339 
 0.0515742  0.0493201  0.0533322
 0.0508083  0.0487978  0.0524612

In [81]:
# Rankings and rank-sorted table for the bootstrap intervals; the sorted table is also
# written to disk.
bs_ranks = comp_rankings(aggres_bs)
bs_aggdf = DataFrame(aname=algs, 
    score=aggres_bs[:, 1], score_low=aggres_bs[:, 2], score_high=aggres_bs[:, 3], 
    rank=bs_ranks[1], rank_low=bs_ranks[2], rank_high=bs_ranks[3])
bs_aggdf[sortperm(bs_aggdf.rank), :]
CSV.write("./data/agg_bs.csv", bs_aggdf[sortperm(bs_aggdf.rank), :])

Unnamed: 0_level_0,aname,score,score_low,score_high,rank,rank_low,rank_high
Unnamed: 0_level_1,String,Float64,Float64,Float64,Int64,Int64,Int64
1,sarsa-parl2,0.462302,0.456154,0.465939,1,2,1
2,q-parl2,0.452319,0.446313,0.463339,2,2,1
3,ac-parl2,0.157809,0.146321,0.162593,3,3,3
4,sarsa-scaled,0.0930133,0.0873844,0.0962578,4,5,4
5,ac-scaled,0.0851367,0.0800206,0.0881374,5,7,4
6,sarsa-normal,0.0830895,0.0775537,0.0863444,6,7,5
7,ac-normal,0.0784677,0.0738912,0.0815669,7,7,5
8,qlambda-scaled,0.0688692,0.0651498,0.0711615,8,9,8
9,qlambda-normal,0.0640165,0.0604276,0.0664806,9,9,8
10,nactd,0.0515742,0.0493201,0.0533322,10,11,10
