In [None]:
using Gadfly, Colors, Cairo, DataFrames, Distributed
addprocs(10);

In [None]:
@everywhere using Distributions, StatsBase, Distances, LinearAlgebra, SharedArrays, JuliennedArrays, Random, Bootstrap, DelimitedFiles

@everywhere begin
    # --- Sizes of the simulation (defined identically on every process) ---
    const numb_agent = 50   # agents per group
    const numb_toss = 100   # coin tosses observed by each agent
    const numb_sim = 50     # not referenced by the code shown in this notebook
    const numb_hyp = 11     # number of bias hypotheses

    # --- Likelihoods of a single toss under each hypothesis ---
    # Hypothesis k assigns probability likelihood_heads[k] to heads and
    # likelihood_tails[k] to tails; both grids have numb_hyp equally spaced points.
    const likelihood_heads = range(0, stop=1, length=numb_hyp)
    const likelihood_tails = range(1, stop=0, length=numb_hyp)
end

In [None]:
# default graphics
# Push Gadfly's :default theme and set the default plot size to 9 inches wide,
# with the height equal to the width divided by the golden ratio.
Gadfly.push_theme(:default)
set_default_plot_size(9inch, 9inch/MathConstants.golden)

# Return `n` colours as a `Vector{Color}`.
# The palette is produced by `distinguishable_colors`, seeded with eight hex colours
# taken from one of the Brewer series; edit `seeds` or the search grids to build a
# different palette.
function gen_brew_colors(n)
    seeds = [
        colorant"#66c2a5", colorant"#fc8d62", colorant"#8da0cb", colorant"#e78ac3",
        colorant"#a6d854", colorant"#ffd92f", colorant"#e5c494", colorant"#b3b3b3",
    ]
    palette = distinguishable_colors(
        n,
        seeds;
        lchoices=Float64[58, 45, 72.5, 90],
        # candidate colours are passed through a deuteranopia simulation before comparison
        transform=c -> deuteranopic(c, 0.1),
        cchoices=Float64[20, 40],
        hchoices=[75, 51, 35, 120, 180, 210, 270, 310],
    )
    return convert(Vector{Color}, palette)
end

### Data generation function

The following function generates sequences of coin tosses, one per agent, where the bias of the coin is given by `bias` and the length of the sequences by `numb_toss`.

In [None]:
# Simulate the evidence for one group.
# Returns a numb_agent × numb_toss Int64 matrix; row `agent` holds that agent's
# sequence of Bernoulli(bias) draws (1 or 0).
@everywhere function generate_data(bias::Float64)
    coin = Bernoulli(bias)
    tosses = Array{Int64,2}(undef, numb_agent, numb_toss)
    for agent in axes(tosses, 1)
        # draws are Bool; assignment into the Int64 matrix converts them to 0/1
        tosses[agent, :] = rand(coin, numb_toss)
    end
    return tosses
end

### Update rules

These are the update rules to be compared. The first is Bayes' rule, which takes `bonus` as a dummy parameter so that it has the same form as the other rules, as required for the evolutionary computing carried out later. The other rules can in fact also do duty as Bayes' rule (which is the limiting case of each of them, obtained by setting `bonus = 0`).

In [None]:
# Bayes' rule
# Returns the posterior over the hypotheses after observing toss number `toss_num`
# of `dat` (1 is scored with likelihood_heads, anything else with likelihood_tails).
# `bonus` is accepted only so that all update rules share one signature; it is unused.
@everywhere function b_upd(probs::Array{Float64,1}, dat::Array{Int64,1}, toss_num::Int64, bonus::Float64)
    lik = dat[toss_num] == 1 ? likelihood_heads : likelihood_tails
    # prior × likelihood, normalised by the probability of the observed outcome
    return (probs .* lik) ./ dot(probs, lik)
end

# EXPL
# Bayesian update followed by a bonus for the hypothesis whose index is closest to the
# observed relative frequency of 1s so far; when two hypotheses are equally close the
# bonus is split evenly between them. The result is renormalised by 1 + bonus.
@everywhere function expl_upd(probs::Array{Float64,1}, dat::Array{Int64,1}, toss_num::Int64, bonus::Float64)
    # plain Bayesian posterior for the latest toss
    lik = dat[toss_num] == 1 ? likelihood_heads : likelihood_tails
    posterior::Array{Float64,1} = (probs .* lik) ./ dot(probs, lik)

    # observed frequency mapped onto the (fractional) hypothesis index scale 1..numb_hyp
    pos::Float64 = mean(dat[1:toss_num]) * (numb_hyp - 1.0) + 1.0

    if pos % 1 == .5
        # exactly halfway between two hypotheses: each neighbour gets half the bonus
        below = floor(Int, pos)
        above = ceil(Int, pos)
        posterior[below] += bonus*0.5
        posterior[above] += bonus*0.5
    else
        posterior[round(Int, pos)] += bonus
    end
    return posterior / (1.0 + bonus)
end

# Good's rule; with λ=2 (default value), we obtain the rule L2 from Douven & Schupbach, 2015 (Frontiers paper)
# For outcome `res` (1 uses likelihood_heads, anything else likelihood_tails) this returns,
# per hypothesis, the log of likelihood / P(outcome) rescaled into [-1, 1].
@everywhere function good_bonus(probs::Array{Float64,1}, res::Int64, λ=2.0)
    lik = res == 1 ? likelihood_heads : likelihood_tails
    pE::Float64 = dot(probs, lik)                 # probability of the observed outcome
    log_ratio::Array{Float64,1} = log.(lik ./ pE)
    # sign-preserving squashing: 0 maps to 0, large |x| approaches ±1
    rescale(x) = x >= 0 ? 1 - exp(2λ^2 * -x^2) : -1 + exp(2λ^2 * -x^2)
    return map(rescale, log_ratio)
end

# γ is the proportion of the probability that gets added as a bonus, so necessarily γ ⩽ 1
# Bayesian update for toss `toss_num`, after which each hypothesis' posterior is increased
# by γ × posterior × good_bonus (computed from the prior `probs`) and the vector is
# renormalised to sum to one.
@everywhere function good_upd(probs::Array{Float64, 1}, dat::Array{Int64,1}, toss_num::Int64, γ::Float64)
    res::Int64 = dat[toss_num]
    lik = res == 1 ? likelihood_heads : likelihood_tails
    posterior::Array{Float64,1} = (probs .* lik) ./ dot(probs, lik)
    bonus = good_bonus(probs, res)
    boosted::Array{Float64,1} = posterior .+ γ .* (posterior .* bonus)
    return boosted / sum(boosted)
end


# Popper's rule; in pop_upd, γ is the proportion of the probability that gets added as a bonus, so necessarily γ ⩽ 1
# For outcome `res` (1 uses likelihood_heads, anything else likelihood_tails) this returns,
# per hypothesis, (likelihood - P(outcome)) / (likelihood + P(outcome)).
@everywhere function pop_bonus(probs::Array{Float64,1}, res::Int64)
    lik = res == 1 ? likelihood_heads : likelihood_tails
    pE::Float64 = dot(probs, lik)   # probability of the observed outcome
    return (lik .- pE) ./ (lik .+ pE)
end

# Bayesian update for toss `toss_num`, after which each hypothesis' posterior is increased
# by γ × posterior × pop_bonus (computed from the prior `probs`) and the vector is
# renormalised to sum to one.
@everywhere function pop_upd(probs::Array{Float64,1}, dat::Array{Int64,1}, toss_num::Int, γ::Float64)
    res::Int64 = dat[toss_num]
    lik = res == 1 ? likelihood_heads : likelihood_tails
    posterior::Array{Float64,1} = (probs .* lik) ./ dot(probs, lik)
    bonus = pop_bonus(probs, res)
    boosted::Array{Float64,1} = posterior .+ γ .* (posterior .* bonus)
    return boosted / sum(boosted)
end

In [None]:
# Run one group of numb_agent agents through numb_toss rounds of "update, then pool".
#
# Arguments
#   rule      - update rule called as rule(probs, data_row, toss_index, bonus)
#   dist      - a type that is instantiated with `dist()` and handed to `pairwise`
#   averaging - pooling function; `mean` takes a dedicated branch, anything else is
#               applied through `Slices` and the result renormalised
#   bonus     - forwarded unchanged to `rule`
#   ϵ         - two agents are peers when their pairwise distance is <= ϵ
#   sdat      - data matrix; row i is passed to `rule` as agent i's toss sequence
#
# Returns a numb_hyp × numb_agent × (numb_toss + 1) array; slice [:, :, 1] is the uniform
# prior and slice [:, :, t + 1] holds the pooled probabilities after toss t.
@everywhere function social_learning(rule::Function, dist::DataType, averaging, bonus::Float64, ϵ::Float64, sdat::Array{Int64,2})
    UPD = Array{Float64,3}(undef, numb_hyp, numb_agent, numb_toss + 1)
    # every agent starts from the uniform distribution over the hypotheses
    UPD[:, :, 1] = repeat(fill(1/numb_hyp, numb_hyp), 1, numb_agent)
    # scratch matrix: column i is agent i's individually updated distribution this round
    PROB = Array{Float64,2}(undef, numb_hyp, numb_agent)
    # peer test applied to each pairwise distance
    f(x::Float64) = (x<=ϵ)::Bool
    @inbounds for t in 1:numb_toss
        # step 1: each agent updates on its own t-th toss
        for i in 1:numb_agent
            PROB[:, i] = rule(UPD[:, i, t], sdat[i, :], t, bonus)
        end
        # step 2: distances between agents' distributions (columns of PROB)
        prob_dist::Array{Float64,2} = pairwise(dist(), PROB, dims=2)
        # peers[i, j] is true when agent j is within ϵ of agent i
        peers::Array{Bool,2} = map(f, prob_dist)
        # step 3: each agent adopts the pooled distribution of its peers
        if averaging == mean
            @inbounds for i::Int in 1:numb_agent
               v = @view PROB[:, peers[i, :]]
               # arithmetic mean over the peer columns, one value per hypothesis
               UPD[:, i, t + 1] = mean(v', dims=1)
            end
        else
            @inbounds for i::Int in 1:numb_agent
               # NOTE(review): presumably Slices(..., False(), True()) yields one slice per
               # hypothesis (across peers), so `averaging` is applied per hypothesis -
               # confirm against the JuliennedArrays version in use
               q = map(averaging, Slices(PROB[:, peers[i, :]], False(), True()))
               # renormalise the pooled values so that they sum to one
               UPD[:, i, t + 1] = q / sum(q)
            end
        end
    end
    return UPD
end

In [None]:
# Survival probability of one patient under a Weibull(shape, scale) lifetime model.
#
# `res` is (time step of the intervention, hypothesis acted upon); a second entry of -1
# means that no intervention was made. `hyp` is the true hypothesis. When `a`, `b`,
# `shape` or `scale` are omitted they are drawn at random from the uniform ranges given
# in the signature.
@everywhere function survWei(res::Tuple{Int64,Int64},
                             hyp::Int64,
                             a::Float64=rand(Uniform(1, 10)), 
                             b::Float64=rand(Uniform(1, 10)),
                             shape::Float64=rand(Uniform(.5, 5)), 
                             scale::Float64=rand(Uniform(50, 250)))
    when, acted_on = res
    # cdf(lifetime, t) is the probability of death by time t
    lifetime = Weibull(shape, scale)

    # right intervention: the death probability at the intervention time is divided by a
    acted_on == hyp && return 1 - (cdf(lifetime, when) / a)

    # no intervention: survival probability one step past the last toss
    acted_on == -1 && return 1 - cdf(lifetime, numb_toss + 1)

    # wrong intervention: (1 + (b - 1) * death probability) / b
    return (1 + (b - 1) * cdf(lifetime, when)) / b
end

In [None]:
# Average survival score of a group configuration over 100 simulated patients.
#
# For each patient a true hypothesis is drawn uniformly from 1:numb_hyp, data are generated
# with the matching bias, the group learns via `social_learning`, and the first time step
# at which an intervention is triggered (or none at all) is scored with `survWei`.
# The five arguments are forwarded unchanged to `social_learning`.
#
# Sizes are derived from `numb_hyp` and `numb_toss` instead of being repeated as the
# literals 11, 10 and 101, so this function stays consistent with the shared constants.
@everywhere function treat_patients(rule::Function, dist::DataType, averaging, bonus::Float64, ϵ::Float64)
    n_patients = 100
    n_steps = numb_toss + 1          # time slices returned by social_learning
    score = 0.0
    for _ in 1:n_patients
        rand_hyp::Int64 = rand(1:numb_hyp)
        # hypothesis k corresponds to bias (k - 1) / (numb_hyp - 1), i.e. likelihood_heads[k]
        dt = generate_data((rand_hyp - 1) / (numb_hyp - 1))
        sl = social_learning(rule, dist, averaging, bonus, ϵ, dt)
        # Per time step and hypothesis: count agents whose probability exceeds .9, test
        # whether that count exceeds 25, then take findmax over the hypotheses, giving one
        # (flag, hypothesis index) pair per time step.
        # NOTE(review): 25 is presumably half of numb_agent (50) - confirm before changing
        # the group size.
        m = mapslices(findmax, mapslices(x->x.>25, sum(mapslices(x->x.>.9, sl, dims=(1, 3)), dims=2), dims=1), dims=1)
        v = reshape(m, n_steps)
        # hypothesis acted upon at each step, or -1 when no hypothesis passed the test
        c = [ first(entry) == true ? last(entry) : -1 for entry in v ]
        # first step with an intervention; falls back to the last step when there is none
        ff = something(findfirst(x->x.>0, c), n_steps)
        cff = c[ff]
        score += survWei((ff, cff), rand_hyp)
    end
    return score / n_patients
end

In [None]:
# Create one child configuration from rows `a` and `b` of the parent table `mat`.
#
# Columns 1-3 are inherited: a random non-empty subset comes from parent `a`, the remaining
# columns from parent `b`. Columns 4 and 5 are drawn from a normal distribution truncated
# to [0, 1], centred on the parents' mean and with the standard deviation of that column
# over all of `mat`; column 4 is fixed at 0.0 when the inherited rule is `b_upd`.
@everywhere function offspring(a::Int64, b::Int64, mat::Array{Any,2})
    n_from_a::Int64 = rand(1:3)
    from_a::Vector{Int64} = sample(1:3, n_from_a, replace=false, ordered=true)
    from_b::Vector{Int64} = setdiff(1:3, from_a)

    child = Vector{Any}(undef, 5)
    child[from_a] = mat[a, from_a]
    child[from_b] = mat[b, from_b]

    if child[1] == b_upd
        child[4] = 0.0
    else
        centre4 = mean([mat[a, 4], mat[b, 4]])
        child[4] = rand(truncated(Normal(centre4, std(mat[:, 4])), 0.0, 1.0))
    end

    centre5 = mean([mat[a, 5], mat[b, 5]])
    child[5] = rand(truncated(Normal(centre5, std(mat[:, 5])), 0.0, 1.0))
    return child::Vector{Any}
end

In [None]:
# Advance the population of 36 configurations by one generation.
#
# Every row of `mat` is scored with `treat_patients`; the 18 best rows survive (ordered from
# highest to lowest score) and 18 children are bred from randomly paired survivors.
# Returns (new population, mean score, std of scores), where the new population lists the
# 18 children first and the 18 survivors after them.
@everywhere function new_generation(mat::Array{Any,2})
    fitness = [ treat_patients(mat[row, :]...) for row in 1:36 ]
    ranking = sortperm(fitness, rev=true)
    survivors = mat[ranking[1:18], :]

    # two independent shuffles decide which survivors are paired up
    first_parent = shuffle(1:18)
    second_parent = shuffle(1:18)
    children = Array{Any,2}(undef, 18, 5)
    for k in 1:18
        children[k, :] = offspring(first_parent[k], second_parent[k], survivors)
    end
    return vcat(children, survivors), mean(fitness), std(fitness)
end

In [None]:
# Run one complete evolutionary simulation and write its results to `gen_data_new<r>/`.
#
# The start population has 36 rows (9 per update rule) with columns
# rule, distance type, pooling function, bonus value, ϵ. Generation 1 is written to
# gen1.txt, generations 2-50 to gen2.txt ... gen50.txt, and the mean / standard deviation
# of the scores of the 49 scored generations to SCR.txt / SCRstd.txt.
#
# The output directory is created with `mkpath` rather than by shelling out to `mkdir`,
# so the function does not depend on an external command and does not fail when the
# directory is already there (e.g. when a run is repeated).
@everywhere function single_run(r)
    rule_id = vcat(repeat([b_upd], 9), repeat([expl_upd], 9), repeat([good_upd], 9), repeat([pop_upd], 9))
    c_vals = vcat(repeat([0.0], 9), rand(Uniform(), 27)) # bonus values are randomly chosen from [0, 1]
    eps_vals = rand(Uniform(), 36) # same for ϵ
    dist_id = repeat(vcat(repeat([Euclidean], 3), repeat([Cityblock], 3), repeat([KLDivergence], 3)), 4)
    av_id = repeat([mean, geomean, harmmean], 12)
    pop_start = hcat(rule_id, dist_id, av_id, c_vals, eps_vals)

    outdir = "gen_data_new$r"
    mkpath(outdir)
    open("$outdir/gen1.txt", "w") do io
        writedlm(io, pop_start)
    end

    n_gen = 50
    scr = Array{Float64,1}(undef, n_gen - 1)
    scr_std = Array{Float64,1}(undef, n_gen - 1)

    old = pop_start
    for i in 2:n_gen
        next_pop, scr[i - 1], scr_std[i - 1] = new_generation(old)
        open("$outdir/gen$i.txt", "w") do io
            writedlm(io, next_pop)
        end
        # overwrite the current population in place, then release the temporary
        old .= next_pop
        next_pop = nothing
        GC.gc()
    end
    open("$outdir/SCR.txt", "w") do io
        writedlm(io, scr)
    end
    open("$outdir/SCRstd.txt", "w") do io
        writedlm(io, scr_std)
    end
end

We run the function 15 times, in parallel.

In [None]:
# Distribute the 15 runs over the worker processes; run r writes to gen_data_new<r>/.
pmap(single_run, 1:15);

Load the data concerning the final generation in each run.

In [None]:
# final_gens[:, :, i] is the 36 × 5 table of generation 50 from run i
# (columns: rule, distance type, pooling function, bonus value, ϵ).
final_gens = Array{Any,3}(undef, 36, 5, 15)

for i in axes(final_gens, 3)
    final_gens[:, :, i] = readdlm("gen_data_new$i/gen50.txt")
end

Count of update rules represented in last generations.

In [None]:
# Column 1 holds the update-rule name; tally it over all rows of all 15 runs.
countmap(final_gens[:, 1, :])

In [None]:
# Bar chart of how often each update rule occurs in the final generations of all runs.
# The bar heights are counted directly from column 1 of `final_gens` (the rule names as
# written by `writedlm`) instead of being typed in from an earlier cell's output, so the
# figure cannot go out of date when the simulations are re-run.
p = plot(x = ["Bayes", "EXPL", "Good", "Popper"],
    y = [count(==(rule), final_gens[:, 1, :]) for rule in ("b_upd", "expl_upd", "good_upd", "pop_upd")],
    Geom.bar,
    Guide.xlabel("Rule"),
    Guide.ylabel("Count"),
    Scale.x_discrete,
    style(default_color=colorant"#66c2a5", minor_label_font_size=11pt, minor_label_color=colorant"black", grid_color=colorant"#E8E8E8", minor_label_font="DeJaVu Sans",  major_label_font="DeJaVu Sans", major_label_font_size=15pt, major_label_color=colorant"black",
            bar_spacing=35pt))

In [None]:
# Save the plot currently bound to `p` as a 7in-wide PDF (height = width / golden ratio).
draw(PDF("barplot_addendum1.pdf", 7inch, 7inch/MathConstants.golden), p);

Same for distance measures and, respectively, pooling methods.

In [None]:
# Stack the 15 final-generation tables vertically into one 540 × 5 table.
dns = reduce(vcat, [final_gens[:, :, i] for i in 1:15]);

In [None]:
# Bonus values (column 4) of the rows whose rule (column 1) is EXPL or Popper.
# Good's rule is not selected here: per the original note it is not present in the
# final generations.
expl_bns = dns[findall(==("expl_upd"), dns[:, 1]), 4]
pop_bns = dns[findall(==("pop_upd"), dns[:, 1]), 4];

In [None]:
# Long-format table for plotting: one row per bonus value, labelled with its rule.
df = DataFrame(
    Rule = vcat(repeat(["EXPL"], length(expl_bns)), repeat(["Popper"], length(pop_bns))),
    C_value = vcat(expl_bns, pop_bns),
);

In [None]:
# Kernel density of the bonus values, one curve per rule.
# The two curve colours are entries 2 and 4 of a four-colour palette.
p = plot(df, x=:C_value, color=:Rule, Geom.density(bandwidth=.02),
     Coord.cartesian(xmin=-.0075, xmax=1.1),
     Scale.color_discrete_manual(gen_brew_colors(4)[[2, 4]]...),
     Guide.xlabel("Bonus value"),
     Guide.ylabel("Density"),
     style(
        line_width=2.65pt,
        minor_label_font_size=11pt,
        minor_label_color=colorant"black",
        minor_label_font="DeJaVu Sans",
        key_title_font_size=14pt,
        key_label_color=colorant"black",
        grid_color=colorant"#E8E8E8",
        key_title_color=colorant"black",
        key_label_font="DeJaVu Sans",
        major_label_font="DeJaVu Sans",
        key_label_font_size=12pt,
        major_label_font_size=15pt,
        major_label_color=colorant"black",
        colorkey_swatch_shape=:square))

In [None]:
# Save the plot currently bound to `p` as a 7in-wide PDF (height = width / golden ratio).
draw(PDF("densplot_addendum1.pdf", 7inch, 7inch/MathConstants.golden), p);

In [None]:
# Column 2 holds the distance type; tally it over all rows of all 15 runs.
countmap(final_gens[:, 2, :])

In [None]:
# Column 3 holds the pooling function; tally it over all rows of all 15 runs.
countmap(final_gens[:, 3, :])

Mean and standard deviation of explanation bonus of best groups in last generations. 

In [None]:
# Bonus value (column 4) of the best group in each run's final generation.
# `new_generation` returns `vcat(offspr, nnp)`: rows 1-18 of every saved generation are
# newly bred, not yet scored offspring, and rows 19-36 are the survivors ordered from
# highest to lowest score. The best group is therefore row 19, not row 1.
mean(final_gens[19, 4, :]), std(final_gens[19, 4, :])

Same for $\epsilon$ values.

In [None]:
# ϵ value (column 5) of the best group in each run's final generation.
# `new_generation` returns `vcat(offspr, nnp)`: rows 1-18 of every saved generation are
# newly bred, not yet scored offspring, and rows 19-36 are the survivors ordered from
# highest to lowest score. The best group is therefore row 19, not row 1.
mean(final_gens[19, 5, :]), std(final_gens[19, 5, :])

Counts of types of update rule, types of metric, and types of pooling method -- to create figures.

In [None]:
# all_gens[:, :, i] is the 36 × 5 table of generation i; only run 3 is loaded here.
all_gens = Array{Any,3}(undef, 36, 5, 50)

for i in axes(all_gens, 3)
    all_gens[:, :, i] = readdlm("gen_data_new3/gen$i.txt")
end

In [None]:
# Number of groups using each update rule in every generation (column 1 of `all_gens`).
# Each entry is a direct count of matching names, which is 0 when a rule has died out;
# this replaces the repeated collect/findall lookups in per-generation key/value lists.
bayes = [count(==("b_upd"), all_gens[:, 1, i]) for i in 1:50]
good = [count(==("good_upd"), all_gens[:, 1, i]) for i in 1:50]
expl = [count(==("expl_upd"), all_gens[:, 1, i]) for i in 1:50]
pop = [count(==("pop_upd"), all_gens[:, 1, i]) for i in 1:50]

In [None]:
# 50 × 4 table, one row per generation; columns: Bayes, EXPL, Good, Popper.
rule_types = hcat(bayes, expl, good, pop);

In [None]:
# Write the per-generation rule counts as delimited text.
writedlm("rule_types1.txt", rule_types)

In [None]:
# Number of groups using each distance type in every generation (column 2 of `all_gens`).
# Each entry is a direct count of matching names, which is 0 when a type has died out;
# this replaces the repeated collect/findall lookups in per-generation key/value lists.
eucl = [count(==("Euclidean"), all_gens[:, 2, i]) for i in 1:50]
city = [count(==("Cityblock"), all_gens[:, 2, i]) for i in 1:50]
kl = [count(==("KLDivergence"), all_gens[:, 2, i]) for i in 1:50]

In [None]:
# 50 × 3 table, one row per generation; columns: Euclidean, KL divergence, city block.
# NOTE(review): KL comes before city block here, unlike the order in which the vectors
# are defined - make sure whatever reads dist_types1.txt expects this column order.
dist_types = hcat(eucl, kl, city);

In [None]:
# Write the per-generation distance-type counts as delimited text.
writedlm("dist_types1.txt", dist_types)

In [None]:
# Number of groups using each pooling function in every generation (column 3 of `all_gens`).
# Each entry is a direct count of matching names, which is 0 when a function has died out;
# this replaces the repeated collect/findall lookups in per-generation key/value lists.
mn = [count(==("mean"), all_gens[:, 3, i]) for i in 1:50]
gmn = [count(==("geomean"), all_gens[:, 3, i]) for i in 1:50]
hmn = [count(==("harmmean"), all_gens[:, 3, i]) for i in 1:50]

In [None]:
# 50 × 3 table, one row per generation; columns: mean, geomean, harmmean.
av_types = hcat(mn, gmn, hmn);

In [None]:
# Write the per-generation pooling-function counts as delimited text.
writedlm("av_types1.txt", av_types)

Mean and standard deviation of bonus values.

In [None]:
# Mean and standard deviation of the bonus values (column 4) in each of the 50 generations.
c_mn = Vector{Float64}(undef, 50)
c_std = Vector{Float64}(undef, 50)

for g in 1:50
    # the column is stored as Any, so convert it to Float64 before computing statistics
    c_mn[g], c_std[g] = mean_and_std(Vector{Float64}(all_gens[:, 4, g]))
end

In [None]:
# 50 × 2 table (mean, std) of the bonus values, rounded to six decimals.
bonus_gen = round.(hcat(c_mn, c_std), digits=6);

In [None]:
# Write the per-generation bonus statistics as delimited text.
writedlm("gen_bonus1.txt", bonus_gen)

Same for $\epsilon$ values.

In [None]:
# Mean and standard deviation of the ϵ values (column 5) in each of the 50 generations.
eps_mn = Vector{Float64}(undef, 50)
eps_std = Vector{Float64}(undef, 50)

for g in 1:50
    # the column is stored as Any, so convert it to Float64 before computing statistics
    eps_mn[g], eps_std[g] = mean_and_std(Vector{Float64}(all_gens[:, 5, g]))
end

In [None]:
# 50 × 2 table (mean, std) of the ϵ values, rounded to six decimals.
eps_gen = round.(hcat(eps_mn, eps_std), digits=6);

In [None]:
# Write the per-generation ϵ statistics as delimited text.
writedlm("eps1.txt", eps_gen)