In [128]:
using JLD
include("../data/synthetic.jl")
using .Synthetic
using TOML
using Gen
using PyPlot
using LinearAlgebra
using ProgressBars
using Statistics
using Distributions



In [267]:
experiment = 1
logmeanexp(x) = logsumexp(x)-log(length(x))

logmeanexp (generic function with 1 method)

In [268]:
path =  "../data/synthetic/$(experiment).toml"
SigmaU, U, T, X, Y, epsY, ftxu = generate_synthetic_confounder(path)
obj_size = TOML.parsefile(path)["data"]["obj_size"]
n, nX = size(X)
print()

# list of obj labels: [1,1,1, ... j,j,j]
label = 1
obj_label = zeros(n)
for i in 1:n
    obj_label[i] = label
    if (i % obj_size) == 0
        label += 1
    end 
end 

In [269]:
# generate initial parameters from prior
# ty, xy, uy, ynoise
@gen function generateLS(mean, scale)
    LS = @trace(normal(mean, scale), :LS)
    return LS
end
@gen function generateNoise(shape, scale)
    Noise = @trace(inv_gamma(shape, scale), :Noise)
    return Noise
end
nObj = Int(n/obj_size)
MappedGenerateLS = Map(generateLS)
MappedGenerateNoise = Map(generateNoise)

Map{Any,Gen.DynamicDSLTrace}(DynamicDSLFunction{Any}(Dict{Symbol,Any}(), Dict{Symbol,Any}(), Type[Any, Any], ##generateNoise#442, Bool[0, 0], false))

In [270]:
@gen function thetaProposal(trace, var::Float64)
    mu = trace[:theta]
    @trace(normal(mu, var), :theta)
end

@gen function alphaProposal(trace, i::Int, var::Float64)
    mu = trace[:alpha => i => :LS]
    @trace(normal(mu, var), :alpha => i => :LS)
end

@gen function betaProposal(trace, i::Int, var::Float64)
    mu = trace[:beta => i => :LS]
    @trace(normal(mu, var), :beta => i => :LS)
end

@gen function NoiseProposal(trace, var::Float64)
    cur = trace[:noise]
    
    Shape = (cur * cur / var) + 2
    Scale = cur * (Shape - 1)
    
    @trace(inv_gamma(Shape, Scale), :noise)
end


DynamicDSLFunction{Any}(Dict{Symbol,Any}(), Dict{Symbol,Any}(), Type[Any, Float64], ##NoiseProposal#446, Bool[0, 0], false)

In [271]:
@gen function LinearMLM(xs::Vector{Float64}, ts::Vector{Float64}, obj_label, nObj)
    n, nX = size(xs)
    beta =  @trace(MappedGenerateLS(fill(0.0, nX), fill(1.0, nX)), :beta) #xyLS
    theta = @trace(MappedGenerateLS(fill(0.0, nObj), fill(10.0, nObj)), :theta) #tyLS
    alpha = @trace(MappedGenerateLS(fill(0.0, nObj), fill(10.0, nObj)), :alpha) #alpha
    sigma = @trace(inv_gamma(4.0, 4.0), :noise) #tyLS
    for i in 1:n
        obj = Int(obj_label[i])
        t = ts[i]
        x = xs[i, :]
        @trace(normal(sum(beta.*x) + theta[obj] * t + alpha[obj], sigma), "y-$i")
    end
end

DynamicDSLFunction{Any}(Dict{Symbol,Any}(), Dict{Symbol,Any}(), Type[Array{Float64,1}, Array{Float64,1}, Any, Any], ##LinearMLM#447, Bool[0, 0, 0, 0], false)

In [272]:
@gen function LinearMLMOffset(xs::Vector{Float64}, ts::Vector{Float64}, obj_label, nObj)
    n, nX = size(xs)
    beta =  @trace(MappedGenerateLS(fill(0.0, nX), fill(1.0, nX)), :beta) #xyLS
    theta = @trace(normal(0,1), :theta) #tyLS
    alpha = @trace(MappedGenerateLS(fill(0.0, nObj), fill(10.0, nObj)), :alpha) #alpha
    sigma = @trace(inv_gamma(4.0, 4.0), :noise) #tyLS
    for i in 1:n
        obj = Int(obj_label[i])
        t = ts[i]
        x = xs[i, :]
        @trace(normal( sum(beta.*x) +theta * t + alpha[obj], sigma), "y-$i")
    end
end

DynamicDSLFunction{Any}(Dict{Symbol,Any}(), Dict{Symbol,Any}(), Type[Array{Float64,1}, Array{Float64,1}, Any, Any], ##LinearMLMOffset#448, Bool[0, 0, 0, 0], false)

In [273]:
constraints = choicemap()
for (i, y) in enumerate(Y)
    constraints["y-$i"] = y
end
n_run = 1000
PosteriorSamples = []
(trace, _) = generate(LinearMLMOffset, (X, T, obj_label, nObj), constraints)
for iter=tqdm(1:n_run)
    (trace, _) = mh(trace, thetaProposal, (0.5, ))
    (trace, _) = mh(trace, NoiseProposal, (0.5, ))
    for k in 1:nObj
        (trace, _) = mh(trace, alphaProposal, (k, 0.5))
    end 
    for k in 1:nX
        (trace, _) = mh(trace, betaProposal,  (k, 0.5))
    end 
    
    push!(PosteriorSamples, get_choices(trace))
end

100.00%┣████████████████████████████████████████████████████████▉┫ 1000/1000 00:30<00:00, 33.17 it/s]


In [240]:
if maximum(T) == 1
    binary = true
    doTs = [1.0, 0.0]
else
    binary = false
    doTnSteps = 20
    lower = minimum(T) * 1.05
    upper = maximum(T) * 0.95
    doTs = [i for i in range(lower,stop=upper,length=doTnSteps)]
end
print()

In [264]:
# evaluarion
Ycfs, mask, doT = [], nothing, nothing
preds = []
avg = nothing
avg_noises= []
for (i, doT) in enumerate(doTs)
    mask = T .!= doT
    push!(Ycfs, ftxu(fill(doT, sum(mask)), X[mask, :], U[mask, :], epsY[mask]))
    pred_at_doT = []
    noises= []
    for j in 801:n_run
        theta = PosteriorSamples[j][:theta]
        beta = [PosteriorSamples[j][:beta=>k=>:LS] for k in 1:nX]
        alpha = [PosteriorSamples[j][:alpha=>k=>:LS] for k in 1:nObj]
        noise = PosteriorSamples[j][:noise]
        
        Ypred = (X[mask, :] * beta .+ theta * doT .+ alpha[Int.(obj_label[mask])])
        push!(pred_at_doT, Ypred)
        push!(noises,noise)
    end
    avg = [mean([pred_at_doT[i][j] for i in 1:200]) for j in 1:length(pred_at_doT[i])]
    push!(preds, avg)
    
end

In [274]:
PEHEs = []
for k in 1:length(doTs)
    push!(PEHEs,(mean((Ycfs[k] .- [preds[k][i] for i in 1:size(preds[k])[1]]).^2)))
end 
(PEHEs.^0.5)

2-element Array{Float64,1}:
 8.325358702043946 
 3.6800151313184988

In [275]:
doTs

2-element Array{Float64,1}:
 1.0
 0.0

In [260]:
llh = []
for doT in 1:doTnSteps
    mu = [preds[doT][i] for i in 1:size(preds[doT])[1]] # 
    
    println(length(mu), " ", length(avg_noises), " ", length(Ycfs[doT]))
    push!(llh, logmeanexp([Distributions.logpdf(Normal(mu[j], noises[j]), Ycfs[doT][j]) for j in 1:length(mu)]))
end
mean(llh)

119 2 119
81 2 81


BoundsError: BoundsError: attempt to access 2-element Array{Any,1} at index [3]

In [275]:
function runMLM(experiment)
    path =  "../data/synthetic/$(experiment).toml"
    SigmaU, U, T, X, Y, epsY, ftxu = generate_synthetic_confounder(path)
    obj_size = TOML.parsefile(path)["data"]["obj_size"]
    n, nX = size(X)
    print()

    # list of obj labels: [1,1,1, ... j,j,j]
    label = 1
    obj_label = zeros(n)
    for i in 1:n
        obj_label[i] = label
        if (i % obj_size) == 0
            label += 1
        end 
    end 
    
    # train MLM
    constraints = choicemap()
    for (i, y) in enumerate(Y)
        constraints["y-$i"] = y
    end
    n_run = 1000
    PosteriorSamples = []
    (trace, _) = generate(LinearMLMOffset, (X, T, obj_label, nObj), constraints)
    for iter=(1:n_run)
        (trace, _) = mh(trace, thetaProposal, (0.5, ))
        (trace, _) = mh(trace, NoiseProposal, (0.5, ))
        for k in 1:nObj
            (trace, _) = mh(trace, alphaProposal, (k, 0.5))
        end 
        for k in 1:nX
            (trace, _) = mh(trace, betaProposal,  (k, 0.5))
        end 

        push!(PosteriorSamples, get_choices(trace))
    end
    
    if maximum(T) == 1
    binary = true
    doTs = [1.0, 0.0]
    else
        binary = false
        doTnSteps = 20
        lower = minimum(T) * 1.05
        upper = maximum(T) * 0.95
        doTs = [i for i in range(lower,stop=upper,length=doTnSteps)]
    end

    # evaluarion
    Ycfs, mask, doT = [], nothing, nothing
    preds = []
    for (i, doT) in enumerate(doTs)
        mask = T .!= doT
        push!(Ycfs, ftxu(fill(doT, sum(mask)), X[mask, :], U[mask, :], epsY[mask]))
        pred_at_doT = []
        for j in 801:n_run
            theta = PosteriorSamples[j][:theta]
            beta = [PosteriorSamples[j][:beta=>k=>:LS] for k in 1:nX]
            alpha = [PosteriorSamples[j][:alpha=>k=>:LS] for k in 1:nObj]
            Ypred = (X[mask, :] * beta .+ theta * doT .+ alpha[Int.(obj_label[mask])])
            push!(pred_at_doT, Ypred)
        end
        avg = [mean([pred_at_doT[i][j] for i in 1:200]) for j in 1:length(pred_at_doT[i])]
        push!(preds, avg)
    end
    
    PEHEs = []
    for k in 1:length(doTs)
        push!(PEHEs,(mean((Ycfs[k] .- [preds[k][i] for i in 1:size(preds[k])[1]]).^2)))
    end 
    mean(PEHEs.^0.5)
end

runMLM (generic function with 1 method)

In [276]:
for i in 1:12
    println(i)
    println(runMLM(i))
end 

1
0.5322290779343195
2
0.44025433695871174
3
11.900639863543852
4
6.833037204611493
5


InterruptException: InterruptException:

In [274]:
for i in 1:12
    println(i)
    println(runMLM(i))
end 

1


MethodError: MethodError: no method matching ^(::Array{Any,1}, ::Float64)
Closest candidates are:
  ^(!Matched::Missing, ::Number) at missing.jl:94
  ^(!Matched::Float64, ::Float64) at math.jl:781
  ^(!Matched::Irrational{:ℯ}, ::Number) at mathconstants.jl:91
  ...