<h1>Bernoulli Mixture Density Net</h1>
A neural network trained to output the parameters of a Multivariate Bernoulli Mixture Model

In [1]:
include("../toy_ffnet.jl") # module nn

"""
    pdf_bernoulli(p, t)

Evaluate the Bernoulli probability mass `p^t * (1-p)^(1-t)`.

For `t = 1` this is `p`, and for `t = 0` it is `1 - p`.
"""
function pdf_bernoulli(p,t)
    on  = p^t            # factor contributed when the outcome is 1
    off = (1-p)^(1-t)    # factor contributed when the outcome is 0
    return on * off
end

"""
    EM_bernoulli(t, m, μ)

Perform one EM step for a mixture of multivariate Bernoulli distributions.

# Arguments
- `t`: `P×N` targets, one sample per column.
- `m`: the `G` mixing coefficients.
- `μ`: `P×G` means; column `g` belongs to component `g`.

# Returns
`(m_new, μ_new)`: `m_new` is the transposed `1×G` row of mean responsibilities,
and `μ_new` is the `P×G` matrix of responsibility-weighted target means.

# Degenerate cases
- If a sample has zero likelihood under every component, its responsibilities
  fall back to the normalised mixing coefficients instead of `0/0 = NaN`.
- If a component receives zero total responsibility, its column of `μ_new` is
  copied from `μ` instead of becoming `NaN`.
"""
function EM_bernoulli(t, m,μ)
    P=size(t,1)
    N=size(t,2)
    G=length(m)

    # E-step: responsibility of component g for sample n, normalised per sample.
    # The reductions are written as explicit loops so that the code does not
    # depend on the positional-dims form `sum(A, d)`, which newer Julia removed.
    τ=zeros(N,G)
    for n=1:N
        rowsum=0.0
        for g=1:G
            τ[n,g] = m[g] * prod([pdf_bernoulli(μ[p,g], t[p,n]) for p=1:P])
            rowsum += τ[n,g]
        end
        if rowsum == 0
            # No component explains this sample: use the prior as the posterior.
            prior=sum(m)
            for g=1:G; τ[n,g] = m[g]/prior; end
        else
            for g=1:G; τ[n,g] /= rowsum; end
        end
    end

    # M-step
    T1=zeros(1,G)                      # total responsibility per component
    for g=1:G, n=1:N; T1[1,g] += τ[n,g]; end
    T2 = t*τ                           # responsibility-weighted sum of targets
    m_new = T1./N

    μ_new = similar(T2)
    for g=1:G
        if T1[1,g] == 0
            μ_new[:,g] = μ[:,g]        # nothing assigned to g: keep its means
        else
            μ_new[:,g] = T2[:,g] ./ T1[1,g]
        end
    end
    return m_new', μ_new
end

"""
    ∇mixturemodel!(∇z, z, t, P, G, bsz)

Write the mixture-model gradient for each of the `bsz` columns of `z` into
`∇z[2]`.

For column `b`, the first `G` entries of `z[:,b]` are passed through
`nn.softmax` to give mixing coefficients `m`, and the remaining entries through
`nn.sigm` and reshaped to `P×G` to give means `μ` (both helpers come from
`toy_ffnet.jl`, which is not visible here). One `EM_bernoulli` step on the
target column `t[:,b]` yields `m_new, μ_new`, and the stored gradient is
`vcat(vec(m - m_new), vec((μ - μ_new) .* m_new'))`.

# Arguments
- `∇z`: gradient storage; only `∇z[2][:,b]` is overwritten.
- `z`: network outputs with `G + P*G` rows, one column per batch sample.
- `t`: targets with `P` rows, one column per batch sample.
- `P`, `G`: pattern length and number of mixture components.
- `bsz`: number of columns to process.
"""
function ∇mixturemodel!(∇z, z,t, P,G,bsz)
    for b=1:bsz
        m=nn.softmax(z[1:G,b])
        μ=reshape(nn.sigm(z[G+1:end,b]), P,G)

        # Fit against this sample's own target only. Passing the whole batch
        # would pull sample b's mixture towards targets that belong to other
        # inputs. For bsz == 1 the two are the same thing.
        m_new, μ_new = EM_bernoulli(t[:,b:b], m,μ)

        ∇zm = m - m_new
        ∇zμ = (μ - μ_new).*m_new'

        ∇z[2][:,b] = vcat(∇zm[:],∇zμ[:])
    end
end

"""
    some_data(P, N)

Build a toy data set of six hard-coded input/target pattern pairs, each
repeated `N/6` times as consecutive columns.

Returns `(x, t)`, two `5×N` matrices whose columns are inputs and targets.
The three distinct inputs each occur in two consecutive blocks and are paired
with two different targets. `N` must be a multiple of 6 because `N/6` is
converted with `Int`. The argument `P` is accepted but not used; every pattern
has five entries.
"""
function some_data(P,N)
    reps = Int(N/6)

    # one prototype column per block; inputs come in identical pairs
    inputs = ([1,0,0,0,0], [1,0,0,0,0],
              [0,1,0,0,0], [0,1,0,0,0],
              [0,0,1,0,0], [0,0,1,0,0])

    # target prototype for the block at the same position in `inputs`
    targets = ([1,1,0,0,0], [0,0,1,1,0],
               [1,1,0,0,0], [0,1,1,0,0],
               [1,1,1,0,0], [0,0,1,1,1])

    # outer product with a row of ones copies the column `reps` times
    tile(col) = col * ones(reps)'

    x = hcat(map(tile, inputs)...)
    t = hcat(map(tile, targets)...)
    return x, t
end

"""
    print_mapping(xeval, m, μ)

Print, for every column `s` of `xeval`, the input pattern followed by one line
per mixture component showing its mixing coefficient and its mean vector, both
rounded to one decimal.

# Arguments
- `xeval`: inputs, one per column.
- `m`: `G×S` mixing coefficients (component × input).
- `μ`: `(P*G)×S` means; column `s` is reshaped to `P×G`.
"""
function print_mapping(xeval, m,μ)
    S=size(xeval,2)
    G=size(m,1)
    # Take the pattern length from the data instead of reading a global `P`.
    P=div(size(μ,1), G)

    # One-decimal rounding written without a digits argument, whose calling
    # convention differs between Julia versions.
    round1(v) = round(v*10)/10

    for s=1:S
        println("input")
        println(s,": ",xeval[:,s])
        println("maps to")
        μs=round1.(reshape(μ[:,s], P,G))
        for g=1:G
            println(g,": ",round1(m[g,s]), " * ",μs[:,g])
        end
        println()
    end
end

print_mapping (generic function with 1 method)

In [2]:
"""
    trainMDN(P, G; N=600, hidden=20, epochs=1000, lr=0.0001)

Train the toy network on the data from `some_data(P, N)` so that its outputs
parameterise a `G`-component Bernoulli mixture over `P`-element patterns, and
return the trained `(W, b)`.

# Arguments
- `P`: pattern length (first entry of the size list given to `nn.preallocate`).
- `G`: number of mixture components; the last layer has `G + P*G` outputs
  (`G` for the mixing coefficients, `P*G` for the means).

# Keywords
- `N`: number of samples requested from `some_data`, which converts `N/6`
  with `Int`.
- `hidden`: middle entry of the size list given to `nn.preallocate`.
- `epochs`: number of passes over the shuffled data.
- `lr`: last argument of `nn.adjust!` (presumably the step size; `nn` is
  defined in `toy_ffnet.jl`, not visible here).
"""
function trainMDN(P,G; N=600, hidden=20, epochs=1000, lr=0.0001)
    Lw=[P,hidden, G+P*G] #m,μ
    # Kept at 1: the notes in this file report that with larger batches the
    # components tend not to specialise.
    bsz=1
    x,t = some_data(P,N)

    W,b,z,y, ∇W,∇b,∇z,∇y = nn.preallocate(Lw, bsz)

    for epoch=1:epochs
        idx=randperm(size(x,2))    # fresh sample order every epoch

        for n=1:bsz:size(x,2)-bsz+1
            bidx=idx[n:n+bsz-1]
            xb=x[:,bidx]
            tb=t[:,bidx]
            nn.fprop!(xb, W,b,z,y)
            ∇mixturemodel!(∇z, y[2],tb, P,G,bsz)
            nn.bprop!(∇W,∇z,∇y, W,xb,y)
            nn.adjust!(W,b, ∇W,∇z, lr)
        end
    end
    return W,b
end

# Driver: train the network, then show what each distinct input maps to.
# P is the pattern length (the patterns in some_data have 5 entries) and
# G is the number of mixture components.
P=5
G=2
W,b = trainMDN(P,G);

# With N=6, some_data returns one column per hard-coded pattern. The inputs
# come in identical pairs, so every second column gives the three distinct ones.
xeval,_ = some_data(P,6)
xeval=xeval[:,1:2:end]
# nn.inference is defined in toy_ffnet.jl; presumably a forward pass that
# returns the output layer with one column per input — TODO confirm.
Y=nn.inference(xeval,W,b);

# The first G output rows become mixing coefficients, the remaining P*G rows
# become the component means.
m=nn.softmax(Y[1:G,:])
μ=nn.sigm(Y[G+1:end,:]);

print_mapping(xeval, m,μ)

input
1: [1.0, 0.0, 0.0, 0.0, 0.0]
maps to
1: 0.5 * [0.0, 0.0, 1.0, 1.0, 0.0]
2: 0.5 * [1.0, 1.0, 0.0, 0.0, 0.0]

input
2: [0.0, 1.0, 0.0, 0.0, 0.0]
maps to
1: 0.0 * [0.0, 0.0, 1.0, 1.0, 0.9]
2: 1.0 * [0.5, 1.0, 0.5, 0.0, 0.0]

input
3: [0.0, 0.0, 1.0, 0.0, 0.0]
maps to
1: 0.5 * [0.0, 0.0, 1.0, 1.0, 1.0]
2: 0.5 * [1.0, 1.0, 1.0, 0.0, 0.0]



<h3>some notes:</h3>
Use bsz=1; otherwise the components seem to prefer not to specialize.

<p>a single input x=[1,0,0,0,0] can map to several targets.
<p>if targets are [1,1,0,0,0] and [0,0,1,1,0] it maps to those components 0.5 x [1,1,0,0,0], 0.5 x [0,0,1,1,0]
<p>but if targets are [1,1,0,0,0] and [0,1,1,0,0] (shares a 1 in element 2)
<p>it sometimes maps to the desired 0.5 x [1,1,0,0,0], 0.5 x [0,1,1,0,0]
<p>but often it maps to the less desirable 1.0 x [0.5,1,0.5,0,0]

//Anders