### Synthetic Data Experiment

The data for this experiment is synthetic. The code below generates the data, performs a single run of each method (HDPMM, HDPMM Global Only, DPMM, DPMM Separated, vHDPMM), and evaluates the results.

Note that the data generation is random, so the exact numbers may differ between runs; however, the ranking of how well the methods perform is consistent. Also, this is a single run, so no mean or standard deviation is reported.

In [1]:
using LinearAlgebra
using Clustering
using Random

#### Evaluation Functions 

In [2]:
"""
    evaluate_pred_groups(GT, pred)

Return the normalized mutual information (NMI) between ground-truth and predicted
labels, averaged over the groups `1:length(pred)`.

`GT[i]` and `pred[i]` are the label collections of group `i`. A group whose NMI
evaluates to `NaN` is scored as `1.0`.
"""
function evaluate_pred_groups(GT,pred)
    group_count = length(pred)
    # Score of a single group; a NaN result from `mutualinfo` is replaced by 1.0.
    function group_score(idx)
        score = mutualinfo(GT[idx], pred[idx]; normed = true)
        return isnan(score) ? 1.0 : score
    end
    # Left fold starting at 0.0 so the scores are accumulated in group order.
    total = mapfoldl(group_score, +, 1:group_count; init = 0.0)
    return total / group_count
end


"""
    evaluate_all_results(GT_Global, GT_Local, pred_HDPMM, pred_HDPMM_GLOBAL, pred_DPMM,
                         pred_DPMM_seperated, pred_vHDPMM_global, pred_vHDPMM_local,
                         pred_sep_vHDPMM_global, pred_sep_vHDPMM_local)

Compute the NMI score of every method, print a report, and return the scores.

`GT_Global` and `GT_Local` hold the per-group ground-truth labels and are indexed by
group `1:length(...)`. Every prediction except `pred_DPMM` is scored per group with
`evaluate_pred_groups`; `pred_DPMM` is a single flat label vector and is scored against
the local ground truth concatenated in group order.

Returns the tuple `(HDPMM, HDPMM global, DPMM, DPMM separated, vHDPMM global,
vHDPMM local, vHDPMM sep global, vHDPMM sep local)` of NMI values.
"""
function evaluate_all_results(GT_Global,
        GT_Local,
        pred_HDPMM,
        pred_HDPMM_GLOBAL,
        pred_DPMM,
        pred_DPMM_seperated,
        pred_vHDPMM_global,
        pred_vHDPMM_local,
        pred_sep_vHDPMM_global,
        pred_sep_vHDPMM_local)
    # Flatten the per-group local labels in group order 1..n into one vector, because
    # `pred_DPMM` is compared as a single flat labelling.
    concat_GT_Local = reduce(vcat, [GT_Local[i] for i=1:length(GT_Local)])

    HDPMM_NMI = evaluate_pred_groups(GT_Global, pred_HDPMM)
    HDPMM_GLOBAL_NMI = evaluate_pred_groups(GT_Global, pred_HDPMM_GLOBAL)
    DPMM_NMI = mutualinfo(concat_GT_Local, pred_DPMM; normed =true)
    DPMM_SEP_NMI = evaluate_pred_groups(GT_Local, pred_DPMM_seperated)
    vHDPMM_GLOBAL_NMI = evaluate_pred_groups(GT_Global, pred_vHDPMM_global)
    vHDPMM_LOCAL_NMI = evaluate_pred_groups(GT_Local, pred_vHDPMM_local)
    vHDPMM_SEP_GLOBAL_NMI = evaluate_pred_groups(GT_Global, pred_sep_vHDPMM_global)
    vHDPMM_SEP_LOCAL_NMI = evaluate_pred_groups(GT_Local, pred_sep_vHDPMM_local)

    # Each entry ends with an explicit `\n` plus the literal line break, so entries are
    # separated by one blank line. The "vHDPMM Local" entry previously lacked its `\n`.
    println("        HDPMM NMI: $HDPMM_NMI \n
        HDPMM Global NMI: $HDPMM_GLOBAL_NMI \n
        DPMM NMI: $DPMM_NMI \n
        DPMM SEPERATED NMI: $DPMM_SEP_NMI\n
        vHDPMM Global NMI: $vHDPMM_GLOBAL_NMI\n
        vHDPMM Local NMI: $vHDPMM_LOCAL_NMI\n
        vHDPMM Sep Global NMI: $vHDPMM_SEP_GLOBAL_NMI\n
        vHDPMM Sep Local NMI: $vHDPMM_SEP_LOCAL_NMI")
    return HDPMM_NMI, HDPMM_GLOBAL_NMI, DPMM_NMI, DPMM_SEP_NMI, vHDPMM_GLOBAL_NMI, vHDPMM_LOCAL_NMI, vHDPMM_SEP_GLOBAL_NMI, vHDPMM_SEP_LOCAL_NMI
end
    

evaluate_all_results (generic function with 1 method)

In [4]:
# Load the Distributed standard library and start two additional worker processes.
# Note: re-running this cell adds two more workers each time.
using Distributed
addprocs(2)
# Load both model packages on every process (the master and all workers).
@everywhere using DPMMSubClusters
@everywhere using VersatileHDPMixtureModels

In [5]:
"""
    run_methods_and_get_results(data, gprior, lprior, dp_prior, ghdp, gdim)

Fit each compared model once on `data` and return the labels every model produced.

`data` is indexed by group `1:length(data)` and is also iterated as `(key, value)`
pairs; rows `1:gdim` of each group's matrix are the input of the global-only HDP run.
`gprior`/`lprior` are passed to `vhdp_fit`, `ghdp` to the all-dimensions `hdp_fit`,
and `dp_prior` to `DPMMSubClusters.fit`.

Returns, in order: HDP (all dims) global labels, HDP (global dims only) global labels,
DPMM labels on the pooled data, per-group DPMM labels, vHDPMM global labels, vHDPMM
local labels, per-group vHDPMM global labels, per-group vHDPMM local labels.
"""
function run_methods_and_get_results(data,gprior,lprior,dp_prior,ghdp,gdim)
    group_ids = 1:length(data)

    # vHDPMM fitted jointly on all groups.
    println("vHDPMM")
    joint_vhdp = vhdp_fit(data,gdim,100.0,1000.0,100.0,gprior,lprior,50)
    joint_vhdp_groups = joint_vhdp[1].groups_dict
    vhdpmm_global = Dict([g => create_global_labels(joint_vhdp_groups[g]) for g in group_ids])
    vhdpmm_local = Dict([g => joint_vhdp_groups[g].labels for g in group_ids])

    # HDP on all dimensions.
    println("HDP-ALL")
    hdp_all = hdp_fit(data,100.0,1000.0,ghdp,50)
    hdp_all_groups = hdp_all[1].groups_dict
    hdp_all_global = Dict([g => create_global_labels(hdp_all_groups[g]) for g in group_ids])

    # HDP restricted to the first `gdim` rows of every group.
    println("HDP-GLOBAL")
    global_pts = Dict([k=>v[1:gdim,:] for (k,v) in data])
    hdp_global = hdp_fit(global_pts,100.0,1000.0,gprior,50)
    hdp_global_groups = hdp_global[1].groups_dict
    hdp_global_global = Dict([g => create_global_labels(hdp_global_groups[g]) for g in group_ids])

    # DPMM on all groups pooled column-wise, in group order.
    println("DPMM-ALL")
    pooled = reduce(hcat, [data[g] for g in group_ids])
    dpmm_all_fit = DPMMSubClusters.fit(pooled,dp_prior,100.0,iters = 100, verbose = false)
    dpmm_all_local = dpmm_all_fit[1]

    # One independent DPMM per group.
    println("DPMM-SEP")
    dpmm_sep_fits = [DPMMSubClusters.fit(data[g],dp_prior,100.0,iters = 100, verbose = false) for g in group_ids]
    dpmm_seperated_local = [fit_result[1] for fit_result in dpmm_sep_fits]

    # One independent vHDPMM per group, each wrapped as a single-group dataset keyed by 1.
    println("vHDPMM-SEP")
    vhdp_sep_fits = [vhdp_fit(Dict([1=>data[g]]),gdim,100.0,1000.0,100.0,gprior,lprior,50) for g in group_ids]
    vhdpmm_sep_global = Dict([g => create_global_labels(vhdp_sep_fits[g][1].groups_dict[1]) for g in group_ids])
    vhdpmm_sep_local = Dict([g => vhdp_sep_fits[g][1].groups_dict[1].labels for g in group_ids])

    return hdp_all_global,hdp_global_global,dpmm_all_local,dpmm_seperated_local, vhdpmm_global, vhdpmm_local,vhdpmm_sep_global,vhdpmm_sep_local
end

run_methods_and_get_results (generic function with 1 method)

### G3/2/5/1

In [6]:
# Generate the grouped synthetic data set and its labels.
# NOTE(review): the meaning of the positional arguments is defined by
# `generate_grouped_gaussian_data` (VersatileHDPMixtureModels) — confirm against its docs.
pts, labels = generate_grouped_gaussian_data(20000, 2, 1, 3, 5, 10, false, 25.0, false)

# Column 1 of each group's label array is used as the global ground truth,
# column 2 as the local ground truth; both are converted to integer vectors.
labels_global = Dict([k => Int.(vec(v[:, 1])) for (k, v) in labels])
labels_local = Dict([k => Int.(vec(v[:, 2])) for (k, v) in labels])

# Priors for the vHDPMM / global-only HDP runs and for the all-dimensions HDP run.
# NOTE(review): arguments are presumably (global dim, local dim, prior kind) — confirm.
g_prior, l_prior = create_default_priors(2, 1, :niw)
g_hdp, _ = create_default_priors(3, 0, :niw)

# NIW hyperparameters for the DPMM baselines: zero vector of length 3 and a 3x3 identity.
dp_prior = DPMMSubClusters.niw_hyperparams(
    1.0,
    zeros(3),
    6,
    Matrix{Float64}(I, 3, 3) * 1,
)

DPMMSubClusters.niw_hyperparams(1.0f0, Float32[0.0, 0.0, 0.0], 6.0f0, Float32[1.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 1.0])

In [7]:
# Run every method once; the names follow the return order of
# `run_methods_and_get_results`. The last argument (2) is `gdim`.
(hdp_all_global, hdp_global_global, dpmm_all_local, dpmm_seperated_local,
    vhdpmm_global, vhdpmm_local, vhdpmm_sep_global, vhdpmm_sep_local) =
    run_methods_and_get_results(pts, g_prior, l_prior, dp_prior, g_hdp, 2)

vHDPMM
Iteration: 1|| Global Counts: [10]|| iter time: 15.270561933517456
Iteration: 2|| Global Counts: [10]|| iter time: 0.09072518348693848
Iteration: 3|| Global Counts: [10]|| iter time: 0.0733938217163086
Iteration: 4|| Global Counts: [10]|| iter time: 0.07050585746765137
Iteration: 5|| Global Counts: [10]|| iter time: 0.19986987113952637
Iteration: 6|| Global Counts: [11, 11]|| iter time: 1.3131649494171143
Iteration: 7|| Global Counts: [12, 11]|| iter time: 0.11937594413757324
Iteration: 8|| Global Counts: [15, 11]|| iter time: 0.6617319583892822
Iteration: 9|| Global Counts: [16, 11]|| iter time: 0.12209391593933105
Iteration: 10|| Global Counts: [18, 13]|| iter time: 0.08200716972351074
Iteration: 11|| Global Counts: [18, 13]|| iter time: 0.2327899932861328
Iteration: 12|| Global Counts: [19, 14, 19]|| iter time: 0.08287501335144043
Iteration: 13|| Global Counts: [20, 16, 14]|| iter time: 0.43161988258361816
Iteration: 14|| Global Counts: [14, 17, 13]|| iter time: 0.09724116325

(Dict(7 => [7; 7; … ; 2; 2],4 => [3; 3; … ; 2; 2],9 => [1; 1; … ; 1; 1],10 => [1; 1; … ; 7; 1],2 => [1; 1; … ; 2; 2],3 => [4; 4; … ; 2; 2],5 => [4; 4; … ; 2; 2],8 => [2; 2; … ; 5; 5],6 => [2; 2; … ; 2; 2],1 => [1; 1; … ; 5; 5]…), Dict(7 => [3; 3; … ; 2; 2],4 => [1; 1; … ; 2; 2],9 => [3; 3; … ; 3; 3],10 => [3; 3; … ; 3; 3],2 => [3; 3; … ; 2; 2],3 => [1; 1; … ; 2; 2],5 => [1; 1; … ; 2; 2],8 => [2; 2; … ; 3; 3],6 => [2; 2; … ; 2; 2],1 => [3; 3; … ; 3; 3]…), [3, 3, 3, 3, 3, 3, 3, 3, 3, 3  …  3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [[2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [3, 3, 3, 3, 3, 3, 3, 3, 3, 3  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [3, 3, 3, 3, 3, 3, 3, 3, 3, 3  …  2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [3, 3, 3, 3, 3, 3, 3

In [8]:
# Score all methods against the ground truth; prints the report and returns the NMI tuple.
evaluate_all_results(
    labels_global, labels_local,
    hdp_all_global, hdp_global_global,
    dpmm_all_local, dpmm_seperated_local,
    vhdpmm_global, vhdpmm_local,
    vhdpmm_sep_global, vhdpmm_sep_local,
)

        HDPMM NMI: 0.8247037529487768 

        HDPMM Global NMI: 0.9965057438935003 

        DPMM NMI: 0.07525700523292128 

        DPMM SEPERATED NMI: 0.7734244337920403

        vHDPMM Global NMI: 0.9987526916835051

        vHDPMM Local NMI: 0.8776641706369462
        vHDPMM Sep Global NMI: 0.9989081132922045

        vHDPMM Sep Local NMI: 0.8799099300832822


(0.8247037529487768, 0.9965057438935003, 0.07525700523292128, 0.7734244337920403, 0.9987526916835051, 0.8776641706369462, 0.9989081132922045, 0.8799099300832822)