In [53]:
## Generate a graph
using StatsBase
using Combinatorics

include("jl/omega.jl")
include("jl/HSBM.jl")

# --- Model parameters --------------------------------------------------------

n = 50                      # number of nodes
Z = rand(1:5, n)            # one label in 1:5 drawn per node
ϑ = vec(1 .+ rand(1, n))    # length-n vector; every entry is 1 plus a rand() draw

# --- Group intensity function Ω ----------------------------------------------

μ = mean(ϑ)

# Smallest and largest values of k for which partitions are enumerated below;
# also forwarded to the sampler / Louvain calls in later cells.
kmin = 1
kmax = 4

# Size-dependent factor (argument is the partition's sum) and
# partition-dependent factor (argument is the partition vector itself).
fk = k -> (2 * μ * k)^(-k)
fp = x -> harmonicMean(x)^3

# Map each integer partition p of k (k = kmin..kmax) to its intensity.
# Partitions whose parts sum to 1 are assigned intensity zero.
Ω_dict = Dict{Vector{Int64}, Float64}()

for k in kmin:kmax, p in partitions(k)
    total = sum(p)
    Ω_dict[p] = total == 1 ? 0.0 : fk(total) * fp(p) / 100
end

Ω = buildΩ(Ω_dict; by_size=true)

Ω (generic function with 1 method)

In [54]:
## Sample
# Draw a hypergraph from the model using the labels Z, the per-node
# parameters ϑ and the intensity function Ω built in the previous cell.
H = sampleSBM(Z, ϑ, Ω; kmin=kmin, kmax=kmax)

hypergraph
  N: Array{Int64}((50,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
  E: Dict{Integer,Dict}
  D: Array{Integer}((50,)) Integer[2, 2, 0, 5, 0, 2, 1, 0, 2, 1  …  6, 1, 6, 4, 1, 1, 3, 2, 3, 4]


In [55]:
## Run Louvains, Naive, and Faster version
include("jl/hypergraph_louvain.jl")

# Time both implementations on the same hypergraph and intensity function.
Zlou  = @time Naive_HyperLouvain(H, Ω)
Zlou2 = @time HyperLouvain(H, kmax, Ω)

# Score each returned partition under the same Ω.
mlou  = modularity(H, Zlou, Ω)
mlou2 = modularity(H, Zlou2, Ω)

# Cell result: do the two modularity scores agree up to floating-point tolerance?
isapprox(mlou, mlou2)


Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
  5.125278 seconds (22.10 M allocations: 1.147 GiB, 17.31% gc time)

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
  1.138947 seconds (2.48 M allocations: 101.197 MiB, 2.99% gc time)


true

In [56]:
include("jl/inference.jl")

# Singleton partition: node i carries label i, so every node is its own cluster.
# It must exist before the estimateΩ call below; without this definition the
# cell only works when a later cell has already been run in the same session.
Zsing = collect(1:n)

# For each partition of interest, estimate the intensity parameters from the
# sampled hypergraph H and wrap them into a callable with buildΩ.

# Partition returned by the faster Louvain implementation.
ω̂_lou2 = estimateΩ(H, Zlou2)
Ω̂_lou2 = buildΩ(ω̂_lou2; by_size=true)

# All-singletons partition.
ω̂_sing = estimateΩ(H, Zsing)
Ω̂_sing = buildΩ(ω̂_sing; by_size=true)

# Labels Z that the hypergraph was sampled with.
ω̂ = estimateΩ(H, Z)
Ω̂ = buildΩ(ω̂; by_size=true)

(::var"#Ω#731"{var"#Ω#730#732"{Dict{Int64,Dict{Array{Int64,1},Float64}}}}) (generic function with 1 method)

In [57]:
# Compare modularity scores of three partitions: the labels Z used for
# sampling, the Louvain output, and the all-singletons baseline.
# Each partition is scored with the Ω̂ that was estimated from that same partition.
#
# NOTE(review): an earlier version of this note described the baseline as
# "all nodes in one cluster" and asked why it beat the ground truth. The
# baseline built here gives every node its OWN label (n clusters), not one
# shared cluster, so that question should be re-checked against these numbers.

Zsing = Vector(1:n)

Q_true = Float64(modularity(H, Z, Ω̂))
Q_lou  = Float64(modularity(H, Zlou2, Ω̂_lou2))
Q_sing = Float64(modularity(H, Zsing, Ω̂_sing))

println("The modularity of the true partition is $(round(Q_true,digits=3)).")
println("The modularity of the Louvain partition is $(round(Q_lou,digits=3)).")
println("The modularity of the singelton partition is $(round(Q_sing,digits=3)).")

The modularity of the true partition is -595.683.
The modularity of the Louvain partition is -574.891.
The modularity of the singelton partition is -598.093.


In [58]:
# Report the log-likelihood of each partition under the generating Ω and ϑ,
# in the order: sampling labels, Louvain output, all-singletons baseline.
for (label, partition) in (("true", Z), ("Louvain", Zlou2), ("singleton", Zsing))
    ll = round(logLikelihood(H, partition, Ω, ϑ), digits=3)
    println("The log-likelihood of the $(label) partition is $(ll).")
end

The log-likelihood of the true partition is -362.563.
The log-likelihood of the Louvain partition is -356.375.
The log-likelihood of the singleton partition is -365.666.
