In [5]:
## Generate a graph
using StatsBase
using Combinatorics

include("jl/omega.jl")
include("jl/HSBM.jl")

# Model parameters

n = 50                              # number of nodes
Z = rand(1:5, n)                    # one label in 1:5 drawn for each node
ϑ = vec(ones(1, n) + rand(1, n))    # per-node values in [1, 2), flattened to a length-n vector

# Ingredients of the group intensity function Ω
μ = mean(ϑ)

kmax = 4
kmin = 1

fk = k -> (2 * μ * k)^(-k)      # factor that depends only on the total size k
fp = p -> harmonicMean(p)^3     # factor that depends on the partition vector itself

# One entry per integer partition p of every k in kmin:kmax
Ω_dict = Dict{Vector{Int64}, Float64}()

for k in kmin:kmax
    for p in partitions(k)
        Ω_dict[p] = fk(sum(p)) * fp(p) / 100
    end
end

# Entries whose partition sums to 1 are overwritten with zero
for p in collect(keys(Ω_dict))
    sum(p) == 1 && (Ω_dict[p] = 0)
end

# Wrap the lookup table into the callable Ω (this is also the value the cell displays)
Ω = buildΩ(Ω_dict; by_size=true)

┌ Info: Precompiling Parameters [d96e819e-fc66-5662-9728-84c9c7592b0a]
└ @ Base loading.jl:1260


Ω (generic function with 1 method)

In [6]:
## Draw a hypergraph H from (Z, ϑ, Ω), forwarding kmin and kmax as keyword arguments
H = sampleSBM(Z, ϑ, Ω; kmin=kmin, kmax=kmax)

hypergraph
  N: Array{Int64}((50,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
  E: Dict{Int64,Dict}
  D: Array{Int64}((50,)) [3, 8, 2, 3, 2, 7, 2, 7, 4, 2  …  3, 2, 2, 1, 3, 5, 2, 1, 3, 3]


In [8]:
## Run both Louvain variants (naive and faster) and compare the modularity they reach
include("jl/hypergraph_louvain.jl")

# NOTE(review): @time on a first call also measures compilation, so these two
# timings are only a rough comparison.
Zlou  = @time Naive_HyperLouvain(H, Ω)
Zlou2 = @time HyperLouvain(H, kmax, Ω)

# Modularity of each result, both evaluated with the same Ω
mlou  = modularity(H, Zlou, Ω)
mlou2 = modularity(H, Zlou2, Ω)

# Cell value: whether the two modularities are approximately equal
isapprox(mlou, mlou2)


Louvain Iteration 1
Louvain Iteration 2
  2.239675 seconds (16.86 M allocations: 927.129 MiB, 17.19% gc time)

Louvain Iteration 1
Louvain Iteration 2
  0.706928 seconds (1.60 M allocations: 80.104 MiB, 4.18% gc time)


true

In [10]:
include("jl/inference.jl")

# Partition that puts every node in its own cluster
Zsing = Vector(1:n)

# For each partition below: estimate ω̂ from (H, partition), then wrap it with buildΩ.

# -- partition returned by HyperLouvain
ω̂_lou2 = estimateΩ(H, Zlou2)
Ω̂_lou2 = buildΩ(ω̂_lou2; by_size = true)

# -- all-singletons partition
ω̂_sing = estimateΩ(H, Zsing)
Ω̂_sing = buildΩ(ω̂_sing; by_size = true)

# -- labels Z used to generate H (kept last: its result is the value the cell displays)
ω̂ = estimateΩ(H, Z)
Ω̂ = buildΩ(ω̂; by_size = true)

(::var"#Ω#288"{var"#Ω#287#289"{Dict{Int64,Dict{Array{Int64,1},Float64}}}}) (generic function with 1 method)

In [11]:
# Compare the modularity of the Louvain partition against the ground-truth
# communities and against the all-singletons partition (every node in its own
# cluster; Zsing is NOT the partition that puts all nodes in one cluster).
# NOTE(review): each score is evaluated with the Ω̂ estimated from its own
# partition, so the three numbers are not computed under one common Ω̂. This may
# account for surprising orderings between them — TODO confirm.

Q_true = convert(Float64, modularity(H, Z, Ω̂))
Q_sing = convert(Float64, modularity(H, Zsing, Ω̂_sing))
Q_lou  = convert(Float64, modularity(H, Zlou2, Ω̂_lou2))

println("The modularity of the true partition is $(round(Q_true,digits=3)).")
println("The modularity of the Louvain partition is $(round(Q_lou,digits=3)).")
println("The modularity of the singleton partition is $(round(Q_sing,digits=3)).")

The modularity of the true partition is -702.83.
The modularity of the Louvain partition is -696.293.
The modularity of the singelton partition is -703.179.


In [12]:
# Log-likelihood of each partition, always evaluated at the true parameters (Ω, ϑ)

println("The log-likelihood of the true partition is ",
        round(logLikelihood(H, Z, Ω, ϑ), digits=3), ".")
println("The log-likelihood of the Louvain partition is ",
        round(logLikelihood(H, Zlou2, Ω, ϑ), digits=3), ".")
println("The log-likelihood of the singleton partition is ",
        round(logLikelihood(H, Zsing, Ω, ϑ), digits=3), ".")

The log-likelihood of the true partition is -409.981.
The log-likelihood of the Louvain partition is -409.334.
The log-likelihood of the singleton partition is -415.225.


# Alternating Updates

In [13]:
# Alternate between (1) running HyperLouvain with the current Ω̂ and (2) re-estimating
# Ω̂ from the partition it returns, printing the log-likelihood after every round.
# Encouragingly, the log-likelihood tends to increase over the rounds (equivalently,
# the negative log-likelihood tends to decrease). I don't think it is required to be
# monotone (need to check), so heuristically this looks ok-ish.

# Starting point: Ω̂ estimated from the labels Z
Ω̂ = buildΩ(estimateΩ(H, Z); by_size=true)

Z_ = copy(Z)

for i in 1:5
    # Explicitly rebind the top-level variables; without this the loop only works
    # under the notebook/REPL soft-scope rule and fails when run as a script.
    global Z_, Ω̂
    Z_ = HyperLouvain(H,kmax,Ω̂)
    Ω̂  = buildΩ(estimateΩ(H, Z_); by_size=true)
    println("The log-likelihood of the Louvain partition is $(round(logLikelihood(H, Z_, Ω̂),digits=3)).")
end


Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
The log-likelihood of the Louvain partition is -380.986.

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
The log-likelihood of the Louvain partition is -363.522.

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
The log-likelihood of the Louvain partition is -362.346.

Louvain Iteration 1
Louvain Iteration 2
The log-likelihood of the Louvain partition is -371.235.

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
The log-likelihood of the Louvain partition is -362.346.
