In [20]:
using StatsBase
using Combinatorics
using Plots

include("jl/omega.jl")
include("jl/HSBM.jl")
include("jl/cut.jl")

# Model parameters: number of nodes n, a random label in 1:5 per node
# (Z, presumably the group assignment), and a per-node parameter ϑ drawn
# uniformly from [1, 2).

n = 50
Z = rand(1:5, n)
ϑ = vec(ones(1, n) + rand(1, n))

# Ingredients of the group intensity function Ω
μ = mean(ϑ)

kmax = 4
kmin = 1

# factor depending only on the total size of the partition
fk = sz -> (2.4 * μ * sz)^(-sz)
# factor depending on the partition itself
fp = harmonicMean

Ω_dict = Dict()

# one entry per integer partition p of every k in kmin:kmax
for k in kmin:kmax
    for p in partitions(k)
        Ω_dict[p] = fk(sum(p)) * fp(p)
    end
end

Ω = buildΩ(Ω_dict; by_size=true)

hypergraph
  E: Dict{Integer,Dict}
  D: Array{Integer}((50,)) Integer[95, 125, 85, 67, 103, 112, 124, 76, 83, 90  …  107, 73, 132, 122, 76, 129, 85, 103, 128, 113]


In [21]:
# Draw a hypergraph H from the HSBM with parameters (Z, ϑ, Ω); hyperedge
# sizes are restricted to at most kmax (kmin is passed through as well).
H = sampleSBM(Z, ϑ, Ω; kmin=kmin, kmax=kmax)

hypergraph
  E: Dict{Integer,Dict}
  D: Array{Integer}((50,)) Integer[91, 115, 95, 44, 97, 138, 136, 60, 74, 119  …  101, 61, 133, 98, 63, 138, 75, 126, 135, 95]


In [2]:
# Reload the cut helpers, then score a uniformly random assignment of the
# n nodes to k clusters.
include("jl/cut.jl")

k = 10
c = rand(1:k, n)

# value displayed as the cell output
term1 = first_term_eval(H, c, Ω)

-12620.992903638218

In [3]:
# PC: the checks below are broken under the dictionary-defined Ω. We could add these entries manually if desired, but I'm not sure whether this is actually a problem.

# Quick checks on the group evaluation function
# p4 = [4,0,0,0]
# p3 = [3,1,0,0]
# p2 = [2,2,0,0]
# p1 = [2,1,1,0]
# p0 = [1,1,1,1]
# @show Ω(p4;mode="partition"), Ω(p3;mode="partition"),Ω(p2;mode="partition"),Ω(p1;mode="partition"),Ω(p0;mode="partition")
# I'm going to think about some group interaction functions whose output is ordered
# the same way as I've ordered the partitions here, i.e. Ω(p(i+1)) >= Ω(p(i)).
# Ordering by majorization seems natural for clustering applications.

# PC: the harmonic mean will give you zeros whenever there is a group without any nodes in it. I'd suggest writing this informal experiment as in the next block: 

In [4]:
# This still works because the partitions are not zero-padded.
# Print every partition of 4, grouped by number of parts, next to its Ω value.

for nparts in 1:4
    for p in partitions(4, nparts)
        print(p, " ")
        println(Ω(p; mode="partition"))
    end
end

# An interesting observation: the harmonic mean doesn't actually respect the majorization order; e.g. in the output below, [2, 2] scores higher than [3, 1].

[4] 3.3600649602968195e-5
[3, 1] 3.1268927789509234e-5
[2, 2] 3.3600649602968195e-5
[2, 1, 1] 2.8254665801332098e-5
[1, 1, 1, 1] 2.3759247186531884e-5


In [5]:
# Time the evaluation of the first term on the random clustering c.
@time T1 = first_term_eval(H, c, Ω)

  0.005663 seconds (31.45 k allocations: 2.288 MiB)


-12620.992903638218

In [16]:
# Alternative evaluation of the same quantity; for now it needs the
# hypergraph converted to a different storage format first.

# conversion step, timed separately
@time Hyp, w = hyperedge_formatting(H)

# evaluation on the converted representation
@time T2 = first_term_v2(Hyp, w, c, Ω)

# both routes should agree up to floating-point tolerance
isapprox(T1, T2)

  0.000476 seconds (1.81 k allocations: 93.250 KiB)
  0.007179 seconds (29.48 k allocations: 2.258 MiB)


true

In [17]:
# Rough cost of 10000 calls each: Ω in "partition" mode, Ω in "group" mode,
# and partitionize on its own.
@time for _ in 1:10000
    Ω([3, 1]; mode="partition")
end
@time for _ in 1:10000
    Ω([1, 1, 2, 1]; mode="group")
end
@time for _ in 1:10000
    partitionize([1, 1, 2, 1])
end

  0.008468 seconds (40.00 k allocations: 1.373 MiB)
  0.023124 seconds (160.00 k allocations: 14.954 MiB)
  0.047606 seconds (130.00 k allocations: 14.496 MiB, 55.73% gc time)
