In [1]:
include("jl/inference.jl")
using Printf

In [2]:
# Load a labeled hypergraph from the `data/<dataname>/` directory.
#
# Reads `node-labels-<dataname>.txt` (one integer label per line) and
# `hyperedges-<dataname>.txt` (one hyperedge per line, node ids separated by
# commas). Hyperedges with more than `maxsize` nodes are dropped.
#
# Returns the tuple `(hypergraph(1:n, E, D), labels)` where
#   * `n` is the number of label lines,
#   * `E[k]` maps every distinct size-`k` hyperedge (a sorted node-id vector) to 1,
#   * `D[v]` counts the stored hyperedges that contain node `v`.
# Node ids are used directly as indices into `D`, so they must lie in `1:n`.
function read_hypergraph_data(dataname::String, maxsize::Int64=25)
    # The number of label lines fixes the node count.
    labels = open("data/$dataname/node-labels-$dataname.txt") do io
        Int64[parse(Int64, row) for row in eachline(io)]
    end
    n = length(labels)

    # Bucket the hyperedges by size; a repeated hyperedge collapses onto one key.
    E = Dict{Integer, Dict}()
    open("data/$dataname/hyperedges-$dataname.txt") do io
        for row in eachline(io)
            edge = sort!(parse.(Int64, split(row, ',')))
            sz = length(edge)
            sz > maxsize && continue
            bucket = get!(() -> Dict(), E, sz)
            bucket[edge] = 1
        end
    end

    # Degree sequence: one increment per stored hyperedge touching a node.
    D = zeros(Int64, n)
    for edges in values(E), e in keys(edges)
        D[e] .+= 1
    end

    return hypergraph(1:n, E, D), labels
end
;

In [3]:
# A bunch of move-based aggregation functions,
# all stratified by hyperedge size

# Trivial aggregator: hand the partition vector back unchanged (same object).
# NOTE(review): unless `identity` was explicitly imported from Base elsewhere,
# this defines a new function in the current module that shadows `Base.identity`
# and only accepts `Vector{Int64}` — confirm that this is intended.
function identity(p::Vector{Int64})
    return p
end

# Aggregator returning `(sum(p), discount)`, where `discount` is the sum of the
# entries of `p` raised to the power `α`, minus the largest entry raised to `α`.
# With the default `α = 1.0` this is the total of `p` outside its largest entry.
function discount_cut(p::Vector{Int64}, α=1.0)
    powered = p .^ α
    return (sum(p), sum(powered) - maximum(p)^α)
end

# Aggregator returning `(sum(p), length(p) - 1)`: the total of `p` paired with
# the number of entries of `p` minus one.
function sum_of_ext_degs(p::Vector{Int64})
    return (sum(p), length(p) - 1)
end

# Aggregator returning `(sum(p), flag)`, where `flag` is `true` exactly when
# `p` has a single entry.
function all_or_nothing(p::Vector{Int64})
    total = sum(p)
    single_entry = (length(p) == 1)
    return (total, single_entry)
end

# Aggregator returning `(sum(p), flag)`, where `flag` is `true` exactly when
# `p` has more than one entry and its total equals its number of entries.
function rainbow(p::Vector{Int64})
    nentries = length(p)
    total = sum(p)
    return (total, nentries > 1 && total == nentries)
end


# Run `estimateΩEmpirically` on `(H, labels)` once per aggregator and return the
# results as a vector, in the order: identity, discount_cut, sum_of_ext_degs,
# all_or_nothing, rainbow. Every call uses `min_val=0`.
function estimate_all(H, labels)
    aggregators = [identity, discount_cut, sum_of_ext_degs,
                   all_or_nothing, rainbow]
    return map(aggregators) do agg
        estimateΩEmpirically(H, labels; min_val=0, aggregator=agg)
    end
end

estimate_all (generic function with 1 method)

In [4]:
# Print the estimates from `estimate_all(H, labels)` for every integer partition
# `p` of every `k` in `1:maxk`. Each partition is printed on its own line,
# followed by a tab-indented, comma-separated line holding one `%.3e`-formatted
# value per estimator (in `estimate_all` order), then a blank line.
function show_estimates(H, labels, maxk)
    estimators = estimate_all(H, labels)
    for k in 1:maxk, p in partitions(k)
        # Evaluate every estimator on this partition before formatting anything.
        values_at_p = map(Ω̂ -> Ω̂(p; α=1, mode="partition"), estimators)
        strs = join(map(v -> @sprintf("%.3e", v), values_at_p), ", ")
        println("$p\n\t$strs\n")
    end
end

show_estimates (generic function with 1 method)

In [5]:
# Experiment: contact-primary-school.
# Load the dataset (default maxsize) and fit the empirical estimator with the
# discount_cut aggregator; `@time` reports the cost of the fit.
H, labels = read_hypergraph_data("contact-primary-school");
@time Ω̂ = estimateΩEmpirically(H, labels; min_val=0, aggregator=discount_cut)
# For k = 2..4, print the ratio of the estimate at the one-entry partition [k]
# to the estimate at the partition [k - 1, 1].
for k = 2:4
    p1 = [k]
    p2 = [k - 1, 1]
    rat = Ω̂(p1; α=1, mode="partition") / Ω̂(p2; α=1, mode="partition")
    println("$p1 / $p2 $rat")
end

  2.231857 seconds (5.96 M allocations: 706.955 MiB, 12.36% gc time)
[2] / [1, 1] 4.44001110430888
[3] / [2, 1] 35.68468337653148
[4] / [3, 1] 61.61881494241789


In [6]:
# Experiment: walmart-trips.
# Rebinds H, labels and Ω̂ to this dataset (default maxsize) and fits the
# empirical estimator with the discount_cut aggregator.
H, labels = read_hypergraph_data("walmart-trips");
@time Ω̂ = estimateΩEmpirically(H, labels; min_val=0, aggregator=discount_cut)
# For k = 2..8, print the ratio of the estimate at the one-entry partition [k]
# to the estimate at the partition [k - 1, 1].
for k = 2:8
    p1 = [k]
    p2 = [k - 1, 1]
    rat = Ω̂(p1; α=1, mode="partition") / Ω̂(p2; α=1, mode="partition")
    println("$p1 / $p2 $rat")
end

  2.730058 seconds (7.28 M allocations: 3.741 GiB, 27.00% gc time)
[2] / [1, 1] 2.7345769711532326
[3] / [2, 1] 2.7092254438376013
[4] / [3, 1] 3.6565106714457616
[5] / [4, 1] 4.637051383826315
[6] / [5, 1] 5.455954736833069
[7] / [6, 1] 7.818459130349408
[8] / [7, 1] 8.144259250199264


In [7]:
# Experiment: TrivagoClickout.
# Rebinds H, labels and Ω̂ to this dataset (default maxsize) and fits the
# empirical estimator with the discount_cut aggregator.
H, labels = read_hypergraph_data("TrivagoClickout");
@time Ω̂ = estimateΩEmpirically(H, labels; min_val=0, aggregator=discount_cut)
# For k = 2..8, print the ratio of the estimate at the one-entry partition [k]
# to the estimate at the partition [k - 1, 1].
# NOTE(review): the recorded output for k = 2 is NaN, presumably a 0/0 division
# because both estimates are zero for this dataset — confirm and guard if needed.
for k = 2:8
    p1 = [k]
    p2 = [k - 1, 1]
    rat = Ω̂(p1; α=1, mode="partition") / Ω̂(p2; α=1, mode="partition")
    println("$p1 / $p2 $rat")
end

 13.201690 seconds (118.09 M allocations: 3.808 GiB, 26.64% gc time)
[2] / [1, 1] NaN
[3] / [2, 1] 1710.709064527459
[4] / [3, 1] 2026.5037044869805
[5] / [4, 1] 2874.3708572560304
[6] / [5, 1] 2701.3909090128327
[7] / [6, 1] 3429.3942438500667
[8] / [7, 1] 4078.5422751178094
