# Quick Comparisons

This notebook is intended as a speed test on the Walmart trips data. Currently, however, dyadic modularity takes an impractically long time to run on this data set, so we can safely assume that we won't do much better.

In [3]:
using Optim 
# using Plots
using StatsBase

using Pkg; Pkg.activate(".")
using HypergraphModularity

[32m[1m Activating[22m[39m environment at `~/hypergraph_modularities_code/Project.toml`


In [4]:
# Load the hypergraph H and the label vector Z for the chosen data set.
# kmax_ is passed straight through to read_hypergraph_data.
dataset = "walmart-trips"
kmax_ = 3
H, Z = read_hypergraph_data(dataset, kmax_)

# Type conversion for faster partitionize method.
Z = convert(Vector{Int16}, Z)

# Replace the entry stored under key 1 of H.E with an empty Dict
# (presumably this drops the single-node edges -- confirm).
H.E[1] = Dict()

# Range of keys of H.E that is used below; the lower end is never below 2.
kmin = max(2, minimum(keys(H.E)))
kmax = maximum(keys(H.E))

# Starting parameter vector: kmax zeros followed by 1, 2, ..., kmax.
α0 = vcat(zeros(kmax), 1:kmax)

# Number of entries of H.D (reported as the number of nodes further down).
n = length(H.D)

"""
    ω(p, α)

Evaluate the affinity value for the vector `p` under the parameter vector `α`.

With `k = sum(p)`, the result is

    k / sum(p[i] * i^α[k] for each index i) / n^(α[kmax + k] * k)

where `n` and `kmax` are the globals defined earlier in this notebook. Because `k` is
used to index `α`, the entries of `p` must sum to an integer, and `α` needs at least
`kmax + k` entries.
"""
function ω(p, α)
    k = sum(p)
    # Entries of p weighted by their position raised to the size-specific exponent.
    weighted = sum(p .* (1:length(p)) .^ α[k])
    return k / weighted / n^(α[kmax + k] * k)
end

# Wrap ω into the object Ω that is later passed to the Louvain, parameter-estimation and
# modularity calls, using α0 as the parameter vector and kmax (the largest key of H.E).
# NOTE(review): buildΩ is presumably exported by HypergraphModularity -- confirm.
Ω = buildΩ(ω, α0, kmax)

(::HypergraphModularity.var"#Ω#8"{HypergraphModularity.var"#Ω#3#9"{typeof(ω),Int64}}) (generic function with 1 method)

In [5]:
# For every key k of H.E in kmin:kmax, report the percentage of edges whose node labels
# (taken from Z) collapse to a single part according to partitionize.
for k in kmin:kmax
    edges = keys(H.E[k])
    issingle = [length(partitionize(Z[e])) == 1 for e in edges]
    pct = round(100 * mean(issingle), digits = 0)
    println("k = ", k, ": ", pct, " % of ", length(edges),
            " edges are within a single group.")
end

k = 2: 59.0 % of 12838 edges are within a single group.
k = 3: 43.0 % of 10110 edges are within a single group.


In [6]:
# Report the number of entries of H.D, i.e. the node count.
println("There are ", length(H.D), " nodes.")

There are 88860 nodes.


In [10]:
# G = CliqueExpansion(H)

In [6]:
# Benchmark: run each partitioning algorithm once, time it with `@timed`, and print one
# table row per algorithm (modularity, number of groups, seconds), followed by a row for
# the labels Z loaded above.

# Time a zero-argument callable. Element 1 of the `@timed` result is the value returned
# by the callable and element 2 is the elapsed time in seconds.
timeAlg(f::Function) = @timed f()
# Fallback kept for backward compatibility: time the evaluation of a quoted expression.
timeAlg(expr) = @timed eval(expr)

# Algorithms under test, stored as closures so that nothing runs until it is timed.
# The closures read the globals H, kmax, Ω and α̂ at call time, so a value of α̂ that is
# re-estimated inside the loop below is the one seen by later algorithms.
algDict = Dict(
    "Dyadic"                    => (() -> CliqueExpansionModularity(H)),
#     "Hypergraph (no supernode)" => (() -> HyperLouvain(H,kmax,Ω;α=α̂, verbose=false)),
    "Hypergraph (supernode)"    => (() -> SuperNodeLouvain(H, kmax, Ω; α=α̂, verbose=false)),
)

# Parameters used until the first estimate is available.
α̂ = α0

# Print one table row with the column widths 30 / 15 / 10 / 10 used throughout.
function printrow(label, q, ngroups, elapsed)
    print(rpad(label, 30))
    print(rpad(q, 15))
    print(rpad(ngroups, 10))
    println(rpad(elapsed, 10))
end

printrow("algorithm", "Q", "groups", "time (s)")
println("-"^65)

Ẑ = zero(Z)

# for name in ["Dyadic", "Hypergraph (no supernode)", "Hypergraph (supernode)"]
for name in ["Dyadic", "Hypergraph (supernode)"]
    # Without this declaration the assignments below only reach the globals under the
    # notebook's soft scope; run as a plain script they would create loop-local
    # variables, leaving α̂ undefined on the second iteration.
    global α̂, Ẑ

    out = timeAlg(algDict[name])
    Ẑ = out[1]
    elapsed = out[2]   # not called `time`, which would shadow Base.time

    if name == "Dyadic"
        # Re-estimate the parameters from the dyadic partition; the second returned
        # value is not used here.
        α̂, _ = estimateParameters(H, Ẑ, Ω, α0)
    end

    Qalg = modularity(H, Ẑ, Ω; α = α̂)
    printrow(name, round(Qalg, digits = 0), length(unique(Ẑ)), elapsed)
end

# Reference row: modularity of the labels Z under the final parameter estimate.
Q = modularity(H, Z, Ω; α = α̂)
printrow("TRUE LABELS", round(Q, digits = 0), length(unique(Z)), "NA")

algorithm                     Q              groups    time (s)  
-----------------------------------------------------------------


InterruptException: InterruptException: