In [125]:
using Parameters
using LinearAlgebra
using Combinatorics
using StatsBase
using NLopt
using Random
using SparseArrays
using Optim

# This is a stripped-down version of the original code,
# which computes voldiff and cutdiff both ways (old and new), so that
# we can compare the two approaches side by side.
# In some cases (e.g. congress-bills with kmax = 5),
# I've noticed differences in these computations, and I'm not sure why.
include("src/louvain_vol_computations.jl")

notsame

In [126]:
## Read in a dataset
# Exactly one `dataset = ...` line should be active; the commented-out lines
# below are alternative dataset names kept for quick switching.
dataset = "congress-bills"
# dataset = "contact-primary-school-classes"
# dataset = "TrivagoClickout"
# dataset = "walmart-trips"

# Second argument to read_hypergraph_data; presumably the largest hyperedge
# size to load -- TODO confirm. Distinct from `kmax`, which is derived from
# the keys of H.E further down.
kmax_ = 5

# Returns a pair. H is used below through H.E and H.D; Z is displayed as an
# integer vector (presumably ground-truth node labels -- confirm).
# NOTE(review): Z is rebound by the HyperLouvain_Vols and ANHL_Step cells
# further down, so this value does not survive a full run of the notebook.
H, Z = read_hypergraph_data(dataset,kmax_)

(hypergraph
  N: Array{Int64}((1718,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718]
  E: Dict{Int64,Dict}
  D: Array{Int64}((1718,)) [51, 22, 140, 44, 73, 164, 167, 70, 85, 21  …  51, 43, 29, 64, 48, 29, 9, 3, 0, 2]
, [1, 2, 1, 2, 1, 2, 1, 2, 2, 1  …  2, 2, 2, 2, 2, 1, 2, 1, 1, 1])

In [116]:
## Define Omega

# Smallest and largest key of H.E, obtained together in one pass
# (presumably the hyperedge sizes present in the data -- confirm).
kmin, kmax = extrema(keys(H.E))

# Number of entries in H.D (presumably the node count -- confirm).
n = length(H.D)

# all-or-nothing
# Intensity ω(p, α) for the pair p = (δ, k):
#
#     ((1 + (1 - δ)) * n)^α[k] / n^α[k + kmax]
#
# `p[1]` is read as δ and `p[2]` as k. `α` is indexed at k and at k + kmax,
# so it needs at least k + kmax entries. `n` and `kmax` are taken from the
# enclosing global scope, so they must be defined before ω is called.
function ω(p, α)
    δ, k = p[1], p[2]
    numerator = ((1 + (1 - δ)) * n)^α[k]
    denominator = n^α[k + kmax]
    return numerator / denominator
end

# Wrap ω into the intensity-function object that learnParameters and
# modularity take below. The displayed result is an IntensityFunction whose
# tuple list runs over (0, 1) … (0, 5), (1, 1) … (1, 5); these look like the
# (δ, k) pairs that ω receives as `p` -- confirm against the constructor.
Ω = allOrNothingIntensityFunction(ω, kmax)

IntensityFunction(ω, var"#2505#2510"(), Tuple{Int64,Int64}[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5)], var"#2507#2512"(), nothing)

In [127]:
## Dyadic baseline partition
# Z_dyadic is the partition passed to learnParameters and modularity below.
# Going by the function name it presumably comes from modularity maximisation
# on the clique expansion of H -- confirm. @time prints elapsed time and
# allocation statistics for the call.
@time Z_dyadic = CliqueExpansionModularity(H);

  1.655707 seconds (2.60 M allocations: 333.526 MiB, 5.99% gc time)


In [128]:
## Learn alpha
# Initial parameter vector: 2*kmax zeros. ω indexes α at k and at k + kmax,
# so the first kmax entries and the last kmax entries play different roles.
α = zeros(2*kmax);
# Rebind α to the vector returned by learnParameters for the partition
# Z_dyadic. n_iters/amin/amax presumably set the iteration budget and the
# search bounds for each entry -- TODO confirm against learnParameters.
α = learnParameters(H, Z_dyadic, Ω, α; n_iters = 1000, amin = -50, amax = 50)

10-element Array{Float64,1}:
 -11.80339887498949  
  -8.93168359003797  
  -7.6858558070596175
  -7.503171307531319 
  -7.109466811646286 
 -11.231606811035544 
  -7.249863778775007 
  -4.556676109782012 
  -2.91197952229933  
  -0.9998764904560583

In [129]:
## Modularity of this partition
# Evaluates the partition Z_dyadic on H under Ω with the learned α.
# The displayed value carries far more digits than a Float64 prints, so the
# return type is presumably BigFloat -- confirm.
Q_dyadic = modularity(H, Z_dyadic, Ω; α = α)

-1.056910161893882948935334649250753038073296538338171923550486962755762274358761e+06

In [130]:
## Convert hypergraph to new input types
# Load the all-or-nothing HyperLouvain code used by the remaining cells.
include("src/AON_hyperlouvain.jl")
# Unpack the seven values returned by AON_Inputs; all of them are passed on
# to ANHL_Step and SuperNode_PPLouvain below. cut_weights and vol_weights are
# also passed to HyperLouvain_Vols.
cut_weights, vol_weights, e2n, n2e,w,d,elen = AON_Inputs(H,ω,α,kmax);
# Bare expression: shows vol_weights as the cell output (a 5-element Float64
# vector in the recorded run, i.e. one entry per k in 1:kmax).
vol_weights
# Disabled sanity check that d equals H.D.
# @assert(d == H.D)

5-element Array{Float64,1}:
 0.014129210814019905  
 3.6172929703744154e-6 
 7.497507395656314e-11 
 1.3963289113117687e-15
 1.7067792983562404e-20

In [121]:
## Original version
include("src/louvain_vol_computations.jl")
# set this to true to see how the voldiff computations
# differ between the two approaches
printvoldiff = true
# Time the call with @elapsed, in line with the macro-based timing (@time)
# used for the other runs in this file, instead of manual time() bookkeeping.
# `begin ... end` does not open a new scope at top level, so Z is still bound
# globally exactly as before.
runtime = @elapsed begin
    Z = HyperLouvain_Vols(H, kmax, cut_weights, vol_weights, Ω, printvoldiff; α=α, verbose = true)
end
# Z is rebound by the next cell (ANHL_Step). Keep a separately named handle on
# this clustering so the old and new results can be compared side by side.
Z_old = Z
numclus_old = length(unique(Z_old))
println("Old Version: $numclus_old clusters, $runtime sec")

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
mainloop = 46.98367094993591
Old Version: 25 clusters, 48.4324791431427 sec


In [131]:
## Run one step of new approach
# Time the call with @elapsed, in line with the macro-based timing (@time)
# used for the other runs in this file, instead of manual time() bookkeeping.
# `begin ... end` does not open a new scope at top level, so Z and improved
# are still bound globally exactly as before.
runtime = @elapsed begin
    Z, improved = ANHL_Step(n2e,e2n,w, d,elen,cut_weights, vol_weights,kmax)
end
# Z is shared with the other cells that produce a clustering. Keep a
# separately named handle on this result so it can be compared with theirs.
Z_new = Z
clus = length(unique(Z_new))
println("New Version: $clus clusters, $runtime sec")

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
Main loop took 0.720160961151123 seconds
New Version: 25 clusters, 1.4048919677734375 sec


In [132]:
## Full supernode version
# The three settings below are passed positionally to SuperNode_PPLouvain in
# the order randflag, maxits, verbose.
verbose = true
maxits = 100
# NOTE(review): presumably randomises the node visiting order. No RNG seed is
# set anywhere in the visible code, so repeated runs may differ -- confirm.
randflag = true
# Zwarm = Z_dyadic    # can optionally specify a warm start clustering
# Labels 1..n, i.e. every node starts with its own distinct label.
Zwarm = collect(1:n)  # no warm start
# In the recorded run the log shows two "One step of all-or-nothing
# HyperLouvain" rounds, the second ending with "No nodes moved clusters".
@time Zs = SuperNode_PPLouvain(n2e,e2n,w,d,elen,cut_weights,vol_weights,kmax,randflag,maxits,verbose,Zwarm);

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
Louvain Iteration 13
Louvain Iteration 14
Louvain Iteration 15
Louvain Iteration 16
Main loop took 0.8297998905181885 seconds
One step of all-or-nothing HyperLouvain

Louvain Iteration 1
No nodes moved clusters
Main loop took 0.00010704994201660156 seconds
  1.419140 seconds (3.11 M allocations: 477.994 MiB, 8.27% gc time)
