In [1]:
using Parameters
using LinearAlgebra
using Combinatorics
using StatsBase
using NLopt
using Random
using SparseArrays
using Optim

# This is a stripped-down version of the original code,
# which computes voldiff and cutdiff both ways (old and new), so that
# we can compare the two approaches side by side.
# In some cases (e.g. congress-bills with kmax = 5),
# I've noticed differences in these computations, and I'm not sure why.
include("src/louvain_vol_computations.jl")

notsame

In [2]:
## Read in a dataset
# Exactly one `dataset` assignment should be active; the commented-out lines
# are alternative dataset names.
dataset = "congress-bills"
# dataset = "contact-primary-school-classes"
# dataset = "TrivagoClickout"
# dataset = "walmart-trips"

# Second argument handed to read_hypergraph_data.
# NOTE(review): presumably the largest hyperedge size to keep — confirm in the loader.
kmax_ = 5

# Returns a hypergraph `H` (fields N, E, D in the recorded output) and a vector `Z`.
# NOTE(review): `Z` looks like a per-node label vector — confirm.
# `Z` is reassigned by the HyperLouvain cells further down.
H, Z = read_hypergraph_data(dataset,kmax_)

(hypergraph
  N: Array{Int64}((1718,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718]
  E: Dict{Int64,Dict}
  D: Array{Int64}((1718,)) [51, 22, 140, 44, 73, 164, 167, 70, 85, 21  …  51, 43, 29, 64, 48, 29, 9, 3, 0, 2]
, [1, 2, 1, 2, 1, 2, 1, 2, 2, 1  …  2, 2, 2, 2, 2, 1, 2, 1, 1, 1])

In [3]:
## Define Omega

# Largest and smallest keys of the edge dictionary H.E.
# NOTE(review): the keys are presumably hyperedge sizes — confirm.
kmax = maximum(keys(H.E))
kmin = minimum(keys(H.E))

# Length of H.D (1718 entries in the recorded run). This global is read by ω
# and is used to build the no-warm-start clustering in the supernode cell.
n = length(H.D)

# All-or-nothing intensity function.
#
# `p` is read at positions 1 (δ) and 2 (k); `α` is read at positions `k` and
# `k + kmax`. The globals `n` and `kmax` defined earlier in this cell are used
# directly, so they must be set before ω is called.
# NOTE(review): δ presumably flags whether an edge is split across clusters and
# k is the edge size — confirm against the intensity-function definition.
function ω(p, α)
    δ, k = p[1], p[2]
    # Numerator base: n when δ == 1, 2n when δ == 0.
    scaled = (1 + (1 - δ)) * n
    numerator = scaled^α[k]
    denominator = n^α[k + kmax]
    return numerator / denominator
end

# Wrap ω and kmax into the intensity-function object used by the later cells.
# This is the last expression of the cell, so its value is displayed.
Ω = allOrNothingIntensityFunction(ω, kmax)

IntensityFunction(ω, var"#9#14"(), [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5)], var"#11#16"(), nothing)

In [4]:
# Compute a clustering `Z_dyadic` of H and report the time taken; the trailing
# `;` suppresses display of the result.
# NOTE(review): presumably modularity maximisation on the clique expansion of H — confirm.
@time Z_dyadic = CliqueExpansionModularity(H);

  0.941356 seconds (2.57 M allocations: 333.432 MiB, 4.61% gc time)


In [5]:
## Learn alpha
# Start from an all-zero vector of length 2*kmax (ω reads α[k] and α[k + kmax]).
α = zeros(2*kmax);
# Replace it with the learned parameters, passing Z_dyadic, Ω and the initial α.
# NOTE(review): amin/amax look like bounds on the entries of α — confirm.
α = learnParameters(H, Z_dyadic, Ω, α; n_iters = 1000, amin = -50, amax = 50)

10-element Array{Float64,1}:
 -11.80339887498949
  -8.931683657590579
  -7.685855612273574
  -7.503171006378569
  -7.109466372742029
 -11.231606811035544
  -7.249863846362905
  -4.556675913952602
  -2.9119792155112365
  -0.9998760299723046

In [6]:
# Small sanity check: apply Ω.P to the vector [3, 3, 2, 1], pass the result to
# Ω.aggregator, and display what comes back (the recorded output is (false, 4)).
p = Ω.P([3, 3, 2, 1])
a = Ω.aggregator(p)
a

(false, 4)

In [7]:
## Modularity of this partition
# Evaluate `modularity` for the clustering Z_dyadic under Ω with the learned α;
# the value is displayed as the cell output.
Q_dyadic = modularity(H, Z_dyadic, Ω; α = α)

-1.056910161302624276275713533664700478619777431050731297760749680510947095232232e+06

In [8]:
## Convert hypergraph to new input types
include("src/AON_hyperlouvain.jl")
# Unpack the seven values returned by AON_Inputs; they are the arguments passed
# to ANHL_Step and SuperNode_PPLouvain in the cells below.
cut_weights, vol_weights, e2n, n2e,w,d,elen = AON_Inputs(H,ω,α,kmax);
# Display vol_weights (5 entries in the recorded run).
vol_weights
# Disabled check that `d` equals H.D.
# @assert(d == H.D)

5-element Array{Float64,1}:
 0.014129210814019905
 3.617292971672843e-6
 7.497507332445864e-11
 1.3963288510822363e-15
 1.7067790202143493e-20

In [9]:
## Original version
# Re-include the file that was already included in the first cell.
include("src/louvain_vol_computations.jl")
# set this to true to see how the voldiff computations
# differ between the two approaches
printvoldiff = true 
# Wall-clock timing around the call.
tic = time()
# Note: this overwrites the `Z` returned by read_hypergraph_data.
Z = HyperLouvain_Vols(H, kmax, cut_weights, vol_weights, Ω, printvoldiff; α=α, verbose = true);
runtime = time()-tic
# Number of distinct labels in the resulting Z.
numclus_old = length(unique(Z))
println("Old Version: $numclus_old clusters, $runtime sec")

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
mainloop = 32.76590895652771
Old Version: 25 clusters, 34.473509073257446 sec


In [10]:
## Run one step of new approach
# Wall-clock timing around the call.
tic = time()
# Overwrites `Z` again; `improved` is not used in the cells visible here.
Z, improved = ANHL_Step(n2e,e2n,w, d,elen,cut_weights, vol_weights,kmax)
runtime = time()-tic
# Number of distinct labels in the resulting Z.
clus = length(unique(Z))
println("New Version: $clus clusters, $runtime sec")

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
Main loop took 0.4833030700683594 seconds
New Version: 25 clusters, 0.8927738666534424 sec


In [11]:
## Full supernode version
# Flags and limits passed positionally to SuperNode_PPLouvain below.
# NOTE(review): randflag presumably randomises the node visiting order and
# maxits caps the number of iterations — confirm in src/AON_hyperlouvain.jl.
verbose = true
maxits = 100
randflag = true
# Zwarm = Z_dyadic    # can optionally specify a warm start clustering
Zwarm = collect(1:n)  # no warm start
# Time the full run; the trailing `;` suppresses display of Zs.
@time Zs = SuperNode_PPLouvain(n2e,e2n,w,d,elen,cut_weights,vol_weights,kmax,randflag,maxits,verbose,Zwarm);

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
Main loop took 0.5496470928192139 seconds
One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Main loop took 0.0001480579376220703 seconds
One step of all-or-nothing HyperLouvain

Louvain Iteration 1
No nodes moved clusters
Main loop took 7.295608520507812e-5 seconds
  0.914762 seconds (3.24 M allocations: 424.401 MiB, 7.79% gc time)
