In [1]:
using Parameters
using LinearAlgebra
using Combinatorics
using StatsBase
using NLopt
using Random
using SparseArrays
using Optim
using TimerOutputs

# This is a stripped-down version of the original code,
# which computes voldiff and cutdiff both ways (old and new), so that
# we can compare the two approaches side by side.
# In some cases (e.g., congress-bills with kmax = 5),
# I've noticed differences in these computations, and I'm not sure why.
include("src/louvain_vol_computations.jl")

notsame

In [2]:
## Read in a dataset
# Select the dataset by editing this single assignment. Other names that have
# been used with this notebook:
#   "congress-bills", "contact-primary-school-classes", "TrivagoClickout"
dataset = "walmart-trips"

# Second argument of read_hypergraph_data — presumably a cap on hyperedge size;
# TODO confirm against the reader's definition.
kmax_ = 5

# H is the hypergraph; Z is the second value returned by the reader
# (presumably the dataset's node labels — TODO confirm).
H, Z = read_hypergraph_data(dataset, kmax_)

(hypergraph
  N: Array{Int64}((88860,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  88851, 88852, 88853, 88854, 88855, 88856, 88857, 88858, 88859, 88860]
  E: Dict{Int64,Dict}
  D: Array{Int64}((88860,)) [0, 0, 3, 2, 1, 32, 0, 0, 2, 10  …  0, 0, 1, 1, 0, 0, 0, 0, 1, 1]
, [7, 7, 7, 7, 7, 11, 7, 7, 7, 7  …  1, 3, 1, 1, 7, 5, 5, 5, 10, 11])

In [3]:
# Smallest and largest keys of H.E, found in one pass
# (presumably the keys are hyperedge sizes — confirm against the hypergraph type).
kmin, kmax = extrema(keys(H.E))

# Number of entries in H.D; used as the node count n throughout the notebook.
n = length(H.D)

# all-or-nothing
# ω(p, α): intensity function handed to allOrNothingIntensityFunction.
#   p — indexable pair; p[1] is read as δ and p[2] as k
#       (presumably δ is a 0/1 indicator and k a hyperedge size — confirm).
#   α — parameter vector, read at positions k and k + kmax.
# Returns ((1 + (1 - δ)) * n)^α[k] / n^α[k + kmax].
# Reads the notebook globals `n` and `kmax`, so both must be defined before
# the first call.
function ω(p, α)
    δ, k = p[1], p[2]
    scaled = ((1 + (1 - δ)) * n)^α[k]
    baseline = n^α[k + kmax]
    return scaled / baseline
end

# Build the intensity-function object from ω and kmax using the project helper;
# Ω is what learnParameters and modularity take as their third argument below.
Ω = allOrNothingIntensityFunction(ω, kmax)

IntensityFunction(ω, var"#11#16"(), Tuple{Int64,Int64}[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5)], var"#13#18"(), nothing)

In [4]:
# Compute a partition of H with the project's CliqueExpansionModularity routine,
# reporting time and allocations via @time. The result is reused below as
# Z_learn (parameter fitting) and as a warm start.
@time Z_dyadic = CliqueExpansionModularity(H);

 44.275654 seconds (60.65 M allocations: 11.434 GiB, 6.70% gc time)


In [5]:
# Partition the parameters are fitted on. This binds a second name to the same
# object as Z_dyadic — no copy is made.
Z_learn = Z_dyadic;

In [6]:
## Learn alpha
# Fit the 2*kmax parameters on Z_learn. A zero vector is passed as the α
# argument (presumably the starting point), with amin/amax = -50/50
# (presumably bounds on each entry — confirm in learnParameters).
α = learnParameters(H, Z_learn, Ω, zeros(2 * kmax); n_iters=1000, amin=-50, amax=50)

10-element Array{Float64,1}:
 -11.80339887498949   
 -11.484724215480254  
 -10.671528010107668  
  -9.910031420143213  
  -4.368009855035218  
  11.80339887498948   
 -10.374636778679204  
  -8.572987822270028  
  -6.817281072455037  
  -0.22196114519680707

In [7]:
## Modularity of the partition you learned parameters on
# Evaluates the project's modularity for Z_learn under Ω with the fitted α.
Q_learn = modularity(H, Z_learn, Ω; α = α)

-1.033626478778783246003967931209830256646114407178661854264219298029340708042631e+06

In [8]:
## Convert hypergraph to new input types
include("src/AON_hyperlouvain.jl")

# Unpack the seven values returned by AON_Inputs; they are passed unchanged to
# ANHL_Step and SuperNode_PPLouvain in later cells.
cut_weights, vol_weights, e2n, n2e, w, d, elen = AON_Inputs(H, ω, α, kmax);

# Sanity check: the returned d must equal the hypergraph's own D.
@assert d == H.D

In [9]:
## Run one step of new approach
# @elapsed runs the block and returns the elapsed seconds; it replaces the
# manual tic = time() / time() - tic bookkeeping.
# NOTE: this rebinds Z, replacing the value returned by read_hypergraph_data.
runtime = @elapsed begin
    Z, improved = ANHL_Step(n2e, e2n, w, d, elen, cut_weights, vol_weights, kmax)
end

# Number of distinct labels in the one-step result.
clus = length(unique(Z))
println("New Version: $clus clusters, $runtime sec")

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Louvain Iteration 7
Louvain Iteration 8
Louvain Iteration 9
Louvain Iteration 10
Louvain Iteration 11
Louvain Iteration 12
Louvain Iteration 13
Louvain Iteration 14
Louvain Iteration 15
Louvain Iteration 16
Louvain Iteration 17
Louvain Iteration 18
Main loop took 8.473057985305786 seconds
New Version: 78423 clusters, 12.51836609840393 sec


In [10]:
## Full supernode version
# Options forwarded positionally to SuperNode_PPLouvain below.
verbose = true
maxits = 100
randflag = true

# Here's where you can set the warm start.
# Use `Zwarm = collect(1:n)` instead for no warm start.
Zwarm = Z_dyadic    # can optionally specify a warm start clustering

@time Zset = SuperNode_PPLouvain(
    n2e, e2n, w, d, elen, cut_weights, vol_weights, kmax, randflag, maxits, verbose, Zwarm
);

# Keep the last column of Zset as the final assignment.
Zs = Zset[:, end];

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Louvain Iteration 6
Main loop took 24.049147129058838 seconds
One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Main loop took 0.1979689598083496 seconds
One step of all-or-nothing HyperLouvain

Louvain Iteration 1
No nodes moved clusters
Main loop took 0.0739450454711914 seconds
 41.286362 seconds (15.63 M allocations: 23.965 GiB, 8.96% gc time)


In [30]:
## Check modularities
# Same modularity evaluated on three partitions: the one α was fitted on
# (Z_learn), the single-step result (Z), and the supernode result (Zs).
Q_learn = modularity(H, Z_learn, Ω; α = α)
Q_1 = modularity(H, Z, Ω; α = α)
Q_s = modularity(H, Zs, Ω; α = α)
println("$Q_learn \n$Q_1 \n$Q_s")

# Number of distinct labels in each partition, in the same order.
nc_learn = length(unique(Z_learn))
nc_1 = length(unique(Z))
nc_super = length(unique(Zs))
println("$nc_learn \t $nc_1 \t $nc_super")

## The average cluster size is tiny.
# But perhaps more interesting is to focus on the number and size of nontrivial clusters
minsize = 8

# Count the clusters of Z with strictly more than minsize members, directly
# from the label counts (no intermediate arrays). Note this inspects Z, i.e.
# the partition reported as Q_1 / nc_1 above, not Zs.
NonTrivialClusters = count(c -> c > minsize, values(countmap(Z)))
println("The hypergraph louvain answer has $NonTrivialClusters clusters with more than $minsize nodes")

-1.033626478778783246003967931209830256646114407178661854264219298029340708042631e+06 
-1.105575867513021256148416874191502015091609023464244678772364533105802418728499e+06 
-1.029582925800438924475466833337162334930257637412097476081221706492528263521597e+06
44769 	 78423 	 44705
The hypergraph louvain answer has 35 clusters with more than 8 nodes


In [120]:
## Alternating optimization
include("src/AON_hyperlouvain.jl")

# Initial parameter fit on Z_learn, starting from a zero vector.
α̂ = learnParameters(H, Z_learn, Ω, zeros(2 * kmax); n_iters=1000, amin=-50, amax=50)
verbose = false
maxits = 100
randflag = true

# Table header: four padded columns followed by a dashed rule.
println("POLYADIC")
println(rpad("iteration", 20), rpad("Q", 15), rpad("groups", 10), rpad("time (s)", 10))
println("-"^65)

Ẑ = zero(Z)
Zwarm = collect(1:n)  # no warm start

# Alternate between clustering with the current α̂ and refitting α̂ on the
# resulting partition. Zwarm is not updated inside the loop, so every round
# starts from the same initial assignment.
for i in 1:10
    # Rebuild the solver inputs, since they depend on the current α̂.
    cut_weights, vol_weights, e2n, n2e, w, d, elen = AON_Inputs(H, ω, α̂, kmax);

    # Time only the clustering call.
    tic = time()
    Zset = SuperNode_PPLouvain(n2e, e2n, w, d, elen, cut_weights, vol_weights, kmax, randflag, maxits, verbose, Zwarm)
    toc = time() - tic
    Ẑ = Zset[:, end]

    # NOTE(review): the initial fit used amin/amax = -50/50 while the refits use
    # -10/10 with only 10 iterations — confirm the tighter settings are intended.
    α̂ = learnParameters(H, Ẑ, Ω, α̂; n_iters=10, amin=-10, amax=10)
    Q = modularity(H, Ẑ, Ω; α=α̂)

    # One table row: iteration, rounded Q, number of groups, clustering time.
    println(
        rpad("$i", 20),
        rpad("$(round(Q))", 15),
        rpad("$(length(unique(Ẑ)))", 10),
        rpad("$(round(toc; digits=3))", 10),
    )
end

POLYADIC
iteration           Q              groups    time (s)  
-----------------------------------------------------------------
1                   -978310.0      77472     69.768    
2                   -977754.0      77366     65.374    
3                   -977682.0      77361     68.407    
4                   -977610.0      77348     67.67     
5                   -977621.0      77338     68.083    
6                   -977653.0      77341     69.522    
7                   -977750.0      77355     69.535    
8                   -977632.0      77336     71.721    
9                   -977623.0      77338     67.545    
10                  -977671.0      77349     67.864    
