In [1]:
using Parameters
using LinearAlgebra
using Combinatorics
using StatsBase
using NLopt
using Random
using SparseArrays
using Optim

# This is a stripped-down version of the original code,
# which computes voldiff and cutdiff both ways (old and new), so that
# we can compare the two approaches side by side.
# In some cases (e.g. congress-bills with kmax = 5),
# I've noticed differences in these computations, and I'm not sure why.
include("src/louvain_vol_computations.jl")

notsame

In [2]:
## Read in a dataset
# Exactly one `dataset` assignment should be active; the commented-out lines
# are alternative dataset names kept for quick switching.
dataset = "congress-bills"
# dataset = "contact-primary-school-classes"
# dataset = "TrivagoClickout"
# dataset = "walmart-trips"

# Second argument to read_hypergraph_data.
# NOTE(review): presumably the largest hyperedge size to keep -- confirm
# against read_hypergraph_data.
kmax_ = 6

# H is the hypergraph object (its fields E and D are read below); Z is the
# second return value.
# NOTE(review): Z looks like a vector of ground-truth labels -- confirm. It is
# rebound by the clustering results assigned to Z in later cells.
H, Z = read_hypergraph_data(dataset,kmax_)

(hypergraph
  N: Array{Int64}((1718,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 1717, 1718]
  E: Dict{Int64,Dict}
  D: Array{Int64}((1718,)) [55, 30, 171, 60, 89, 192, 208, 84, 106, 27  …  63, 49, 34, 82, 60, 32, 9, 4, 0, 2]
, [1, 2, 1, 2, 1, 2, 1, 2, 2, 1  …  2, 2, 2, 2, 2, 1, 2, 1, 1, 1])

In [3]:
## Define Omega

# Smallest and largest key of H.E, obtained in one pass.
# NOTE(review): the keys are presumably hyperedge sizes -- confirm.
kmin, kmax = extrema(keys(H.E))

# When the smallest key is 1, replace that entry with an empty Dict.
kmin == 1 && (H.E[1] = Dict())

# Number of entries in H.D.
n = length(H.D)

# all-or-nothing
"""
    ω(p, α)

Evaluate the intensity for the pair `p` under the parameter vector `α`.

`p[1]` is read as `δ` and `p[2]` as `k`. The value returned is
`((1 + (1 - δ)) * n)^α[k] / n^α[k + kmax]`, so `α` must be indexable at both
`k` and `k + kmax`. The globals `n` and `kmax` are looked up at call time.
"""
function ω(p, α)
    δ, k = p[1], p[2]
    numerator = ((1 + (1 - δ)) * n)^α[k]
    denominator = n^α[k + kmax]
    return numerator / denominator
end

# Wrap ω in an intensity-function object via allOrNothingIntensityFunction,
# passing kmax as its second argument.
Ω = allOrNothingIntensityFunction(ω, kmax)


IntensityFunction(ω, var"#9#14"(), [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6)], var"#11#16"(), nothing)

In [4]:
# Partition returned by CliqueExpansionModularity, timed with @time; it is the
# partition passed to learnParameters and modularity in the cells below.
@time Z_dyadic = CliqueExpansionModularity(H);

  1.543300 seconds (3.04 M allocations: 409.297 MiB, 16.21% gc time)


In [5]:
## Learn alpha
# α needs 2*kmax entries because ω reads both α[k] and α[k + kmax].
# The zero vector is handed to learnParameters as its α argument
# (NOTE(review): presumably the starting point of the fit -- confirm), and α is
# then rebound to the returned vector.
α = zeros(2*kmax);
α = learnParameters(H, Z_dyadic, Ω, α; n_iters = 1000, amin = -50, amax = 50)

12-element Array{Float64,1}:
 -11.80339887498949
  -9.984141419283793
  -9.749702007425228
  -9.330771357468702
  -9.084913145332889
  -7.547378808059866
  11.80339887498948
  -8.19690105585436
  -6.439104284143808
  -4.493095409442233
  -2.7088181524465016
   0.3835993847103965

In [7]:
## Check the modularity of the dyadic partition under the learned α.
## I'm not sure why this doesn't work: the value returned is about -1.37e6.
Q_dyadic = modularity(H, Z_dyadic, Ω; α = α)

-1.369356807677881889619057145865031285180528669709084786138308852485586135454607e+06

In [7]:
## Convert hypergraph to new input types
# NOTE(review): AON_hyperlouvain.jl presumably defines AON_Inputs (and the
# ANHL_Step / SuperNode_PPLouvain functions called in later cells) -- confirm.
include("src/AON_hyperlouvain.jl")
# cut_weights and vol_weights are passed to HyperLouvain_Vols below; all seven
# returned values are passed to ANHL_Step and SuperNode_PPLouvain.
cut_weights, vol_weights, e2n, n2e,w,d,elen = AON_Inputs(H,ω,α,kmax);


In [14]:
## Original version

# set this to true to see how the voldiff computations
# differ between the two approaches
printvoldiff = false  
# Wall-clock timing around the call; the elapsed time includes any
# compilation triggered by the call.
tic = time()
# NOTE: this rebinds Z, replacing the value returned by read_hypergraph_data.
Z = HyperLouvain_Vols(H, kmax, cut_weights, vol_weights, Ω, printvoldiff; α=α, verbose = true);
runtime = time()-tic
# Number of distinct labels in Z.
numclus_old = length(unique(Z))
println("Old Version: $numclus_old clusters, $runtime sec")


Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
mainloop = 43.75273299217224
Old Version: 11 clusters, 44.1561119556427 sec


In [15]:
## Run one step of new approach
# Wall-clock timing around a single ANHL_Step call.
tic = time()
# Rebinds Z again; `improved` is the second return value and is not used in
# this cell.
Z, improved = ANHL_Step(n2e,e2n,w, d,elen,cut_weights, vol_weights,kmax)
runtime = time()-tic
# Number of distinct labels in Z, for comparison with numclus_old.
clus = length(unique(Z))
println("New Version: $clus clusters, $runtime sec")

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Main loop took 0.3394150733947754 seconds
New Version: 11 clusters, 0.5627901554107666 sec


In [16]:
## Full supernode version
# The three settings below are passed positionally to SuperNode_PPLouvain.
# NOTE(review): maxits presumably caps the number of outer iterations and
# randflag presumably toggles a randomized visiting order -- confirm against
# SuperNode_PPLouvain.
verbose = true
maxits = 100
randflag = true
# Zwarm = Z_dyadic    # can optionally specify a warm start clustering
# collect(1:n) is the vector [1, 2, ..., n]: every index gets its own label.
Zwarm = collect(1:n)  # no warm start
@time Zs = SuperNode_PPLouvain(n2e,e2n,w,d,elen,cut_weights,vol_weights,kmax,randflag,maxits,verbose,Zwarm);

One step of all-or-nothing HyperLouvain

Louvain Iteration 1
Louvain Iteration 2
Louvain Iteration 3
Louvain Iteration 4
Louvain Iteration 5
Main loop took 0.47547101974487305 seconds
One step of all-or-nothing HyperLouvain

Louvain Iteration 1
No nodes moved clusters
Main loop took 0.0001010894775390625 seconds
  0.913364 seconds (2.57 M allocations: 290.927 MiB, 21.48% gc time)


In [20]:
vol_weights

6-element Array{Float64,1}:
 4.2789876363997184e-77
 1.6512278976568762e-6 
 1.9482062564750627e-11
 2.2352768226292953e-16
 2.357135508785145e-21 
 2.191664137568531e-26 