In [1]:
# Activate the project environment found in the current working directory
# before loading any packages.
using Pkg
Pkg.activate(".")

[32m[1m Activating[22m[39m environment at `~/code/hypergraph_modularities_code/Project.toml`


In [2]:
using HypergraphModularity
using LinearAlgebra
using Printf
using SparseArrays
using StatsBase

In [3]:
"""
    all_or_nothing(p::Vector{Int64})

Summarize the vector `p` as the tuple `(sum(p), is_aon)`, where `is_aon` is
`true` exactly when `p` has a single entry.
"""
function all_or_nothing(p::Vector{Int64})
    total = sum(p)
    single_entry = (length(p) == 1)
    return (total, single_entry)
end

"""
    polyadic_AON_MLE(Hyp, Z)

Estimate an affinity object for `Hyp` under the labels `Z` by calling
`estimateΩEmpirically` with the `all_or_nothing` aggregator and `min_val=0`,
then evaluate `logLikelihood` at that estimate (with `α=0`, `bigInt=true`).

Returns `(ll, Ω̂)`: the summed log-likelihood converted to `Float64`, and the
estimated affinity object.
"""
function polyadic_AON_MLE(Hyp, Z)
    Ω̂ = estimateΩEmpirically(Hyp, Z; min_val=0, aggregator=all_or_nothing)
    loglik_terms = logLikelihood(Hyp, Z, Ω̂; α=0, bigInt=true)
    ll = Float64(sum(loglik_terms))
    return (ll, Ω̂)
end

"""
    dyadic_MLE(Hyp, Z, weighted::Bool, binary::Bool)

Fit the dyadic model to `Hyp` under the labels `Z`. `weighted` and `binary`
are forwarded unchanged as keyword arguments to every package call.

Returns `(ll, ω_in, ω_out, γ, Q)`:
- `ω_in`, `ω_out`: the pair returned by `computeDyadicResolutionParameter`
  in mode `"ω"`;
- `γ`: `(ω_in - ω_out) / (log(ω_in) - log(ω_out))`;
- `Q`: `dyadicModularity` evaluated at `γ`;
- `ll`: `dyadicLogLikelihood` at `(ω_in, ω_out)`, converted to `Float64`.
"""
function dyadic_MLE(Hyp, Z, weighted::Bool, binary::Bool)
    # Options shared by all three package calls below.
    opts = (weighted=weighted, binary=binary)

    ω_in, ω_out = computeDyadicResolutionParameter(Hyp, Z; mode="ω", opts...)
    rate_gap = ω_in - ω_out
    log_gap = log(ω_in) - log(ω_out)
    γ = rate_gap / log_gap

    Q = dyadicModularity(Hyp, Z, γ; opts...)
    ll = Float64(dyadicLogLikelihood(Hyp, Z, ω_in, ω_out; opts...))
    return (ll, ω_in, ω_out, γ, Q)
end

dyadic_MLE (generic function with 1 method)

In [4]:
"""
    decomposedDyadicModularity(H, Z, γ, weighted, binary)

Split the dyadic modularity of the matrix returned by
`CliqueExpansion(H, weighted, binary)` into one term per cluster.

Returns a `Dict{Int64, Float64}` mapping every label `c` in `unique(Z)` to
`(e_c - γ * vol_c^2 / vol) / vol`, where `e_c` is the sum of the stored
entries whose row and column nodes both carry label `c`, `vol_c` is the sum
of the column sums of the nodes labeled `c`, and `vol` is the sum of all
column sums.
"""
function decomposedDyadicModularity(H, Z, γ, weighted, binary)
    A = CliqueExpansion(H, weighted, binary)
    communities = unique(Z)
    zero_table() = Dict{Int64, Float64}(c => 0.0 for c in communities)

    # Weight of stored entries with both endpoints in the same cluster.
    within = zero_table()
    rows, cols, vals = SparseArrays.findnz(A)
    for k in eachindex(vals)
        zi = Z[rows[k]]
        if zi == Z[cols[k]]
            within[zi] += vals[k]
        end
    end

    # Per-cluster volume: column sums accumulated by label.
    degrees = vec(sum(A, dims=1))
    volume = zero_table()
    for (label, deg) in zip(Z, degrees)
        volume[label] += deg
    end

    # Per-cluster modularity term.
    total_volume = sum(degrees)
    Qs = zero_table()
    for c in communities
        Qs[c] = (within[c] - γ * volume[c]^2 / total_volume) / total_volume
    end

    return Qs
end

decomposedDyadicModularity (generic function with 1 method)

In [5]:
"""
    decomposedAONPolyadicModularity(H, Z, Ω)

Split the all-or-nothing polyadic modularity into one term per cluster and
per hyperedge size.

# Arguments
- `H`: object with a field `D` (one value per node, aligned with `Z`, summed
  into cluster volumes) and a field `E` (dictionary keyed by hyperedge size
  `ℓ`; each value iterates as `edge => weight` pairs, an edge being a
  collection of node indices into `Z`).
- `Z`: cluster label of every node.
- `Ω`: object whose field `ω` is callable as `Ω.ω((ℓ, flag), 0)`; it is
  queried with `flag = true` and `flag = false` for every size.

# Returns
`(all_Qs, combined_Qs)`:
- `all_Qs[ℓ][c] = α * W - β * vol_c^ℓ` with `α = log(ω1) - log(ω0)`,
  `β = ω1 - ω0`, `W` the total weight of size-`ℓ` hyperedges whose nodes all
  carry label `c`, and `vol_c` the volume of cluster `c`;
- `combined_Qs[c]`: the sum of `all_Qs[ℓ][c]` over all sizes `ℓ >= 2`.

Sizes below 2 are ignored. If `H.E` holds no size `>= 2`, `all_Qs` is empty
and every entry of `combined_Qs` is `0.0`.
"""
function decomposedAONPolyadicModularity(H, Z, Ω)
    cluster_ids = unique(Z)
    all_Qs = Dict{Int64, Dict{Int64, Float64}}()
    combined_Qs = Dict{Int64, Float64}(c => 0.0 for c in cluster_ids)

    # Volume terms
    vols = Dict{Int64, Float64}(c => 0.0 for c in cluster_ids)
    for (ci, di) in zip(Z, H.D)
        vols[ci] += di
    end

    # Visit only the sizes that are present, in ascending order so that the
    # accumulation order into `combined_Qs` is deterministic. Iterating the
    # keys (rather than 2:maximum(keys)) also copes with an empty `H.E`.
    sizes = sort!([k for k in keys(H.E) if k >= 2])
    for ℓ in sizes
        # W terms: weight of hyperedges that lie entirely inside one cluster
        Ws = Dict{Int64, Float64}(c => 0.0 for c in cluster_ids)
        for (edge, weight) in H.E[ℓ]
            z1 = Z[first(edge)]
            if all(v -> Z[v] == z1, edge)
                Ws[z1] += weight
            end
        end

        # Modularities
        ω1 = Ω.ω((ℓ, true), 0)
        ω0 = Ω.ω((ℓ, false), 0)
        α = log(ω1) - log(ω0)
        β = ω1 - ω0
        Qs = Dict{Int64, Float64}(c => α * Ws[c] - β * vols[c]^ℓ for c in cluster_ids)
        for c in cluster_ids
            combined_Qs[c] += Qs[c]
        end
        all_Qs[ℓ] = Qs
    end

    return all_Qs, combined_Qs
end

decomposedAONPolyadicModularity (generic function with 1 method)

In [6]:
"""
    dyadic_modularity_contributions(H, labels, names, topk=10, weighted=false, binary=true)

Print the clusters with the largest per-cluster dyadic modularity terms.

The resolution parameter is taken from `dyadic_MLE(H, labels, weighted,
binary)` and the per-cluster terms from `decomposedDyadicModularity`. Entry
`i` of `names` is treated as the display name of cluster label `i`. At most
`min(topk, length(names))` rows are printed, in decreasing order of
contribution. Returns `nothing`.
"""
function dyadic_modularity_contributions(H, labels, names, topk=10, weighted=false, binary=true)
    # Only the resolution parameter γ of the fit is needed here.
    _, _, _, γ, _ = dyadic_MLE(H, labels, weighted, binary)
    dyadicQs = decomposedDyadicModularity(H, labels, γ, weighted, binary)
    stopind = min(topk, length(names))

    # Dyadic from clique expansion
    println("dyadic...")
    # A label that no node carries has no entry in `dyadicQs`; such an empty
    # cluster has zero internal weight and zero volume, hence contributes 0.
    contributions = [(name, get(dyadicQs, i, 0.0)) for (i, name) in enumerate(names)]
    ranked = sort(contributions, by=kv -> kv[2], rev=true)[1:stopind]
    for (rank, (name, q)) in enumerate(ranked)
        println(rpad("$rank: ", 5),
                rpad(name, 31),
                rpad(@sprintf("%.3E", q), 15))
    end
    println("------")
    return nothing
end

"""
    polyadic_modularity_contributions(H, labels, names, topk=10, individual=false)

Print the clusters with the largest per-cluster all-or-nothing polyadic
modularity terms.

The affinity estimate comes from `polyadic_AON_MLE(H, labels)` and the
per-cluster terms from `decomposedAONPolyadicModularity`. Entry `i` of
`names` is treated as the display name of cluster label `i`. At most
`min(topk, length(names))` rows are printed per table, in decreasing order.
The combined table is always printed; when `individual` is `true`, one
further table per hyperedge size follows, in ascending size order.
Returns `nothing`.
"""
function polyadic_modularity_contributions(H, labels, names, topk=10, individual=false)
    # The log-likelihood of the fit is not needed, only the estimate.
    _, Ω̂ = polyadic_AON_MLE(H, labels)
    Qs, combined_Qs = decomposedAONPolyadicModularity(H, labels, Ω̂)
    stopind = min(topk, length(names))

    # Print `header`, the `stopind` largest entries of `contribs`, and a rule.
    function print_ranked(header, contribs)
        println(header)
        # A label that no node carries has no entry in `contribs`; it is
        # reported with a contribution of 0.
        table = [(name, get(contribs, i, 0.0)) for (i, name) in enumerate(names)]
        ranked = sort(table, by=kv -> kv[2], rev=true)[1:stopind]
        for (rank, (name, q)) in enumerate(ranked)
            println(rpad("$rank: ", 5),
                    rpad(name, 31),
                    rpad(@sprintf("%.3E", q), 15))
        end
        println("------")
    end

    # Polyadic (combined)
    print_ranked("polyadic...", combined_Qs)

    if individual
        # Polyadic (per hyperedge size)
        for key in sort(collect(keys(Qs)))
            print_ranked("ℓ = $(key)...", Qs[key])
        end
    end
    return nothing
end

polyadic_modularity_contributions (generic function with 3 methods)

In [7]:
"""
    walmart_no_other(min, max)

Load the `"walmart-trips"` dataset via `read_hypergraph_data(dataset, max,
min)` and drop every node labeled with the `"Other"` category.

Returns `(H, labels, names)` for the remaining sub-hypergraph. The returned
labels are renumbered so that label `i` refers to `names[i]`: labels above
the removed category shift down by one. If no category is named `"Other"`,
no node is dropped and labels and names are returned unchanged.
"""
function walmart_no_other(min, max)
    dataset = "walmart-trips"
    H, labels = read_hypergraph_data(dataset, max, min)
    names = read_hypergraph_label_names(dataset)

    # throw out "Other" category
    other_key = "Other"
    other_ind = findfirst(==(other_key), names)   # `nothing` when absent
    keep = Vector{Bool}(labels .!= other_ind)
    (subH, node_map) = subhypergraph(H, keep)

    # Removing one entry from `names` moves every later name up by one
    # position, so labels above the removed index must follow suit to keep
    # `names[label]` pointing at the right category.
    relabel(l) = (other_ind !== nothing && l > other_ind) ? l - 1 : l

    # presumably `node_map` pairs each original node index with its index in
    # `subH` -- TODO confirm against `subhypergraph`
    sub_labels = zeros(Int64, maximum(subH.N))
    for (i, j) in node_map
        sub_labels[j] = relabel(labels[i])
    end

    # Drop exactly the entry whose index was filtered out of the labels.
    kept_names = names[eachindex(names) .!= other_ind]
    return subH, sub_labels, kept_names
end

walmart_no_other (generic function with 1 method)

In [8]:
# Walmart trips without the "Other" category, loaded with min = 2 and
# max = 10; print up to 12 rows for the dyadic and the polyadic ranking.
H, labels, names = walmart_no_other(2, 10)
dyadic_modularity_contributions(H, labels, names, 12)
polyadic_modularity_contributions(H, labels, names, 12)

dyadic...
1:   Food, Household & Pets         9.198E-02      
2:   Pharmacy, Health & Beauty      4.885E-02      
3:   Clothing, Shoes & Accessories  4.247E-02      
4:   Home, Furniture & Appliances   1.378E-02      
5:   Baby                           1.238E-02      
6:   Auto, Tires & Industrial       8.468E-03      
7:   Home Improvement & Patio       6.702E-03      
8:   Electronics and Office         6.170E-03      
9:   Toys, Games, and Video Games   4.499E-03      
10:  Sports, Fitness & Outdoors     3.582E-03      
------
polyadic...
1:   Food, Household & Pets         1.535E+04      
2:   Pharmacy, Health & Beauty      4.611E+03      
3:   Clothing, Shoes & Accessories  4.314E+03      
4:   Home, Furniture & Appliances   1.145E+03      
5:   Baby                           1.014E+03      
6:   Electronics and Office         9.484E+02      
7:   Auto, Tires & Industrial       9.062E+02      
8:   Home Improvement & Patio       5.541E+02      
9:   Toys, Games, and Video Games  

In [9]:
# Repeat the Walmart rankings with min == max == k for k = 2, ..., 5, using
# the default number of rows (topk = 10) for both rankings.
for k = 2:5
    H, labels, names = walmart_no_other(k, k)
    println("$k...")
    dyadic_modularity_contributions(H, labels, names)
    polyadic_modularity_contributions(H, labels, names)
end

2...
dyadic...
1:   Food, Household & Pets         1.147E-01      
2:   Clothing, Shoes & Accessories  7.365E-02      
3:   Pharmacy, Health & Beauty      7.350E-02      
4:   Electronics and Office         2.884E-02      
5:   Home, Furniture & Appliances   2.741E-02      
6:   Auto, Tires & Industrial       2.670E-02      
7:   Baby                           1.364E-02      
8:   Toys, Games, and Video Games   1.295E-02      
9:   Home Improvement & Patio       1.148E-02      
10:  Sports, Fitness & Outdoors     8.676E-03      
------
polyadic...
1:   Food, Household & Pets         2.282E+03      
2:   Clothing, Shoes & Accessories  1.466E+03      
3:   Pharmacy, Health & Beauty      1.463E+03      
4:   Electronics and Office         5.740E+02      
5:   Home, Furniture & Appliances   5.454E+02      
6:   Auto, Tires & Industrial       5.314E+02      
7:   Baby                           2.714E+02      
8:   Toys, Games, and Video Games   2.577E+02      
9:   Home Improvement & Patio 

In [10]:
# congress-bills: compare the three dyadic variants, selected through the
# (weighted, binary) arguments, and then print the polyadic ranking together
# with its per-size tables (individual = true). Two rows per table.
dataset = "congress-bills"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)
println("Weight-normalized")
dyadic_modularity_contributions(H, labels, names, 2, true, false)
println("Clique-weighted")
dyadic_modularity_contributions(H, labels, names, 2, false, false)
println("binary")
dyadic_modularity_contributions(H, labels, names, 2, false, true)
polyadic_modularity_contributions(H, labels, names, 2, true)

Weight-normalized
dyadic...
1:   1 democrat                     8.075E-02      
2:   2 republican                   7.661E-02      
------
Clique-weighted
dyadic...
1:   1 democrat                     8.530E-02      
2:   2 republican                   8.030E-02      
------
binary
dyadic...
1:   1 democrat                     5.421E-02      
2:   2 republican                   5.247E-02      
------
polyadic...
1:   2 republican                   7.377E+03      
2:   1 democrat                     6.936E+03      
------
ℓ = 2...
1:   2 republican                   3.672E+02      
2:   1 democrat                     2.423E+02      
------
ℓ = 3...
1:   2 republican                   7.628E+02      
2:   1 democrat                     5.250E+02      
------
ℓ = 4...
1:   2 republican                   9.557E+02      
2:   1 democrat                     6.447E+02      
------
ℓ = 5...
1:   2 republican                   8.914E+02      
2:   1 democrat                     8.639E+02      


In [11]:
# Rankings for the dataset named by the global `dataset` (set in an earlier
# cell), reloaded with both size arguments equal to k for k in 2, 5 and 10.
for k in [2, 5, 10]
    H, labels = read_hypergraph_data(dataset, k, k)
    names = read_hypergraph_label_names(dataset)
    println("$k...")
    dyadic_modularity_contributions(H, labels, names)
    polyadic_modularity_contributions(H, labels, names)
end

2...
dyadic...
1:   1 democrat                     5.633E-02      
2:   2 republican                   5.460E-02      
------
polyadic...
1:   1 democrat                     3.260E+02      
2:   2 republican                   3.160E+02      
------
5...
dyadic...
1:   1 democrat                     7.254E-02      
2:   2 republican                   6.960E-02      
------
polyadic...
1:   2 republican                   8.946E+02      
2:   1 democrat                     8.494E+02      
------
10...
dyadic...
1:   1 democrat                     6.999E-02      
2:   2 republican                   6.686E-02      
------
polyadic...
1:   1 democrat                     8.448E+02      
2:   2 republican                   7.131E+02      
------


In [12]:
# contact-high-school-classes: dyadic and polyadic rankings with the default
# arguments (topk = 10).
dataset = "contact-high-school-classes"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)
dyadic_modularity_contributions(H, labels, names)
polyadic_modularity_contributions(H, labels, names)

dyadic...
1:   PC                             7.081E-02      
2:   2BIO3                          6.188E-02      
3:   PC*                            6.168E-02      
4:   MP*2                           5.143E-02      
5:   2BIO1                          4.961E-02      
6:   MP                             4.897E-02      
7:   2BIO2                          4.532E-02      
8:   PSI*                           4.491E-02      
9:   MP*1                           2.851E-02      
------
polyadic...
1:   PC                             2.652E+03      
2:   2BIO3                          2.487E+03      
3:   PC*                            2.158E+03      
4:   MP*2                           1.873E+03      
5:   MP                             1.675E+03      
6:   2BIO1                          1.597E+03      
7:   2BIO2                          1.553E+03      
8:   PSI*                           1.260E+03      
9:   MP*1                           6.795E+02      
------


In [13]:
# Load contact-primary-school-classes into the globals used by the following
# cells; the trailing `;` suppresses the cell's displayed value.
dataset = "contact-primary-school-classes"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)
;

In [14]:
# Compare the three dyadic variants, selected through the (weighted, binary)
# arguments, and the polyadic ranking on the globals `H`, `labels`, `names`
# loaded in an earlier cell; up to 12 rows per table.
println("Weight-normalized")
dyadic_modularity_contributions(H, labels, names, 12, true, false)
println("Clique-weighted")
dyadic_modularity_contributions(H, labels, names, 12, false, false)
println("binary")
dyadic_modularity_contributions(H, labels, names, 12, false, true)
polyadic_modularity_contributions(H, labels, names, 12)

Weight-normalized
dyadic...
1:   1B                             3.915E-02      
2:   2B                             3.436E-02      
3:   5B                             3.048E-02      
4:   3B                             2.988E-02      
5:   2A                             2.908E-02      
6:   5A                             2.734E-02      
7:   4A                             2.694E-02      
8:   3A                             2.475E-02      
9:   4B                             2.417E-02      
10:  1A                             2.261E-02      
11:  Teachers                       7.414E-04      
------
Clique-weighted
dyadic...
1:   1B                             4.850E-02      
2:   2B                             3.868E-02      
3:   3B                             3.730E-02      
4:   5B                             3.493E-02      
5:   2A                             3.326E-02      
6:   5A                             3.219E-02      
7:   4A                             3.103E-02      
8: 

In [15]:
# Rankings for the dataset named by the global `dataset` (set in an earlier
# cell), reloaded with both size arguments equal to k for k = 2, ..., 5.
for k = 2:5
    H, labels = read_hypergraph_data(dataset, k, k)
    names = read_hypergraph_label_names(dataset)
    println("$k...")
    dyadic_modularity_contributions(H, labels, names)
    polyadic_modularity_contributions(H, labels, names)
end

2...
dyadic...
1:   2B                             2.279E-02      
2:   4B                             2.104E-02      
3:   5B                             1.793E-02      
4:   2A                             1.755E-02      
5:   4A                             1.544E-02      
6:   1A                             1.424E-02      
7:   5A                             1.389E-02      
8:   1B                             1.380E-02      
9:   3A                             1.068E-02      
10:  3B                             1.030E-02      
------
polyadic...
1:   2B                             2.684E+02      
2:   4B                             2.478E+02      
3:   5B                             2.112E+02      
4:   2A                             2.067E+02      
5:   4A                             1.819E+02      
6:   1A                             1.677E+02      
7:   5A                             1.636E+02      
8:   1B                             1.625E+02      
9:   3A                       

In [16]:
# TrivagoClickout: top-5 dyadic ranking, then the top-5 polyadic ranking with
# its per-size tables (individual = true).
dataset = "TrivagoClickout"
H, labels = read_hypergraph_data(dataset, 9)
names = read_hypergraph_label_names(dataset)
dyadic_modularity_contributions(H, labels, names, 5)
polyadic_modularity_contributions(H, labels, names, 5, true)

dyadic...
1:   Brazil                         7.353E-02      
2:   Japan                          6.520E-02      
3:   USA                            6.310E-02      
4:   United Kingdom                 4.756E-02      
5:   Germany                        4.161E-02      
------
polyadic...
1:   Brazil                         1.806E+05      
2:   Japan                          1.804E+05      
3:   USA                            1.746E+05      
4:   United Kingdom                 1.332E+05      
5:   Germany                        1.114E+05      
------
ℓ = 3...
1:   Brazil                         8.763E+04      
2:   USA                            7.261E+04      
3:   Japan                          6.903E+04      
4:   United Kingdom                 5.572E+04      
5:   Mexico                         5.067E+04      
------
ℓ = 4...
1:   Brazil                         4.317E+04      
2:   Japan                          4.289E+04      
3:   USA                            4.042E+04      
4: 

In [17]:
# Rankings for the dataset named by the global `dataset` (set in an earlier
# cell), reloaded with both size arguments equal to k for k = 5, ..., 8.
for k = 5:8
    H, labels = read_hypergraph_data(dataset, k, k)
    # Re-read the label names alongside the data, as the other per-size loops
    # do, so this cell does not depend on a `names` global left over from
    # whichever cell ran last.
    names = read_hypergraph_label_names(dataset)
    println("$k...")
    dyadic_modularity_contributions(H, labels, names, 5)
    polyadic_modularity_contributions(H, labels, names, 5)
end

5...
dyadic...
1:   Brazil                         7.842E-02      
2:   Japan                          7.105E-02      
3:   USA                            6.435E-02      
4:   United Kingdom                 4.975E-02      
5:   Germany                        4.370E-02      
------
polyadic...
1:   Japan                          2.591E+04      
2:   USA                            2.418E+04      
3:   Brazil                         2.376E+04      
4:   United Kingdom                 1.842E+04      
5:   Germany                        1.579E+04      
------
6...
dyadic...
1:   Brazil                         7.491E-02      
2:   Japan                          7.027E-02      
3:   USA                            6.606E-02      
4:   United Kingdom                 4.848E-02      
5:   Turkey                         4.296E-02      
------
polyadic...
1:   Japan                          1.539E+04      
2:   USA                            1.469E+04      
3:   Brazil                         1.429