In [1]:
using Pkg
Pkg.activate(".")

[32m[1m Activating[22m[39m environment at `~/code/hypergraph_modularities_code/Project.toml`


In [2]:
using HypergraphModularity
using LinearAlgebra
using Printf
using SparseArrays
using StatsBase

In [3]:
"""
    all_or_nothing(p::Vector{Int64})

Collapse the integer vector `p` into the pair `(sum(p), length(p) == 1)`.

The second component is `true` only when `p` has exactly one entry. This
function is handed to `estimateΩEmpirically` as its `aggregator`; `p` is
presumably the vector of group sizes of a hyperedge (so one entry means the
whole edge sits in a single cluster) -- confirm against the package docs.
"""
function all_or_nothing(p::Vector{Int64})
    total = sum(p)
    single_group = length(p) == 1
    return (total, single_group)
end

"""
    polyadic_AON_MLE(Hyp, Z)

Estimate an affinity function for hypergraph `Hyp` under labels `Z` with
`estimateΩEmpirically`, using `all_or_nothing` as the aggregator, and evaluate
the log-likelihood of that fit.

Returns `(ll, Ω̂)`: the summed result of `logLikelihood` converted to
`Float64`, and the estimated affinity object.
"""
function polyadic_AON_MLE(Hyp, Z)
    Ω_fit = estimateΩEmpirically(Hyp, Z; aggregator=all_or_nothing, min_val=0)
    # bigInt=true is forwarded to the package; the sum is narrowed to Float64 afterwards.
    ll_terms = logLikelihood(Hyp, Z, Ω_fit; α=0, bigInt=true)
    return (Float64(sum(ll_terms)), Ω_fit)
end

"""
    dyadic_MLE(Hyp, Z, weighted::Bool, binary::Bool)

Fit the dyadic model for hypergraph `Hyp` under labels `Z`.

`weighted` and `binary` are forwarded unchanged as keyword arguments to every
package call made here.

Returns `(ll, ω_in, ω_out, γ, Q)` where `ω_in, ω_out` come from
`computeDyadicResolutionParameter` in mode "ω",
`γ = (ω_in - ω_out) / (log(ω_in) - log(ω_out))`, `Q` is `dyadicModularity`
evaluated at `γ`, and `ll` is `dyadicLogLikelihood` converted to `Float64`.
"""
function dyadic_MLE(Hyp, Z, weighted::Bool, binary::Bool)
    # The same two options go to all three package calls.
    opts = (weighted=weighted, binary=binary)

    ω_in, ω_out = computeDyadicResolutionParameter(Hyp, Z; mode="ω", opts...)

    numer = ω_in - ω_out
    denom = log(ω_in) - log(ω_out)
    γ = numer / denom

    Q = dyadicModularity(Hyp, Z, γ; opts...)
    ll = Float64(dyadicLogLikelihood(Hyp, Z, ω_in, ω_out; opts...))
    return (ll, ω_in, ω_out, γ, Q)
end

dyadic_MLE (generic function with 1 method)

In [4]:
"""
    decomposedDyadicModularity(H, Z, γ, weighted, binary)

Split the dyadic modularity of the clique expansion of `H` into one term per
cluster of the label vector `Z`.

With `G = CliqueExpansion(H, weighted, binary)`, column sums `d` of `G` and
`volG = sum(d)`, the term for cluster `c` is

    (edges[c] - γ * vols[c]^2 / volG) / volG

where `edges[c]` sums the stored entries of `G` whose row and column nodes both
have label `c`, and `vols[c]` sums `d` over the nodes labelled `c`.

Returns a `Dict{Int64, Float64}` keyed by the distinct values of `Z`.
"""
function decomposedDyadicModularity(H, Z, γ, weighted, binary)
    G = CliqueExpansion(H, weighted, binary)
    cluster_ids = unique(Z)
    zero_table() = Dict{Int64, Float64}(c => 0.0 for c in cluster_ids)

    # Within-cluster weight: stored entries of G joining two nodes with the same label.
    rows, cols, vals = SparseArrays.findnz(G)
    edges = zero_table()
    for (r, s, w) in zip(rows, cols, vals)
        label = Z[r]
        if label == Z[s]
            edges[label] += w
        end
    end

    # Cluster volumes: column sums of G accumulated by label.
    d = vec(sum(G, dims=1))
    vols = zero_table()
    for (label, degree) in zip(Z, d)
        vols[label] += degree
    end

    # Per-cluster modularity term.
    volG = sum(d)
    return Dict{Int64, Float64}(
        c => (edges[c] - γ * vols[c]^2 / volG) / volG for c in cluster_ids
    )
end

decomposedDyadicModularity (generic function with 1 method)

In [5]:
"""
    decomposedAONPolyadicModularity(H, Z, Ω)

Split the all-or-nothing polyadic modularity into per-cluster terms, separately
for every hyperedge size `ℓ >= 2` present in `H.E` and summed over all sizes.

# Arguments
- `H`: object whose field `E` maps a hyperedge size `ℓ` to a collection of
  `edge => weight` pairs, where `edge` iterates over node indices into `Z`.
- `Z`: cluster label of every node.
- `Ω`: object whose field `ω` is called as `Ω.ω((ℓ, true), 0)` and
  `Ω.ω((ℓ, false), 0)`.

# Returns
`(all_Qs, combined_Qs)` where `all_Qs[ℓ][c]` is the size-`ℓ` term of cluster `c`
and `combined_Qs[c]` is the sum of those terms over `ℓ`. For each size

    all_Qs[ℓ][c] = α * Ws[c] - β * vols[c]^ℓ

with `α = log(ω1) - log(ω0)`, `β = ω1 - ω0`, `Ws[c]` the total weight of
size-`ℓ` edges lying entirely inside `c`, and `vols[c]` the weight of size-`ℓ`
edges counted once per member node labelled `c`. Note that the volume is built
from the size-`ℓ` edges only.

If `H.E` holds no size of at least 2 (including an empty `H.E`), `all_Qs` is
empty and every entry of `combined_Qs` is `0.0`.
"""
function decomposedAONPolyadicModularity(H, Z, Ω)
    cluster_ids = unique(Z)
    zero_table() = Dict{Int64, Float64}(c => 0.0 for c in cluster_ids)

    all_Qs = Dict{Int64, Dict{Int64, Float64}}()
    combined_Qs = zero_table()

    # Visit only the sizes that actually occur, in increasing order. This also
    # makes an empty `H.E` yield an empty result instead of failing in `maximum`.
    sizes = sort!([ℓ for ℓ in keys(H.E) if ℓ >= 2])

    for ℓ in sizes
        # Volume and W terms
        vols = zero_table()
        Ws = zero_table()
        for (edge, weight) in H.E[ℓ]
            Zedge = [Z[v] for v in edge]
            if all(z -> z == Zedge[1], Zedge)
                Ws[Zedge[1]] += weight
            end
            for c in Zedge
                vols[c] += weight
            end
        end

        # Modularities
        ω1 = Ω.ω((ℓ, true), 0)
        ω0 = Ω.ω((ℓ, false), 0)
        α = log(ω1) - log(ω0)
        β = ω1 - ω0
        Qs = zero_table()
        for c in cluster_ids
            Qs[c] = α * Ws[c] - β * vols[c]^ℓ
            combined_Qs[c] += Qs[c]
        end

        all_Qs[ℓ] = Qs
    end

    return all_Qs, combined_Qs
end

decomposedAONPolyadicModularity (generic function with 1 method)

In [6]:
"""
    dyadic_modularity_contributions(H, labels, names, topk=10, weighted=false, binary=true)

Print the `topk` largest per-cluster dyadic modularity contributions.

The dyadic model is fitted with `dyadic_MLE`, its resolution parameter `γ` is
passed to `decomposedDyadicModularity`, and the resulting per-cluster terms are
ranked in decreasing order.

# Arguments
- `H`, `labels`: hypergraph and node labels, forwarded to the fitting routines.
- `names`: display names; `names[i]` is paired with cluster id `i`.
- `topk`: number of rows to print (at most `length(names)`).
- `weighted`, `binary`: forwarded to `dyadic_MLE` and `decomposedDyadicModularity`.

A named cluster id that does not occur in `labels` is listed with a
contribution of `0.0` rather than raising a `KeyError`.

Returns `nothing`; the ranking is written to standard output.
"""
function dyadic_modularity_contributions(H, labels, names, topk=10, weighted=false, binary=true)
    # Only the resolution parameter (4th entry of the fit) is needed here.
    γ = dyadic_MLE(H, labels, weighted, binary)[4]
    dyadicQs = decomposedDyadicModularity(H, labels, γ, weighted, binary)
    stopind = min(topk, length(names))

    # Dyadic from clique expansion
    println("dyadic...")
    # An id with no nodes has zero within-cluster weight and zero volume, hence 0.0.
    ranked = sort([(name, get(dyadicQs, i, 0.0)) for (i, name) in enumerate(names)],
                  by=kv -> kv[2], rev=true)[1:stopind]
    for (i, (name, Q)) in enumerate(ranked)
        println(rpad("$i: ", 5),
                rpad(name, 31),
                rpad(@sprintf("%.3E", Q), 15))
    end
    println("------")
    return nothing
end

# Print the `stopind` largest entries of `Qs` (cluster id => value) as ranked rows,
# then a separator line. `names[i]` labels cluster id `i`; an id missing from `Qs`
# is shown with the value 0.0.
function _print_ranked_contributions(Qs, names, stopind)
    ranked = sort([(name, get(Qs, i, 0.0)) for (i, name) in enumerate(names)],
                  by=kv -> kv[2], rev=true)[1:stopind]
    for (i, (name, Q)) in enumerate(ranked)
        println(rpad("$i: ", 5),
                rpad(name, 31),
                rpad(@sprintf("%.3E", Q), 15))
    end
    println("------")
    return nothing
end

"""
    polyadic_modularity_contributions(H, labels, names, topk=10, individual=false)

Print the `topk` largest per-cluster all-or-nothing polyadic modularity
contributions.

The affinity function is fitted with `polyadic_AON_MLE` and the modularity is
split per cluster with `decomposedAONPolyadicModularity`.

# Arguments
- `H`, `labels`: hypergraph and node labels, forwarded to the fitting routines.
- `names`: display names; `names[i]` is paired with cluster id `i`.
- `topk`: number of rows to print per ranking (at most `length(names)`).
- `individual`: when `true`, additionally print one ranking per hyperedge size,
  in increasing order of size.

A named cluster id that does not occur in `labels` is listed with a
contribution of `0.0` rather than raising a `KeyError`.

Returns `nothing`; the rankings are written to standard output.
"""
function polyadic_modularity_contributions(H, labels, names, topk=10, individual=false)
    # The log-likelihood of the fit is not needed for the ranking.
    _, Ω̂ = polyadic_AON_MLE(H, labels)
    Qs, combined_Qs = decomposedAONPolyadicModularity(H, labels, Ω̂)
    stopind = min(topk, length(names))

    # Polyadic (combined)
    println("polyadic...")
    _print_ranked_contributions(combined_Qs, names, stopind)

    if individual
        # Polyadic (per hyperedge size)
        for key in sort(collect(keys(Qs)))
            println("ℓ = $(key)...")
            _print_ranked_contributions(Qs[key], names, stopind)
        end
    end
    return nothing
end

polyadic_modularity_contributions (generic function with 3 methods)

In [7]:
# Load the walmart-trips hypergraph with its node labels and category names.
dataset = "walmart-trips"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)

# Throw out the "Other" category: drop its nodes and its entry in `names`.
other_key = "Other"
other_ind = findfirst(names .== other_key)
keep = Vector{Bool}(labels .!= other_ind)
(subH, node_map) = subhypergraph(H, keep)
sub_labels = zeros(Int64, maximum(subH.N))
for (i, j) in node_map
    # Deleting one entry from `names` moves every later name down by one
    # position, so labels above the removed index must move with them to keep
    # `names[label]` correct. Nothing changes when "Other" is the last category
    # or is not present at all.
    old_label = labels[i]
    shift = (other_ind !== nothing && old_label > other_ind) ? 1 : 0
    sub_labels[j] = old_label - shift
end

# Just to keep rest of the code the same
H = subH
labels = sub_labels
names = names[names .!= other_key]
;

In [8]:
# Rank the clusters of the currently loaded dataset by their dyadic and polyadic
# modularity contributions, using the default top-10 cut-off.
dyadic_modularity_contributions(H, labels, names)
polyadic_modularity_contributions(H, labels, names)

dyadic...
1:   Food, Household & Pets         9.198E-02      
2:   Pharmacy, Health & Beauty      4.885E-02      
3:   Clothing, Shoes & Accessories  4.247E-02      
4:   Home, Furniture & Appliances   1.378E-02      
5:   Baby                           1.238E-02      
6:   Auto, Tires & Industrial       8.468E-03      
7:   Home Improvement & Patio       6.702E-03      
8:   Electronics and Office         6.170E-03      
9:   Toys, Games, and Video Games   4.499E-03      
10:  Sports, Fitness & Outdoors     3.582E-03      
------
polyadic...
1:   Food, Household & Pets         3.350E+04      
2:   Pharmacy, Health & Beauty      5.358E+03      
3:   Clothing, Shoes & Accessories  4.605E+03      
4:   Home, Furniture & Appliances   1.200E+03      
5:   Baby                           1.034E+03      
6:   Electronics and Office         9.652E+02      
7:   Auto, Tires & Industrial       9.269E+02      
8:   Home Improvement & Patio       5.612E+02      
9:   Toys, Games, and Video Games  

In [9]:
# Load the TrivagoClickout dataset (second argument 5 is presumably a hyperedge
# size limit -- confirm against read_hypergraph_data) and print both rankings.
dataset = "TrivagoClickout"
H, labels = read_hypergraph_data(dataset, 5)
names = read_hypergraph_label_names(dataset)
dyadic_modularity_contributions(H, labels, names)
polyadic_modularity_contributions(H, labels, names)

dyadic...
1:   Brazil                         7.734E-02      
2:   Japan                          6.544E-02      
3:   USA                            6.273E-02      
4:   United Kingdom                 4.816E-02      
5:   Germany                        4.163E-02      
6:   Spain                          4.007E-02      
7:   India                          3.830E-02      
8:   Mexico                         3.665E-02      
9:   Italy                          3.601E-02      
10:  Turkey                         3.514E-02      
------
polyadic...
1:   Brazil                         2.199E+05      
2:   Japan                          1.671E+05      
3:   USA                            1.587E+05      
4:   United Kingdom                 1.130E+05      
5:   Germany                        9.253E+04      
6:   Mexico                         9.020E+04      
7:   Spain                          8.771E+04      
8:   India                          8.507E+04      
9:   Turkey                        

In [10]:
# Load the congress-bills dataset (second argument 10 is presumably a hyperedge
# size limit -- confirm against read_hypergraph_data) and print both rankings.
dataset = "congress-bills"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)
dyadic_modularity_contributions(H, labels, names)
polyadic_modularity_contributions(H, labels, names)

dyadic...
1:   1 democrat                     5.421E-02      
2:   2 republican                   5.247E-02      
------
polyadic...
1:   1 democrat                     1.684E+04      
2:   2 republican                   1.063E+04      
------


In [11]:
# Load the contact-high-school-classes dataset (second argument 10 is presumably
# a hyperedge size limit -- confirm against read_hypergraph_data) and print both
# rankings.
dataset = "contact-high-school-classes"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)
dyadic_modularity_contributions(H, labels, names)
polyadic_modularity_contributions(H, labels, names)

dyadic...
1:   PC                             7.081E-02      
2:   2BIO3                          6.188E-02      
3:   PC*                            6.168E-02      
4:   MP*2                           5.143E-02      
5:   2BIO1                          4.961E-02      
6:   MP                             4.897E-02      
7:   2BIO2                          4.532E-02      
8:   PSI*                           4.491E-02      
9:   MP*1                           2.851E-02      
------
polyadic...
1:   PC                             3.655E+03      
2:   2BIO3                          3.555E+03      
3:   PC*                            2.617E+03      
4:   MP*2                           2.373E+03      
5:   MP                             1.966E+03      
6:   2BIO2                          1.913E+03      
7:   2BIO1                          1.878E+03      
8:   PSI*                           1.376E+03      
9:   MP*1                           7.233E+02      
------


In [12]:
# Load the contact-primary-school-classes dataset; the following cells reuse
# `dataset`, `H`, `labels` and `names`. The trailing `;` suppresses cell output.
dataset = "contact-primary-school-classes"
H, labels = read_hypergraph_data(dataset, 10)
names = read_hypergraph_label_names(dataset)
;

In [13]:
# Compare three settings of the dyadic ranking on the loaded dataset, showing up
# to 12 rows each. The positional arguments after `names` are
# (topk, weighted, binary).
println("Weight-normalized")
dyadic_modularity_contributions(H, labels, names, 12, true, false)
println("Clique-weighted")
dyadic_modularity_contributions(H, labels, names, 12, false, false)
println("binary")
dyadic_modularity_contributions(H, labels, names, 12, false, true)
# Polyadic ranking for the same data, also with up to 12 rows.
polyadic_modularity_contributions(H, labels, names, 12)

Weight-normalized
dyadic...
1:   1B                             3.915E-02      
2:   2B                             3.436E-02      
3:   5B                             3.048E-02      
4:   3B                             2.988E-02      
5:   2A                             2.908E-02      
6:   5A                             2.734E-02      
7:   4A                             2.694E-02      
8:   3A                             2.475E-02      
9:   4B                             2.417E-02      
10:  1A                             2.261E-02      
11:  Teachers                       7.414E-04      
------
Clique-weighted
dyadic...
1:   1B                             4.850E-02      
2:   2B                             3.868E-02      
3:   3B                             3.730E-02      
4:   5B                             3.493E-02      
5:   2A                             3.326E-02      
6:   5A                             3.219E-02      
7:   4A                             3.103E-02      
8: 

In [14]:
# Repeat the dyadic ranking for k = 2, ..., 5, reloading the data each time with
# `k` passed as both extra arguments (presumably restricting to hyperedges of
# exactly size k -- confirm against read_hypergraph_data). `dataset` is whatever
# an earlier cell last assigned.
for k = 2:5
    H, labels = read_hypergraph_data(dataset, k, k)
    names = read_hypergraph_label_names(dataset)
    println("$k...")
    dyadic_modularity_contributions(H, labels, names)
end

2...
dyadic...
1:   2B                             2.279E-02      
2:   4B                             2.104E-02      
3:   5B                             1.793E-02      
4:   2A                             1.755E-02      
5:   4A                             1.544E-02      
6:   1A                             1.424E-02      
7:   5A                             1.389E-02      
8:   1B                             1.380E-02      
9:   3A                             1.068E-02      
10:  3B                             1.030E-02      
------
3...
dyadic...
1:   2B                             3.461E-02      
2:   2A                             3.114E-02      
3:   5B                             3.024E-02      
4:   5A                             3.015E-02      
5:   1B                             2.955E-02      
6:   4B                             2.879E-02      
7:   4A                             2.807E-02      
8:   1A                             2.747E-02      
9:   3B                    