In [1]:
using Pkg; Pkg.activate(".")
using HypergraphModularity
using StatsBase

[32m[1m Activating[22m[39m environment at `~/code/hypergraph_modularities_code/Project.toml`


In [2]:
identity(p::Vector{Int64}) = p

function sum_of_ext_degs(p::Vector{Int64})
    soed = length(p) - 1
    return (sum(p), soed)
end

function all_or_nothing(p::Vector{Int64})
    is_aon = length(p) == 1
    return (sum(p), is_aon)
end

function all_or_nothing2(p::Vector{Int64})
    return is_aon
end

function rainbow(p::Vector{Int64})
    is_rainbow = length(p) == sum(p) && length(p) > 1
    return (sum(p), is_rainbow)
end

function all_or_nothing2(p::Vector{Int64})
    return length(p) == 1
end

all_or_nothing2 (generic function with 1 method)

In [3]:
function MLE_ll(Hyp, Z, agg)
    Ω̂ = estimateΩEmpirically(Hyp, Z; min_val=0, aggregator=agg)
    return Float64(sum(logLikelihood(Hyp, Z, Ω̂; α=0, bigInt=true)))    
end

function dyadic_MLE_ll(Hyp, Z, weighted::Bool, binary::Bool)
    ω_in, ω_out = 
        computeDyadicResolutionParameter(Hyp, Z; mode="ω", weighted=weighted, binary=binary)
    return Float64(dyadicLogLikelihood(Hyp, Z, ω_in, ω_out; weighted=weighted, binary=binary))
end

dyadic_MLE_ll (generic function with 1 method)

In [4]:
H, labels = read_hypergraph_data("walmart-trips", 10)
names = read_hypergraph_label_names("walmart-trips")

11-element Array{String,1}:
 "Clothing, Shoes & Accessories"
 "Electronics and Office"
 "Home, Furniture & Appliances"
 "Home Improvement & Patio"
 "Baby"
 "Toys, Games, and Video Games"
 "Food, Household & Pets"
 "Pharmacy, Health & Beauty"
 "Sports, Fitness & Outdoors"
 "Auto, Tires & Industrial"
 "Other"

In [5]:
h_lls = Float64[]
g_lls = Float64[]
for z in unique(labels)
    Z = ones(Int64, length(labels))
    Z[labels .== z] .= 2
    
    h_ll = MLE_ll(H, Z, sum_of_ext_degs)
    g_ll = dyadic_MLE_ll(H, Z, false, false)
    
    push!(h_lls, h_ll)
    push!(g_lls, g_ll)
end

In [6]:
hsp = sortperm(h_lls, rev=true)
gsp = sortperm(g_lls, rev=true)
println(rpad("",     5), rpad("Hypergraph",  35), "\t", rpad("Graph",       30))
for (i, (j, k)) in enumerate(zip(hsp, gsp))
    println(rpad("$i: ", 5), rpad("$(names[j])", 35), "\t", rpad("$(names[k])", 30))
end

     Hypergraph                         	Graph                         
1:   Clothing, Shoes & Accessories      	Electronics and Office        
2:   Home Improvement & Patio           	Sports, Fitness & Outdoors    
3:   Home, Furniture & Appliances       	Toys, Games, and Video Games  
4:   Food, Household & Pets             	Pharmacy, Health & Beauty     
5:   Pharmacy, Health & Beauty          	Auto, Tires & Industrial      
6:   Electronics and Office             	Baby                          
7:   Baby                               	Home Improvement & Patio      
8:   Toys, Games, and Video Games       	Other                         
9:   Other                              	Clothing, Shoes & Accessories 
10:  Sports, Fitness & Outdoors         	Home, Furniture & Appliances  
11:  Auto, Tires & Industrial           	Food, Household & Pets        


In [7]:
H, labels = read_hypergraph_data("TrivagoClickout", 10)
names = read_hypergraph_label_names("TrivagoClickout")
name_map = Dict(name => j for (j, name) in enumerate(names))
;

In [8]:
cm = countmap(labels)
scm = sort(collect(cm), by=x->x[2], rev=true)
tot = length(labels)
for (k, v) in scm[1:10]
    @show names[k], v
    tot -= v
end
@show tot

(names[k], v) = ("USA", 15519)
(names[k], v) = ("Brazil", 14921)
(names[k], v) = ("Japan", 10065)
(names[k], v) = ("India", 9963)
(names[k], v) = ("Italy", 9699)
(names[k], v) = ("Spain", 8654)
(names[k], v) = ("United Kingdom", 8430)
(names[k], v) = ("Germany", 8269)
(names[k], v) = ("Turkey", 6186)
(names[k], v) = ("Mexico", 5749)
tot = 75283


75283

In [9]:
#=
s_am = ["Brazil", "Colombia", "Argentina", "Peru", "Venezuela", "Chile", "Ecuador", "Bolivia",
        "Bolivia", "Paraguay", "Uruguay", "Guyana", "French Guiana"]
n_am = ["USA", "Canada", "Mexico"]
europe = ["Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland",
          "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", 
          "Netherlands", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden", "Turkey", 
          "United Kingdom", "Switzerland"]
asia = ["Japan", "Thailand", "Taiwan", "Indonesia", "Philippines", "Vietnam", "China"]

region_names = ["South America", "North America", "Europe", "Asia"]
=#

In [10]:
h_lls = Float64[]
g_lls = Float64[]
loc_names = []
#for region in [s_am, n_am, europe, asia]
#    inds = [name_map[c] for c in region]
for (k, _) in scm[1:10]
    Z = ones(Int64, length(labels))
    #Z[[l for l in labels if l in inds]] .= 2
    Z[labels .== k] .= 2
    push!(loc_names, names[k])
    
    h_ll = MLE_ll(H, Z, sum_of_ext_degs)
    g_ll = dyadic_MLE_ll(H, Z, false, false)
    
    push!(h_lls, h_ll)
    push!(g_lls, g_ll)    
end

In [11]:
hsp = sortperm(h_lls, rev=true)
gsp = sortperm(g_lls, rev=true)
println(rpad("",     5), rpad("Hypergraph",  35), "\t", rpad("Graph",       30))
for (i, (j, k)) in enumerate(zip(hsp, gsp))
    println(rpad("$i: ", 5), rpad("$(loc_names[j])", 35), "\t", rpad("$(loc_names[k])", 30))
end

     Hypergraph                         	Graph                         
1:   Brazil                             	Brazil                        
2:   Japan                              	USA                           
3:   USA                                	Japan                         
4:   United Kingdom                     	United Kingdom                
5:   Germany                            	Germany                       
6:   Turkey                             	Italy                         
7:   Spain                              	Spain                         
8:   Mexico                             	Turkey                        
9:   India                              	Mexico                        
10:  Italy                              	India                         
