In this notebook we generate a *dyadic* graph (the clique expansion of a hypergraph), which we will then attempt to cluster.

In [128]:
## Generate a graph
using Pkg; Pkg.activate(".")
using HypergraphModularity

using StatsBase
using SparseArrays

# parameters

# Name of the dataset handed to read_hypergraph_data.
dataset = "TrivagoClickout"
# Second argument to read_hypergraph_data.
# NOTE(review): presumably the largest hyperedge size to keep — confirm in the package.
kmax_ = 100

# H is the hypergraph object (fields N, E, D in the printed output below).
# Z is an integer vector; presumably per-node labels — confirm against read_hypergraph_data.
H, Z = read_hypergraph_data(dataset,kmax_)

[32m[1m Activating[22m[39m environment at `~/hypergraph_modularities_code/Project.toml`


(hypergraph
  N: Array{Int64}((172738,)) [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  172729, 172730, 172731, 172732, 172733, 172734, 172735, 172736, 172737, 172738]
  E: Dict{Int64,Dict}
  D: Array{Int64}((172738,)) [6, 6, 5, 1, 1, 4, 3, 3, 3, 4  …  4, 1, 2, 2, 1, 7, 8, 5, 1, 4]
, [1, 2, 3, 4, 5, 5, 6, 7, 8, 1  …  14, 5, 8, 17, 3, 81, 17, 29, 55, 47])

In [129]:
# Reference timing: build G with CliqueExpansion, which is not defined in this
# notebook (presumably exported by HypergraphModularity; the meaning of the
# `false` argument should be confirmed there).
@time  G = CliqueExpansion(H, false);

  0.538146 seconds (3.37 M allocations: 246.028 MiB, 10.24% gc time)


In [130]:
"""
    CliqueExpansion_(H; weighted=false, dtype=Int64)

Build the symmetric adjacency matrix of the clique expansion of hypergraph `H`.

Every hyperedge of size `k ≥ 2` contributes one value to each unordered pair of its
nodes; contributions to the same pair are summed. Hyperedges of size 1 are ignored and
the diagonal of the result is zero.

# Arguments
- `H`: object with a field `D` (only its length, the number of nodes `n`, is used) and a
  field `E`, a dictionary mapping an edge size `k` to a dictionary `edge => weight`,
  where `edge[1]`, …, `edge[k]` are node indices.

# Keywords
- `weighted`: when `true` each pair receives `weight/(k-1)`, otherwise `weight`.
- `dtype`: element type of the stored values. It must not be an integer type when
  `weighted` is `true`.

# Returns
An `n × n` sparse matrix without stored zeros, or `false` (after printing a message)
when `weighted` is combined with an integer `dtype`.
"""
function CliqueExpansion_(H;weighted=false, dtype = Int64)

    if weighted && (dtype <: Integer)
        print("Don't use integer data type to record weighted edges; exiting.")
        return false
    end

    n = length(H.D)

    # Node indices must live in an integer vector: storing them as `dtype` when `dtype`
    # is a floating-point type rounds large node ids (e.g. Float16 above 2048) and
    # silently connects the wrong nodes. Integer `dtype`s keep their index type so
    # existing callers (e.g. `dtype = UInt32`) get the same matrix type as before.
    Ti = dtype <: Integer ? dtype : Int
    I = Vector{Ti}()
    J = Vector{Ti}()
    V = Vector{dtype}()

    for k in setdiff(keys(H.E), 1)
        for (edge, weight) in H.E[k]
            # Value shared by every pair of this hyperedge.
            w = weighted ? weight/(k-1) : weight
            for i = 1:k-1
                ei = edge[i]
                for j = i+1:k
                    push!(I, ei)
                    push!(J, edge[j])
                    push!(V, w)
                end
            end
        end
    end

    # `sparse` sums the values of repeated (i, j) pairs.
    A = SparseArrays.sparse(I, J, V, n, n)

    # Remove self-loops (possible if a hyperedge repeats a node) and any stored zeros.
    for i = 1:n
        A[i,i] = zero(dtype)
    end
    SparseArrays.dropzeros!(A)

    # Symmetrize: each pair was recorded once, in the order it appears in the edge.
    A = SparseArrays.sparse(A + A')
    return A
end

CliqueExpansion_ (generic function with 1 method)

In [131]:
# Time the notebook-local CliqueExpansion_ on the same hypergraph: unweighted,
# values stored as UInt32. This rebinds G.
@time G = CliqueExpansion_(H; weighted= false, dtype = UInt32);

  0.722936 seconds (3.83 M allocations: 163.001 MiB, 26.60% gc time)


In [132]:
"""
    ConstructAdj(C, n)

Split the stored row indices of the first `n` columns of the sparse matrix `C` into one
neighbor list per column.

Returns `(Neighbs, d)`: `Neighbs[i]` holds the row indices stored in column `i`, and
`d[i]` is how many there are.
"""
function ConstructAdj(C::SparseArrays.SparseMatrixCSC,n::Int64)
    colptr, rowidx = C.colptr, C.rowval
    Neighbs = Vector{Vector{Int64}}(undef, n)
    d = Vector{Int64}(undef, n)

    for col in 1:n
        start = colptr[col]
        stop = colptr[col+1]
        # Column `col` owns the slice start:stop-1 of the row-index vector.
        Neighbs[col] = rowidx[start:stop-1]
        d[col] = stop - start
    end

    # d counts neighbors, i.e. it is the unweighted degree. For a weighted graph this
    # is NOT the same as the weighted degree vector d used elsewhere.
    return Neighbs, d
end

ConstructAdj (generic function with 1 method)

In [135]:
# Time the adjacency-list construction over all length(H.D) nodes; the result is discarded.
@time ConstructAdj(G, length(H.D));

  0.089072 seconds (345.50 k allocations: 57.920 MiB, 51.11% gc time)


In [137]:
# Show the dimensions of the adjacency matrix G.
size(G)

(172738, 172738)