In [None]:
using GraphMakie
using CSV
using DataFrames
using DelimitedFiles
using SankeyPlots
using ColorSchemes


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling GraphMakie [1ecd5474-83a3-4783-bb4f-06765db800d2]


In [None]:
# Load the tree matrix from CSV into a plain Matrix. No `header=false` is passed here
# (unlike the C/P/E/counts reads in this notebook), so CSV.jl's default header handling
# applies to this file.
Tree = CSV.File("data/tree_matrix.csv") |> DataFrame |> Matrix



In [None]:
# Small readers for the two on-disk formats used below.
# Tab-delimited text parsed as String and flattened to a vector
# (presumably one name per line -- confirm against the data files).
read_names(path) = vec(readdlm(path, '\t', String, '\n'))
# CSV without a header row, converted to a plain Matrix.
read_matrix(path) = Matrix(DataFrame(CSV.File(path, header=false)))

# Name lists for each kind of entity.
complex_names = read_names("data/complex_names.txt")
monomer_names = read_names("data/protein_names.txt")
cofactor_names = read_names("data/cofactor_names.txt")
element_names = read_names("data/element_names.txt")

# Complexes first, then monomers.
protein_names = vcat(complex_names, monomer_names)

# Numeric matrices; they are chained later as C_counts * P * E.
C = read_matrix("data/C_matrix.csv")
P = read_matrix("data/P_matrix.csv")
E = read_matrix("data/E_matrix.csv")

# Counts flattened to a vector.
counts = vec(read_matrix("data/counts.csv"))


In [None]:
# Tile the count vector column-wise, one column per monomer, so it can be multiplied
# elementwise with C.
Counts = repeat(counts; outer=(1, length(monomer_names)))

# C with each row scaled by that row's count.
C_counts = Counts .* C

# Chain the three matrices (same n-ary product as `C_counts * P * E`).
C_counts_elements = *(C_counts, P, E)

In [None]:
# Select the `top_included` rows of `C_counts_elements` with the highest content of one
# element (here "FE"), together with the cofactors that contain that element.
cur_element = "FE"
top_included = 20

# Boolean mask over `element_names`. The slices below rely on exactly one match:
# with zero or several matches the masks built from `E[:, element_idx]` have the wrong
# length and indexing fails later with an unhelpful BoundsError, so fail early instead.
element_idx = element_names .== cur_element
n_element_matches = sum(element_idx)
if n_element_matches != 1
    throw(ArgumentError(
        "expected exactly one entry \"$cur_element\" in element_names, " *
        "found $n_element_matches"
    ))
end

# Rows of E (cofactors) with a non-zero entry for the chosen element.
cofactors_with_element_idx = vec(E[:, element_idx] .!= 0)

# Element content per row of `C_counts_elements`, ranked in descending order.
complexes_by_this_element = vec(C_counts_elements[:, element_idx])

# Clamp the request so that having fewer than `top_included` rows does not raise a
# BoundsError; downstream code derives its sizes from the resulting matrix.
n_top = min(top_included, length(complexes_by_this_element))
top_element_content_indices = sortperm(complexes_by_this_element, rev=true)[1:n_top]

# Rows: the selected complexes; columns: the cofactors containing the element.
top_complex_cofactor_map = (C_counts * P)[top_element_content_indices, cofactors_with_element_idx]

In [None]:
# Build the Sankey edge list (element -> cofactors -> complexes) and the ordering
# constraints later passed to `sankey` as `force_order`.
#
# Node numbering, matching `node_labels`:
#   1                       the element itself
#   1 + j                   cofactor j   (j in 1:n_cofactor)
#   1 + n_cofactor + i      complex i    (i in 1:n_cplx)
src = Vector{Int64}()
dst = Vector{Int64}()
weights = Vector{Int64}()

ordering_cofactors = Vector{Pair{Int64, Int64}}()

n_cplx = size(top_complex_cofactor_map, 1)
n_cofactor = size(top_complex_cofactor_map, 2)

node_labels = [
    cur_element;
    cofactor_names[cofactors_with_element_idx];
    protein_names[top_element_content_indices]
]

# Entry of E for the chosen element, one value per selected cofactor. Computed once here
# instead of re-slicing E inside the edge loop.
element_per_cofactor = vec(E[cofactors_with_element_idx, element_idx])

# NOTE: these totals are summed over every row of `C_counts * P`, not only the selected
# complexes, so the flow into a cofactor node can exceed the flow drawn out of it.
n_cofactors_sum = vec(sum(C_counts * P, dims=1))[cofactors_with_element_idx]
n_cofactors_sum_elementwise = n_cofactors_sum .* E[cofactors_with_element_idx, element_idx]

# Layer 1: element -> every cofactor.
for j in 1:n_cofactor
    push!(src, 1)
    push!(dst, 1 + j)
    push!(weights, n_cofactors_sum_elementwise[j])
end

# Layer 2: cofactor -> complex, one edge per non-zero entry of the map.
for i in 1:n_cplx
    for j in 1:n_cofactor
        if top_complex_cofactor_map[i, j] != 0
            push!(src, 1 + j)
            push!(dst, 1 + n_cofactor + i)
            push!(weights, top_complex_cofactor_map[i, j] * element_per_cofactor[j])
        end
    end
end

# Ordering constraints, part 1: chain the cofactor nodes from largest to smallest total.
sort_cofactors = sortperm(vec(n_cofactors_sum_elementwise), rev=true)

for k in 1:(n_cofactor - 1)
    push!(ordering_cofactors, 1 + sort_cofactors[k] => 1 + sort_cofactors[k + 1])
end

# Ordering constraints, part 2: for each cofactor (largest first), chain the complexes
# that use it in descending count order, skipping any pair that touches a complex which
# was already placed by an earlier cofactor.
already_sorted = Vector{Int64}()

for i in sort_cofactors
    name = node_labels[1 + i]
    println("currently sorting $name")

    sorted_column = sortperm(top_complex_cofactor_map[:, i], rev=true)
    # Number of complexes with a positive count for this cofactor; after the descending
    # sort they occupy the first `nonzero` positions of `sorted_column`.
    nonzero = sum(top_complex_cofactor_map[sorted_column, i] .> 0)

    # Each step links position j to position j + 1, so the last non-zero position only
    # ever appears as a destination.
    for j in 1:(nonzero - 1)
        upstream_idx = sorted_column[j]
        downstream_idx = sorted_column[j + 1]

        cplx_name = node_labels[1 + n_cofactor + upstream_idx]
        downstream = node_labels[1 + n_cofactor + downstream_idx]

        # Named `cofactor_count` rather than `count` to avoid shadowing `Base.count`.
        cofactor_count = top_complex_cofactor_map[upstream_idx, i]

        println("currently sorting $cplx_name with count $cofactor_count")

        if !(upstream_idx in already_sorted || downstream_idx in already_sorted)
            println("Connecting node $cplx_name with $downstream")

            push!(
                ordering_cofactors,
                1 + n_cofactor + upstream_idx => 1 + n_cofactor + downstream_idx,
            )
        end
    end

    # exclude last element since redundant
    append!(already_sorted, sorted_column[1:nonzero-1])
end

# create internal sorting of largest cofactor sizes



In [None]:
# Display the count matrix (rows: selected complexes, columns: cofactors containing the
# chosen element).
top_complex_cofactor_map

In [None]:
# Scratch check: row indices of `top_complex_cofactor_map` ordered by descending value
# in column 4. The column index is hard-coded, so this needs at least four columns.
sorted_column = sortperm(top_complex_cofactor_map[:, 4], rev=true)


In [None]:
# Scratch check: label of the third selected complex. `node_labels` holds the element
# first, then `n_cofactor` cofactor names, then the complex names.
node_labels[1+n_cofactor+3]

In [None]:
# Render the element -> cofactor -> complex Sankey diagram.
# Seven-colour purple-to-green palette used for the node colours.
node_palette = palette([:purple, :green], 7)

sankey(
    src, dst, weights;
    node_labels=node_labels,
    node_colors=node_palette,
    edge_color=:gradient,
    # Pairwise "a before b" constraints built together with the edge list.
    force_order=ordering_cofactors,
    compact=true,
    size=(1000, 1000),
)

In [None]:
# NOTE(review): this cell used to contain `del palette`, which is Python syntax and does
# not parse in Julia. Julia has no way to unbind a global name once it is defined.
# If `palette` was accidentally shadowed by an assignment in this session, restart the
# kernel and re-run the notebook so the plotting library's `palette` function is visible
# again.