In [None]:
using Revise
using BenchmarkTools

In [None]:
using Support

using GCTGMT

In [None]:
# Key: last path component of each entry that read_directory lists in the
# gene-sets directory. Value: whatever read_gmt returns for that entry (later
# merged into a set-to-element Dict by load_benchmark).
name_to_set_to_element_ = Dict(
    splitpath(gmt_path)[end] => read_gmt(gmt_path) for
    gmt_path in read_directory("/Users/kwat/Desktop/gene_sets/")
)

;

In [None]:
using FeatureSetEnrichment

In [None]:
# Build the score column names: every method key reported by score_set_new on
# a tiny dummy input contributes two names, "<method> extreme" followed by
# "<method> area".
method_ = String[]

for method_name in keys(score_set_new(["a", "b"], [-1.0, 1.0], ["a"]; plot = false))

    for suffix in (" extreme", " area")

        push!(method_, string(method_name, suffix))

    end

end

n_method = length(method_)

In [None]:
# Directory whose entries are each loaded as one benchmark directory (by the
# scoring loop) and under which peek_benchmark looks up a benchmark by name.
benchmarks_directory_path = "/Users/kwat/Desktop/benchmarks2/"

In [None]:
using DataIO

In [None]:
"""
    load_benchmark(benchmark_directory_path)

Load one benchmark directory and print a one-line summary.

Reads `gene_x_score.tsv` (its two columns become the elements and their scores)
and `gene_sets.json` from `benchmark_directory_path`. Every entry listed under
`"gene_sets_tested"` is looked up in the global `name_to_set_to_element_` and
merged into a single set-to-element dictionary (later entries overwrite earlier
ones on key collisions, as `merge!` does).

Returns the tuple `(element_, score_, set_to_element_, positive_sets)` where
`positive_sets` is the value stored under `"gene_sets_positive"` in the JSON.
"""
function load_benchmark(benchmark_directory_path)

    benchmark_name = splitpath(benchmark_directory_path)[end]

    gene_x_score = read_data(joinpath(benchmark_directory_path, "gene_x_score.tsv"))

    # The table is expected to have exactly two columns: elements and scores.
    element_, score_ = eachcol(gene_x_score)

    gene_sets = read_json(joinpath(benchmark_directory_path, "gene_sets.json"))

    set_to_element_ = Dict{String,Vector{String}}()

    for collection_name in gene_sets["gene_sets_tested"]

        merge!(set_to_element_, name_to_set_to_element_[collection_name])

    end

    println("Benchmark ", benchmark_name, " (", length(set_to_element_), " set)")

    return element_, score_, set_to_element_, gene_sets["gene_sets_positive"]

end

In [None]:
# When true, the scoring loop below runs score_set_new for every benchmark and
# the results are written to the benchmark-by-method TSV; when false, those
# steps are skipped and only the TSV already on disk is read back.
compute = true

;

In [None]:
using Normalization

In [None]:
# Accumulators filled in lockstep, one entry per (benchmark directory, positive set):
#   benchmark_              : "<directory name>.<set name>" identifier
#   benchmark_size_         : number of elements in the positive set
#   benchmark_x_method_row_ : normalized score of the positive set under every
#                             method column (only filled when `compute` is true)
benchmark_ = Vector{String}()

benchmark_size_ = Vector{Int64}()

benchmark_x_method_row_ = Vector{Vector{Float64}}()

for benchmark_directory_path in read_directory(benchmarks_directory_path)

    element_, score_, set_to_element_, benchmark_set_ =
        load_benchmark(benchmark_directory_path)

    # Sorted set names; rows of set_x_method are expected to follow this order
    # because the scoring result is sorted as well.
    set_ = sort(collect(keys(set_to_element_)))

    if compute

        set_to_method_to_result = sort(score_set_new(element_, score_, set_to_element_))

        set_x_method = Matrix{Float64}(undef, length(set_), n_method)

        for (set_i, (set, method_to_result)) in enumerate(set_to_method_to_result)

            # Entries 2 and 3 of every method's result are kept, giving two
            # columns per method.
            # NOTE(review): presumably these are the "extreme" and "area"
            # statistics, in the order used to build method_ — confirm.
            set_x_method_row = Float64[]

            for result in values(method_to_result)

                append!(set_x_method_row, result[2:3])

            end

            set_x_method[set_i, :] = set_x_method_row

        end

    end

    for set in benchmark_set_

        benchmark = string(splitpath(benchmark_directory_path)[end], '.', set)

        benchmark_size = length(set_to_element_[set])

        push!(benchmark_, benchmark)

        push!(benchmark_size_, benchmark_size)

        if compute

            benchmark_x_method_row = Vector{Float64}()

            for (method_i, set_score_) in enumerate(eachcol(set_x_method))

                # Negative and non-negative scores are normalized separately,
                # and each side is divided by its number of sets.
                # NOTE(review): "1224" is an opaque method code passed to
                # normalize; presumably a ranking scheme — confirm.
                is_negative_ = set_score_ .< 0

                is_positive_ = 0 .<= set_score_

                set_negative_ = set_[is_negative_]

                set_positive_ = set_[is_positive_]

                negative_ =
                    .-normalize(.-set_score_[is_negative_], "1224") / sum(is_negative_)

                positive_ = normalize(set_score_[is_positive_], "1224") / sum(is_positive_)

                i_negative = findfirst(==(set), set_negative_)

                i_positive = findfirst(==(set), set_positive_)

                if i_negative !== nothing

                    benchmark_score = negative_[i_negative]

                elseif i_positive !== nothing

                    benchmark_score = positive_[i_positive]

                else

                    # The set's score is neither < 0 nor >= 0 (e.g. NaN).
                    # Report the raw score; `set` is a key of set_to_element_
                    # (it was indexed above), so it is present in set_.
                    error(
                        method_[method_i],
                        " returned ",
                        set_score_[findfirst(==(set), set_)],
                        " for ",
                        set,
                    )

                end

                push!(benchmark_x_method_row, benchmark_score)

            end

            push!(benchmark_x_method_row_, benchmark_x_method_row)

        end

    end

end

n_benchmark = length(benchmark_)

In [None]:
using CSV
using DataFrames

In [None]:
# File that caches the benchmark-by-method scores: written below when
# `compute` is true, then always read back.
benchmark_x_method_path = "benchmark_x_method.tsv"

# Name of the first column, which holds the benchmark identifiers.
row_name = "Benchmark"

if compute

    # One row per benchmark, one column per entry of method_.
    # NOTE(review): convert_vector_of_vector_to_matrix is not defined in this
    # notebook; presumably it stacks the row vectors into a matrix — confirm.
    df = DataFrame(convert_vector_of_vector_to_matrix(benchmark_x_method_row_), method_)

    insertcols!(df, 1, row_name => benchmark_)

    # NOTE(review): no delimiter is passed to CSV.write although the path ends
    # in ".tsv"; confirm that read_data parses the file as it is written here.
    CSV.write(benchmark_x_method_path, df)

end

benchmark_x_method = read_data(benchmark_x_method_path)

# From here on method_ is rebound to the subset (and order) of columns that is
# analyzed; the first entry is the baseline column used by the cells below.
# Cells above that use method_ (e.g. as the DataFrame column names) will see
# this shorter list if they are re-run after this cell.
method_ = ["is ks < area", "is ks < extreme", "a idrsw <> area", "a idrdw <> area"]

# Rebind from the table read above to a plain matrix of the selected columns.
benchmark_x_method = Matrix(benchmark_x_method[!, method_])

In [None]:
# Label of the baseline method; used as the x-axis title of the scatter plots.
# It equals the first entry of method_, whose column serves as the baseline.
base_name = "is ks < area"

In [None]:
"""
    peek_benchmark(id::String)

Re-run `score_set_new` for a single benchmark set.

`id` has the form `"<benchmark directory name>.<set name>"`; it is split at the
first `'.'` only, so the set name may itself contain dots. The benchmark is
loaded from the global `benchmarks_directory_path`. Always returns `nothing`.
"""
function peek_benchmark(id::String)::Nothing

    benchmark_name, set_name = split(id, '.'; limit = 2)

    benchmark_directory_path = joinpath(benchmarks_directory_path, benchmark_name)

    # Only the first three returned values are needed here.
    element_, score_, set_to_element_ = load_benchmark(benchmark_directory_path)

    score_set_new(element_, score_, set_to_element_[set_name])

    return nothing

end

In [None]:
# For every benchmark, flag (1.0 / 0.0) the methods that beat the baseline in
# column 1: a more negative score when the baseline is negative, a larger score
# otherwise. The baseline column itself is therefore always 0.0.
benchmark_x_method_metric = Matrix{Float64}(undef, size(benchmark_x_method))

for benchmark_i in 1:n_benchmark

    method_score_ = benchmark_x_method[benchmark_i, :]

    reference_score = method_score_[1]

    benchmark_x_method_metric[benchmark_i, :] =
        reference_score < 0 ? method_score_ .< reference_score :
        reference_score .< method_score_

end

In [None]:
"""
    sum_sort_print(m::Matrix)

Print, for every column of `m`, its column sum followed by the matching name
from the global `method_`, one line per column.

The sums are passed together with `method_` to `sort_like` (with `r = true`).
NOTE(review): `sort_like` is not defined in this notebook; presumably it sorts
the sums in reverse order and reorders the names alike — confirm.

The sums are computed with a `dims = 1` reduction so that a matrix with zero
rows yields all-zero sums instead of raising on an empty reduction. Each sum
must be integer-valued because it is printed through `Int64`.
"""
function sum_sort_print(m::Matrix)::Nothing

    n_better_ = Float64.(vec(sum(m; dims = 1)))

    for (n_better, method) in zip(sort_like(n_better_, method_; r = true)...)

        println(Int64(n_better), "    ", method)

    end

    return nothing

end

In [None]:
"""
    print_header(minimum_size::Int64, maximum_size::Int64, n::Int64)

Print a banner line of the form

    ======== Size range: <minimum_size> < <maximum_size> (<n>) ========

to standard output and return `nothing`.
"""
function print_header(minimum_size::Int64, maximum_size::Int64, n::Int64)::Nothing

    rule = "="^8

    println("$rule Size range: $minimum_size < $maximum_size ($n) $rule")

    return nothing

end

In [None]:
# Overall tally first, then one tally per size bin of width `increment`.
benchmark_size_maximum = maximum(benchmark_size_)

print_header(0, benchmark_size_maximum, n_benchmark)

sum_sort_print(benchmark_x_method_metric)

increment = 100

for (minimum_size, maximum_size) in (
    ((i - 1) * increment, i * increment) for
    i in 1:cld(benchmark_size_maximum, increment)
)

    # Bins are (minimum_size, maximum_size]: the upper bound is inclusive so
    # that sizes that are exact multiples of `increment` (including the largest
    # size) fall into a bin and the bin counts add up to n_benchmark.
    is_selected = minimum_size .< benchmark_size_ .<= maximum_size

    n_selected = sum(is_selected)

    print_header(minimum_size, maximum_size, n_selected)

    # An empty bin has nothing to tally; skip it rather than summing zero rows.
    if 0 < n_selected

        sum_sort_print(benchmark_x_method_metric[is_selected, :])

    end

end

In [None]:
using Plot

In [None]:
# NOTE(review): use_style! is not defined in this notebook; presumably it comes
# from the Plot package loaded above and sets the plotting style — confirm.
use_style!()

In [None]:
using Plotly

In [None]:
# Baseline scores (column 1) go on the x-axis of every plot.
x = benchmark_x_method[:, 1]

# One figure per non-baseline method: the identity line (x against x) plus the
# method's scores against the baseline, with benchmark ids as hover text.
for (method_i, method) in enumerate(method_)

    method_i == 1 && continue

    display(
        plot_x_y(
            [x, x],
            [x, benchmark_x_method[:, method_i]];
            text_ = [benchmark_, benchmark_],
            mode_ = ["line", "markers"],
            layout = Layout(xaxis_title_text = base_name, yaxis_title_text = method),
        ),
    )

end

In [None]:
# Inspect a single benchmark set; the id is "<benchmark directory name>.<set name>"
# and is split at the first '.' by peek_benchmark.
peek_benchmark("18.REACTOME_TRANSCRIPTIONAL_ACTIVITY_OF_SMAD2_SMAD3_SMAD4_HETEROTRIMER")