In [None]:
using BenchmarkTools
using CSV
using DataFrames
using Kraft
using StatsBase

In [None]:
# Gene names, one per line of "genes.txt"; the first line of the file is
# skipped (presumably a header — confirm against the file).
const genes = open("genes.txt") do io
    readline(io)  # Discard the first line.
    readlines(io)
end

In [None]:
# One score per gene, drawn from a standard normal distribution. The draw is
# unseeded, so values differ from run to run.
# Alternative input (equal weight for every gene):
#     fill(1 / length(genes), length(genes))
const gene_values = randn(Float64, length(genes))

In [None]:
# Gene-by-sample table with two samples; "Sample 2" is "Sample 1" scaled by 10.
const gene_x_sample = DataFrame(
    :Gene => genes,
    Symbol("Sample 1") => gene_values,
    Symbol("Sample 2") => 10 * gene_values,
)

# Persist the table as a tab-separated file (it is passed by path to
# `Kraft.gsea` later in this notebook).
CSV.write("gene_x_sample.tsv", gene_x_sample; delim = '\t')

In [None]:
# A random gene set: 100 distinct genes drawn without replacement from `genes`.
const random_gene_set_genes = sample(genes, 100; replace = false)

In [None]:
# Gene sets loaded from the listed GMT file(s).
const gene_set_genes = Kraft.read_gmt(["h.all.v6.2.symbols.gmt"])

## Compute gene set enrichment with 1 sample & 1 gene set

In [None]:
# Score the single random gene set against the single vector of gene values.
Kraft.compute_gene_set_enrichment(gene_values, genes, random_gene_set_genes)

In [None]:
# Time the single-sample, single-gene-set call. All three arguments are
# `const` globals.
@benchmark Kraft.compute_gene_set_enrichment(
    gene_values, genes, random_gene_set_genes
)

## Compute gene set enrichment with 1 sample & multiple gene sets

In [None]:
# Score every gene set in `gene_set_genes` against the single vector of gene
# values.
Kraft.compute_gene_set_enrichment(gene_values, genes, gene_set_genes)

In [None]:
# Time the single-sample, many-gene-set call.
benchmark_result = @benchmark Kraft.compute_gene_set_enrichment(
    gene_values, genes, gene_set_genes
)

# Fastest observed run, converted from nanoseconds (the unit BenchmarkTools
# records) to milliseconds, divided by the number of gene sets, and kept to
# 2 significant digits.
ms_per_gene_set = round(
    minimum(benchmark_result.times) / 1e6 / length(gene_set_genes);
    sigdigits = 2,
)

println(ms_per_gene_set, " ms / gene set")

# Last expression of the cell, so the full benchmark summary is displayed.
benchmark_result

## Compute gene set enrichment with multiple samples & multiple gene sets

In [None]:
# Name of the enrichment statistic passed to the Kraft calls below.
# Declared `const`, like the other benchmark inputs in this notebook, so that
# code reading this global (including benchmarked calls) sees a concretely
# typed binding instead of an untyped, non-constant global.
const statistic = "ks"

In [None]:
# Score every gene set against the gene-by-sample table, using the chosen
# statistic.
Kraft.compute_gene_set_enrichment(gene_x_sample, gene_set_genes, statistic)

In [None]:
# Time the many-sample, many-gene-set call.
#
# `statistic` is interpolated with `$` so that the benchmark measures the call
# itself rather than the cost of looking up a global variable; the other two
# arguments are `const` globals and need no interpolation.
#
# The result is deliberately not assigned: a binding made inside the
# benchmarked expression is local to the benchmark and never reaches the
# notebook's global scope.
@benchmark Kraft.compute_gene_set_enrichment(
    gene_x_sample,
    gene_set_genes,
    $statistic,
)

## GSEA

In [None]:
# Directory handed to `Kraft.gsea` as its output location.
output_directory_path = "gsea_output"

# Run GSEA on the gene-by-sample table written earlier, against two GMT
# collections, using the statistic selected above.
gene_set_x_sample = Kraft.gsea(
    "gene_x_sample.tsv",
    [
        "h.all.v6.2.symbols.gmt",
        "c2.all.v6.2.symbols.gmt",
    ],
    output_directory_path;
    statistic = statistic,
)

# Reload the result from the output directory, replacing the in-memory value
# returned above (presumably to check what was written to disk — confirm).
# `DataFrame(CSV.File(path))` is used instead of `CSV.read(path)` because
# recent CSV.jl releases require an explicit sink argument for `CSV.read`,
# whereas this form works on both old and new releases.
gene_set_x_sample = DataFrame(CSV.File(joinpath(
    output_directory_path,
    "gene_set_x_sample.tsv",
)))