In [None]:
include("../src/ProcessSequence.jl")

In [None]:
# Only `parse` is needed from JSON.
using JSON: parse

# Project root: the parent of the directory containing this notebook.
project_dir = dirname(@__DIR__)

# Project settings, parsed from project.json at the project root.
project_json = open(joinpath(project_dir, "project.json")) do io
    parse(read(io, String))
end

# Top-level input and output directories.
input_dir = joinpath(project_dir, "input")

output_dir = joinpath(project_dir, "output")

# NOTE(review): reference_dir is not used in the cells visible here — confirm it is needed.
reference_dir = joinpath(input_dir, "reference")

# RTG-format (SDF) reference directory, passed to vcfeval as its template.
rtg_sdf_reference_dir = joinpath(input_dir, "GRCh38.sdf")

# Baseline VCF: the HG002 benchmark call set.
baseline_vcf = joinpath(input_dir, "high_confidence_HG002_4.1_GRCh38/HG002_GRCh38_1_22_v4.1_draft_benchmark.vcf.gz")

# Calls VCF: the Strelka variants produced for the HG002 sample.
calls_vcf = joinpath(output_dir, "process_germ_dna_HG002_cat/find_variant/strelka/results/variants/variants.vcf.gz")

# Directory that vcfeval writes its results into.
benchmark_output_dir = joinpath(output_dir, "process_germ_dna_HG002_cat/benchmark")

# Absolute path of the RTG Tools launcher.
rtg_executable = "/opt/rtg/rtg-tools-3.11/rtg"

### Configure vcfeval

In [None]:
# Launch the RTG Tools executable on its own, with no subcommand or arguments.
# NOTE(review): presumably this triggers RTG's first-run configuration — confirm.
run(Cmd([rtg_executable]))

### Download Real Time Genomics prepared GRCh38 files
vcfeval requires reference files in RTG's own format. RTG provides prepared versions of these files for popular reference genomes such as GRCh38; the GRCh38 version is downloaded below.

In [None]:
# Fetch and unpack RTG's prepared GRCh38 reference unless it is already present.
if ispath(rtg_sdf_reference_dir)

    println("Skipping download because RTG GRCh38 data directory already exists:\n $rtg_sdf_reference_dir")

else

    # Download into input_dir (the parent of rtg_sdf_reference_dir) so the result
    # does not depend on the current working directory.
    rtg_sdf_zip = joinpath(input_dir, "GRCh38.sdf.zip")

    # The steps must run one after another. `pipeline(a, b, c)` would instead
    # connect them stdout -> stdin and start them all at once, so the archive
    # could be unpacked or removed before the download has finished.
    run(`wget -q -P $input_dir https://s3.amazonaws.com/rtg-datasets/references/GRCh38.sdf.zip`)

    # NOTE(review): assumes the archive unpacks to a top-level GRCh38.sdf
    # directory, i.e. to rtg_sdf_reference_dir — confirm.
    run(`unzip -d $input_dir $rtg_sdf_zip`)

    # The archive is no longer needed once it has been unpacked.
    rm(rtg_sdf_zip)

end

### Run vcfeval
This command compares the VCF you generated (calls) to the high-confidence VCF generated by Genome in a Bottle (baseline).

In [None]:
# Options handed to `rtg vcfeval`; each entry becomes one command-line word.
vcfeval_arguments = [
    "--baseline=$baseline_vcf",          # baseline VCF to compare against
    "--calls=$calls_vcf",                # VCF of the calls being evaluated
    "--template=$rtg_sdf_reference_dir", # RTG-format reference directory
    "--output=$benchmark_output_dir",    # directory for vcfeval results
    "--threads=1",
]

# Interpolating the vector expands it into separate arguments.
run(`$rtg_executable vcfeval $vcfeval_arguments`)

### See results
Read more about how to interpret vcfeval results [here](https://cdn.rawgit.com/RealTimeGenomics/rtg-tools/master/installer/resources/tools/RTGOperationsManual/rtg_command_reference.html#vcfeval).

In [None]:
# Path of the summary file read from the vcfeval output directory.
# (The original referenced the undefined name `benchmark_output_dirput_dir`,
# which raised an UndefVarError.)
vcfeval_summary_file = joinpath(benchmark_output_dir, "summary.txt")

# Read the whole file as text; `read(path, String)` opens and closes the file
# itself, so no handle is left open.
vcfeval_summary = read(vcfeval_summary_file, String)

# Print rather than display the value so line breaks render as a table
# instead of escaped "\n" sequences.
print(vcfeval_summary)