In [None]:
import hail as hl
from gnomad.utils.vcf import adjust_vcf_incompatible_types
from gnomad.utils.sparse_mt import default_compute_info

# Start the Hail session used by every cell below.
# idempotent=True turns a re-run of this cell on a live kernel into a no-op
# instead of tripping over the "already initialised" check.
hl.init(app_name="mt_to_vcf", idempotent=True)

In [None]:
# Location of the combined matrix table that the rest of the notebook works on.
COMBINED_MT_PATH = "file:///directflow/ClinicalGenomicsPipeline/dev/2021-02-04-PIPELINE-1885-All-Hail/EricData/Glucoma/gvcf_WES.combined.mt/"

# Load the matrix table from disk.
mt = hl.read_matrix_table(COMBINED_MT_PATH)

In [None]:
# Densify the matrix table; the result replaces `mt` for all later cells.
# NOTE(review): presumably the input is a sparse (gVCF-combiner style) matrix
# table -- confirm. Re-running this cell densifies the already-densified `mt`.
mt = hl.experimental.densify(mt)

In [None]:
# Keep a row only when both conditions hold:
#   * the row lists more than one allele, and
#   * at least one entry in the row has a non-reference LGT call.
has_multiple_alleles = hl.len(mt.alleles) > 1
any_non_ref_call = hl.agg.any(mt.LGT.is_non_ref())
mt = mt.filter_rows(has_multiple_alleles & any_non_ref_call)

In [None]:
# Per-row sum of the entry field DP, stored as the row field `site_dp`
# (read back later when DP is added to the info struct).
summed_dp = hl.agg.sum(mt.DP)
mt = mt.annotate_rows(site_dp=summed_dp)

In [None]:
# ANS = 2 x (number of entries in the row whose LGT is defined).
# NOTE(review): the factor of 2 presumably assumes diploid calls -- confirm.
entries_with_call = hl.agg.count_where(hl.is_defined(mt.LGT))
mt = mt.annotate_rows(ANS=entries_with_call * 2)

In [None]:
# Build the info table from `mt` with gnomAD's default_compute_info, asking for
# site-level annotations as well (site_annotations=True).
# NOTE(review): the exact fields produced are defined by the gnomad library and
# are not visible in this notebook -- check the library docs for the schema.
info_ht = default_compute_info(mt, site_annotations=True)

In [None]:
# Look up each info-table key in the rows of `mt` and pull out the summed depth
# computed earlier, then store it as `DP` inside the `info` struct.
site_dp_for_key = mt.rows()[info_ht.key].site_dp
info_with_dp = info_ht.info.annotate(DP=site_dp_for_key)
info_ht = info_ht.annotate(info=info_with_dp)

In [None]:
# Convert the info table into VCF-compatible types.
#
# The pipe-delimited annotation list is deliberately left empty. With the
# default INFO_VCF_AS_PIPE_DELIMITED_FIELDS, AS_VarDP would be written as a
# pipe-delimited value, e.g. VarDP=|132.1|140.2. The VQSR parser does not
# recognise that delimiter: it treats the value as an array holding the single
# string "|132.1|140.2" and then fails with an IndexOutOfBound exception when
# it tries to read the value for the second allele.
no_pipe_delimited_fields = []
ht = adjust_vcf_incompatible_types(
    info_ht,
    pipe_delimited_annotations=no_pipe_delimited_fields,
)

In [None]:
# Destination of the exported VCF.
OUTPUT_VCF_PATH = "file:///home/eriurn/Hail-joint-caller/scripts/reblock_key_alleles.vcf"

# Write the VCF-compatible table out as a VCF file.
hl.export_vcf(ht, OUTPUT_VCF_PATH)