Skip to content

Commit

Permalink
improved how end coord is calculated for SVs
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed Jun 23, 2020
1 parent b11094d commit 73c1e14
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 5 deletions.
16 changes: 14 additions & 2 deletions cgbeacon2/utils/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from cgbeacon2.constants import CHROMOSOMES
from cgbeacon2.models.variant import Variant
from cgbeacon2.utils.parse import variant_called, bnd_mate_name
from cgbeacon2.utils.parse import variant_called, bnd_mate_name, sv_end

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -104,8 +104,20 @@ def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
if vcf_variant.var_type == "sv":
sv_type = vcf_variant.INFO["SVTYPE"]
parsed_variant["variant_type"] = sv_type

alt = vcf_variant.ALT[0]

# Check if a better variant end can be extracted from INFO field
end = sv_end(
pos=vcf_variant.POS,
alt=alt,
svend=vcf_variant.INFO.get("END"),
svlen=vcf_variant.INFO.get("SVLEN"),
)
parsed_variant["end"] = end

if sv_type == "BND":
parsed_variant["mate_name"] = bnd_mate_name(vcf_variant.ALT, chrom)
parsed_variant["mate_name"] = bnd_mate_name(alt, chrom)

else:
parsed_variant["variant_type"] = vcf_variant.var_type.upper()
Expand Down
30 changes: 29 additions & 1 deletion cgbeacon2/utils/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def bnd_mate_name(alt, chrom):
"""Returns chromosome and mate for a BND variant
Accepts:
alt(str): vcf_variant.ALT
alt(str): vcf_variant.ALT[0]
chrom(st): cf_variant.CHROM
Returns:
Expand All @@ -47,6 +47,7 @@ def bnd_mate_name(alt, chrom):
return end_chrom

match = BND_ALT_PATTERN.match(alt)

# BND will often be translocations between different chromosomes
if match:
other_chrom = match.group(1)
Expand All @@ -55,6 +56,33 @@ def bnd_mate_name(alt, chrom):
return end_chrom


def sv_end(pos, alt, svend=None, svlen=None):
"""Return the end coordinate for a structural variant
Accepts:
pos(int): variant start, 1-based
alt(str)
svend(int)
svlen(int)
Returns:
end(int)
"""
end = svend

if ":" in alt:
match = BND_ALT_PATTERN.match(alt)
if match:
end = int(match.group(2))

if svend == pos:
if svlen:
end = pos + svlen

return end - 1 # coordinate should be zero-based


def extract_variants(vcf_file, samples=None, filter=None):
"""Parse a VCF file and return its variants as cyvcf2.VCF objects
Expand Down
4 changes: 3 additions & 1 deletion tests/cli/add/test_add_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,9 @@ def test_add_BND_SV_variants(mock_app, public_dataset, database):
# ALL of them should have a valid SV variant type
assert var["variantType"] == "BND"
# AND a mateName (end chromosome)
assert var["mateName"] is not None
assert (
var["mateName"] != var["referenceName"]
) # just for the variants in the demo file


def test_add_snv_sv_variants(mock_app, public_dataset, database):
Expand Down
37 changes: 36 additions & 1 deletion tests/utils/test_parse.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,42 @@
# -*- coding: utf-8 -*-
import pybedtools
from cgbeacon2.resources import panel1_path, panel2_path
from cgbeacon2.utils.parse import merge_intervals, extract_variants
from cgbeacon2.utils.parse import (
merge_intervals,
extract_variants,
bnd_mate_name,
sv_end,
)

ALT = "G]17:198982]"


def test_bnd_mate_name():
"""Test the function that extract mate name from a variant ALT field"""

mate = bnd_mate_name(ALT, "2")
assert mate == "17"


def test_sv_end_SVEND():
"""Test the function that calculates the end coordinate of a structural variant in the presence of SVEND.
Example:
2 321682 . T <DEL> 6 PASS SVTYPE=DEL;END=321887;SVLEN=-205;CIPOS=-56,20;CIEND=-10,62 GT:GQ 0/1:12
"""
end = sv_end(pos=321682, alt="<DEL>", svend=321887, svlen=-205)
assert end == 321886


def test_sv_end_BND():
"""Test the function that calculates the end coordinate of a BND structural variant.
Example:
2 321681 bnd_W G G]17:198982] 6 PASS SVTYPE=BND;MATEID=bnd_Y GT 0/1
"""
end = sv_end(pos=321681, alt=ALT, svend=None, svlen=None)
assert end == 198981


def test_merge_intervals():
Expand Down

0 comments on commit 73c1e14

Please sign in to comment.