Skip to content

Commit

Permalink
Revert "modiefied demo files, save BND SV variants to database"
Browse files Browse the repository at this point in the history
This reverts commit 95f8d72.
  • Loading branch information
northwestwitch committed Jun 23, 2020
1 parent 95f8d72 commit bce818f
Show file tree
Hide file tree
Showing 9 changed files with 10 additions and 41 deletions.
4 changes: 2 additions & 2 deletions cgbeacon2/cli/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ def demo(ctx):
ctx.invoke(
variants,
ds=ds_id,
vcf="cgbeacon2/resources/demo/test_trio.vcf.gz",
vcf="cgbeacon2/resources/demo/643594.clinical.vcf.gz",
sample=[sample],
)

# Invoke add variants command to import all SV variants from demo sample
ctx.invoke(
variants,
ds=ds_id,
vcf="cgbeacon2/resources/demo/test_trio.SV.vcf.gz",
vcf="cgbeacon2/resources/demo/643594.clinical.SV.vcf.gz",
sample=[sample],
)

Expand Down
6 changes: 3 additions & 3 deletions cgbeacon2/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pkg_resources

###### Files ######
test_snv_vcf = "resources/demo/test_trio.vcf.gz"
test_sv_vcf = "resources/demo/test_trio.SV.vcf.gz"
empty_vcf = "resources/demo/empty.SV.vcf.gz"
test_snv_vcf = "resources/demo/643594.clinical.vcf.gz"
test_sv_vcf = "resources/demo/643594.clinical.SV.vcf.gz"
empty_vcf = "resources/demo/empty.clinical.SV.vcf.gz"
panel1 = "resources/demo/panel1.bed"
panel2 = "resources/demo/panel2.bed"

Expand Down
Binary file not shown.
File renamed without changes.
File renamed without changes.
Binary file removed cgbeacon2/resources/demo/test_trio.SV.vcf.gz
Binary file not shown.
12 changes: 4 additions & 8 deletions cgbeacon2/utils/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from cgbeacon2.constants import CHROMOSOMES
from cgbeacon2.models.variant import Variant
from cgbeacon2.utils.parse import variant_called, bnd_mate_name
from cgbeacon2.utils.parse import variant_called

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -78,8 +78,7 @@ def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
inserted_vars = 0
with Bar("Processing", max=nr_variants) as bar:
for vcf_variant in vcf_obj:
chrom = vcf_variant.CHROM.replace("chr", "")
if chrom not in CHROMOSOMES:
if vcf_variant.CHROM not in CHROMOSOMES:
LOG.warning(
f"chromosome '{vcf_variant.CHROM}' not included in canonical chromosome list, skipping it."
)
Expand All @@ -94,18 +93,15 @@ def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
continue # variant was not called in samples of interest

parsed_variant = dict(
chromosome=chrom,
chromosome=vcf_variant.CHROM,
start=vcf_variant.start, # 0-based coordinate
end=vcf_variant.end, # 0-based coordinate
reference_bases=vcf_variant.REF,
alternate_bases=vcf_variant.ALT,
)

if vcf_variant.var_type == "sv":
sv_type = vcf_variant.INFO["SVTYPE"]
parsed_variant["variant_type"] = sv_type
if sv_type == "BND":
parsed_variant["mate_name"] = bnd_mate_name(vcf_variant.ALT, chrom)
parsed_variant["variant_type"] = vcf_variant.INFO["SVTYPE"]

else:
parsed_variant["variant_type"] = vcf_variant.var_type.upper()
Expand Down
27 changes: 0 additions & 27 deletions cgbeacon2/utils/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
import logging
from cyvcf2 import VCF
import os
import re
from pybedtools.bedtool import BedTool
from tempfile import NamedTemporaryFile

BND_ALT_PATTERN = re.compile(r".*[\],\[](.*?):(.*?)[\],\[]")
CHR_PATTERN = re.compile(r"(chr)?(.*)", re.IGNORECASE)

LOG = logging.getLogger(__name__)


Expand All @@ -32,29 +28,6 @@ def get_vcf_samples(vcf_file):
return vcf_samples


def bnd_mate_name(alt, chrom):
    """Return the chromosome of the mate breakend for a BND variant.

    Accepts:
        alt(str or list of str): vcf_variant.ALT, either a single ALT string
            or the sequence of ALT strings of the variant
        chrom(str): chromosome of the variant itself, used as fallback

    Returns:
        end_chrom(str): chromosome named in the first ALT allele written in
            breakend notation (e.g. "N[chr2:321682["), with any leading
            "chr" removed; `chrom` unchanged when no such allele is found
    """
    if alt is None:
        return chrom

    # The caller hands over vcf_variant.ALT, which cyvcf2 exposes as a list
    # of strings; a substring test on the list itself would never find ":".
    alleles = [alt] if isinstance(alt, str) else alt

    for allele in alleles:
        if not isinstance(allele, str) or ":" not in allele:
            continue
        match = BND_ALT_PATTERN.match(allele)
        # BND will often be translocations between different chromosomes
        if match:
            other_chrom = match.group(1)
            # CHR_PATTERN always matches; group 2 is the name without "chr"
            return CHR_PATTERN.match(other_chrom).group(2)

    return chrom


def extract_variants(vcf_file, samples=None, filter=None):
"""Parse a VCF file and return its variants as cyvcf2.VCF objects
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/add/test_add_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def test_add_sv_variants(mock_app, public_dataset, database):
saved_vars = list(database["variant"].find())
assert len(saved_vars) > 0

valid_types = ["INS", "DUP", "DEL", "INV", "BND"]
valid_types = ["INS", "DUP", "DEL", "INV"]
# AND all of them should have a valid SV variant type
for var in saved_vars:
assert var["variantType"] in valid_types
Expand Down

0 comments on commit bce818f

Please sign in to comment.