diff --git a/.bumpversion.cfg b/.bumpversion.cfg index fd3b89e..2865dfd 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.6.9 +current_version = 2.7.0 commit = True tag = True tag_name = {new_version} diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index 344e683..10fbc9e 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -9,13 +9,13 @@ on: jobs: build-n-publish: name: Build and publish Python distribution to PyPI - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - name: Check out git repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python 3.9 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.9 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a267e04..7439b4d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,11 +13,11 @@ jobs: steps: # Check out Scout code - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Set up python - name: Set up Python ${{ matrix.python-version}} - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version}} @@ -30,7 +30,7 @@ jobs: - name: Production Dependencies run: | python -m pip install --upgrade pip - pip install -e . + pip install -e . - name: Dev Dependencies run: | diff --git a/docs/user-guide/loading.md b/docs/user-guide/loading.md index 7ca47c4..67d91ee 100644 --- a/docs/user-guide/loading.md +++ b/docs/user-guide/loading.md @@ -35,7 +35,7 @@ Options: -s, --skip-case-id Do not store case information on variants [default: False] --ensure-index Make sure that the indexes are in place - --gq-treshold INTEGER Treshold to consider variant [default: 20] + --gq-threshold INTEGER Threshold to consider variant [default: 20] -m, --max-window INTEGER Specify the maximum window size for svs [default: 2000] --check-profile PATH Apply sample profiling for the samples, diff --git a/loqusdb/build_models/variant.py b/loqusdb/build_models/variant.py index f547263..ed0ac46 100644 --- a/loqusdb/build_models/variant.py +++ b/loqusdb/build_models/variant.py @@ -139,19 +139,19 @@ def get_coords(variant): return coordinates -def build_variant(variant, case_obj, case_id=None, gq_treshold=None, genome_build=None): +def build_variant(variant, case_obj, case_id=None, gq_threshold=None, gq_qual=False, genome_build=None): """Return a Variant object Take a cyvcf2 formated variant line and return a models.Variant. If criterias are not fullfilled, eg. variant have no gt call or quality - is below gq treshold then return None. + is below gq threshold then return None. Args: variant(cyvcf2.Variant) case_obj(Case): We need the case object to check individuals sex case_id(str): The case id - gq_treshold(int): Genotype Quality treshold + gq_threshold(int): Genotype Quality threshold Return: formated_variant(models.Variant): A variant dictionary @@ -188,8 +188,14 @@ def build_variant(variant, case_obj, case_id=None, gq_treshold=None, genome_buil ind_id = ind_obj["ind_id"] # Get the index position for the individual in the VCF ind_pos = ind_obj["ind_index"] - gq = int(variant.gt_quals[ind_pos]) - if gq_treshold and gq < gq_treshold: + + if gq_qual: + gq = int(variant.QUAL) + + if not gq_qual: + gq = int(variant.gt_quals[ind_pos]) + + if gq_threshold and gq < gq_threshold: continue genotype = GENOTYPE_MAP[variant.gt_types[ind_pos]] diff --git a/loqusdb/commands/load.py b/loqusdb/commands/load.py index 4796c58..c1d2871 100644 --- a/loqusdb/commands/load.py +++ b/loqusdb/commands/load.py @@ -54,7 +54,8 @@ def validate_profile_threshold(ctx, param, value): help="Do not store case information on variants", ) @click.option("--ensure-index", is_flag=True, help="Make sure that the indexes are in place") -@click.option("--gq-treshold", default=20, show_default=True, help="Treshold to consider variant") +@click.option("--gq-threshold", default=20, show_default=True, help="Threshold to consider variant") +@click.option("--qual-gq", is_flag=True, default=False, show_default=True, help="Use QUAL tag instead of GQ value for quality filter") @click.option( "--max-window", "-m", @@ -89,13 +90,14 @@ def load( family_file, family_type, skip_case_id, - gq_treshold, + gq_threshold, case_id, ensure_index, max_window, check_profile, hard_threshold, soft_threshold, + qual_gq ): """Load the variants of a case @@ -136,7 +138,8 @@ def load( family_type=family_type, skip_case_id=skip_case_id, case_id=case_id, - gq_treshold=gq_treshold, + gq_threshold=gq_threshold, + qual_gq=qual_gq, max_window=max_window, profile_file=variant_profile_path, hard_threshold=hard_threshold, diff --git a/loqusdb/commands/update.py b/loqusdb/commands/update.py index e1afe52..88352d9 100644 --- a/loqusdb/commands/update.py +++ b/loqusdb/commands/update.py @@ -47,7 +47,7 @@ help="Do not store case information on variants", ) @click.option("--ensure-index", is_flag=True, help="Make sure that the indexes are in place") -@click.option("--gq-treshold", default=20, show_default=True, help="Treshold to consider variant") +@click.option("--gq-threshold", default=20, show_default=True, help="Threshold to consider variant") @click.option( "--max-window", "-m", @@ -63,7 +63,7 @@ def update( family_file, family_type, skip_case_id, - gq_treshold, + gq_threshold, case_id, ensure_index, max_window, @@ -102,7 +102,7 @@ def update( family_type=family_type, skip_case_id=skip_case_id, case_id=case_id, - gq_treshold=gq_treshold, + gq_threshold=gq_threshold, max_window=max_window, ) except (SyntaxError, CaseError, IOError, VcfError) as error: diff --git a/loqusdb/utils/load.py b/loqusdb/utils/load.py index e145275..280451b 100644 --- a/loqusdb/utils/load.py +++ b/loqusdb/utils/load.py @@ -31,7 +31,8 @@ def load_database( family_file=None, family_type="ped", skip_case_id=False, - gq_treshold=None, + gq_threshold=None, + qual_gq=False, case_id=None, max_window=3000, profile_file=None, @@ -48,7 +49,8 @@ def load_database( family_file(str): Path to family file family_type(str): Format of family file skip_case_id(bool): If no case information should be added to variants - gq_treshold(int): If only quality variants should be considered + gq_threshold(int): If only quality variants should be considered + qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality case_id(str): If different case id than the one in family file should be used max_window(int): Specify the max size for sv windows check_profile(bool): Does profile check if True @@ -87,13 +89,13 @@ def load_database( adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold ) - # If a gq treshold is used the variants needs to have GQ + # If a gq threshold is used the variants needs to have GQ for _vcf_file in vcf_files: # Get a cyvcf2.VCF object vcf = get_vcf(_vcf_file) - if gq_treshold and not vcf.contains("GQ"): - LOG.warning("Set gq-treshold to 0 or add info to vcf {0}".format(_vcf_file)) + if gq_threshold and not vcf.contains("GQ") and not qual_gq: + LOG.warning("Set gq-threshold to 0 or add info to vcf {0}".format(_vcf_file)) raise SyntaxError("GQ is not defined in vcf header") # Get a ped_parser.Family object from family file @@ -143,7 +145,8 @@ def load_database( vcf_obj=vcf_obj, case_obj=case_obj, skip_case_id=skip_case_id, - gq_treshold=gq_treshold, + gq_threshold=gq_threshold, + qual_gq=qual_gq, max_window=max_window, variant_type=variant_type, genome_build=genome_build, @@ -189,7 +192,8 @@ def load_variants( vcf_obj, case_obj, skip_case_id=False, - gq_treshold=None, + gq_threshold=None, + qual_gq=False, max_window=3000, variant_type="snv", genome_build=None, @@ -202,7 +206,7 @@ def load_variants( nr_variants(int) skip_case_id (bool): whether to include the case id on variant level or not - gq_treshold(int) + gq_threshold(int) max_window(int): Specify the max size for sv windows variant_type(str): 'sv' or 'snv' @@ -222,7 +226,7 @@ def load_variants( with click.progressbar(vcf_obj, label="Inserting variants", length=nr_variants) as bar: variants = ( - build_variant(variant, case_obj, case_id, gq_treshold, genome_build=genome_build) + build_variant(variant, case_obj, case_id, gq_threshold, qual_gq, genome_build=genome_build) for variant in bar ) diff --git a/loqusdb/utils/update.py b/loqusdb/utils/update.py index 1ef728f..3a7b63b 100644 --- a/loqusdb/utils/update.py +++ b/loqusdb/utils/update.py @@ -28,7 +28,7 @@ def update_database( family_file=None, family_type="ped", skip_case_id=False, - gq_treshold=None, + gq_threshold=None, case_id=None, max_window=3000, ): @@ -41,7 +41,7 @@ def update_database( family_file(str): Path to family file family_type(str): Format of family file skip_case_id(bool): If no case information should be added to variants - gq_treshold(int): If only quality variants should be considered + gq_threshold(int): If only quality variants should be considered case_id(str): If different case id than the one in family file should be used max_window(int): Specify the max size for sv windows @@ -67,14 +67,14 @@ def update_database( vcf_files.append(sv_file) sv_individuals = vcf_info["individuals"] - # If a gq treshold is used the variants needs to have GQ + # If a gq threshold is used the variants needs to have GQ for _vcf_file in vcf_files: # Get a cyvcf2.VCF object vcf = get_vcf(_vcf_file) - if gq_treshold: + if gq_threshold: if not vcf.contains("GQ"): - LOG.warning("Set gq-treshold to 0 or add info to vcf {0}".format(_vcf_file)) + LOG.warning("Set gq-threshold to 0 or add info to vcf {0}".format(_vcf_file)) raise SyntaxError("GQ is not defined in vcf header") # Get a ped_parser.Family object from family file @@ -127,7 +127,7 @@ def update_database( vcf_obj=vcf_obj, case_obj=case_obj, skip_case_id=skip_case_id, - gq_treshold=gq_treshold, + gq_threshold=gq_threshold, max_window=max_window, variant_type=variant_type, ) diff --git a/requirements.txt b/requirements.txt index b49c639..c9c7c7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ numpy==1.21.4 coloredlogs==14.0 pyyaml==5.4.0 vcftoolbox==1.5 -pip==21.3.1 +pip==23.1.2 setuptools==65.5.1 mongo_adapter>=0.3.3 ped_parser \ No newline at end of file diff --git a/setup.py b/setup.py index e4f70f2..8de700d 100755 --- a/setup.py +++ b/setup.py @@ -10,8 +10,6 @@ import io import os -from pip._internal.network.session import PipSession -from pip._internal.req import parse_requirements from setuptools import find_packages, setup # Package meta-data. @@ -21,13 +19,10 @@ EMAIL = "mans.magnusson@scilifelab.com" AUTHOR = "Måns Magnusson" REQUIRES_PYTHON = ">=3.7.0" -VERSION = "2.6.9" - -requirements = [ - requirement.requirement - for requirement in parse_requirements(filename="./requirements.txt", session=PipSession()) -] +VERSION = "2.7.0" +with open('requirements.txt') as f: + install_requires = f.read().strip().split('\n') # The rest you shouldn't have to touch too much :) # ------------------------------------------------ @@ -68,7 +63,7 @@ entry_points={ "console_scripts": ["loqusdb = loqusdb.__main__:base_command"], }, - install_requires=requirements, + install_requires=install_requires, include_package_data=True, license="MIT", keywords=["vcf", "variants"], diff --git a/tests/conftest.py b/tests/conftest.py index 4db4397..dc079b9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -319,7 +319,7 @@ def variant_obj(request, het_variant, ind_positions, individuals): individuals=individuals, ind_positions=ind_positions, case_id="test", - gq_treshold=None, + gq_threshold=None, ) diff --git a/tests/vcf_tools/test_format_variant.py b/tests/vcf_tools/test_format_variant.py index 5becc12..ea9e6ba 100644 --- a/tests/vcf_tools/test_format_variant.py +++ b/tests/vcf_tools/test_format_variant.py @@ -25,9 +25,9 @@ def test_format_variant_no_gq(variant_no_gq, case_obj): ## GIVEN a variant without GQ variant = variant_no_gq case_id = case_obj["case_id"] - ## WHEN parsing the variant using a GQ treshold + ## WHEN parsing the variant using a GQ threshold formated_variant = build_variant( - variant=variant, case_obj=case_obj, case_id=case_id, gq_treshold=20 + variant=variant, case_obj=case_obj, case_id=case_id, gq_threshold=20 ) ## THEN assert that None is returned since requirements are not fulfilled assert formated_variant is None @@ -38,9 +38,9 @@ def test_format_variant_chr_prefix(variant_chr, case_obj): variant = variant_chr assert variant.CHROM.startswith("chr") case_id = case_obj["case_id"] - ## WHEN parsing the variant using a GQ treshold + ## WHEN parsing the variant using a GQ threshold formated_variant = build_variant( - variant=variant, case_obj=case_obj, case_id=case_id, gq_treshold=20 + variant=variant, case_obj=case_obj, case_id=case_id, gq_threshold=20 ) ## THEN assert that the 'chr' part has been stripped away assert formated_variant["chrom"] == variant.CHROM[3:]