From 354c547bed32b7b33d94deee07cbff169d7114ba Mon Sep 17 00:00:00 2001
From: northwestwitch
Date: Thu, 4 Jun 2020 16:07:50 +0200
Subject: [PATCH] fix an error and increase coverage

---
 cgbeacon2/cli/add.py               |  1 -
 cgbeacon2/utils/parse.py           | 10 +++++++++-
 cgbeacon2/utils/update.py          | 20 +++++++++++---------
 tests/cli/add/test_add_variants.py |  9 +++++++--
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/cgbeacon2/cli/add.py b/cgbeacon2/cli/add.py
index c2a945d8..b49b1d5b 100644
--- a/cgbeacon2/cli/add.py
+++ b/cgbeacon2/cli/add.py
@@ -203,7 +203,6 @@ def variants(ds, vcf, sample, panel):
         )
 
     if vcf_obj is None:
-        click.echo(f"Coundn't extract variants from provided VCF file")
         raise click.Abort()
 
     nr_variants = count_variants(vcf_obj)
diff --git a/cgbeacon2/utils/parse.py b/cgbeacon2/utils/parse.py
index f6351016..3c55b77d 100644
--- a/cgbeacon2/utils/parse.py
+++ b/cgbeacon2/utils/parse.py
@@ -37,7 +37,15 @@ def extract_variants(vcf_file, samples=None, filter=None):
             temp_intersections_file.close()
 
         else:
-            vcf_obj = VCF(vcf_file, samples=list(samples))
+            try:
+                vcf_obj = VCF(vcf_file, samples=list(samples))
+            except Exception as ex:
+                vcf_obj = VCF(vcf_file)
+                LOG.error(
+                    f"Invalid VCF or samples. \nValid samples are:{vcf_obj.samples}"
+                )
+                return
+
     except Exception as err:
         LOG.error(f"Error while creating VCF iterator from variant file:{err}")
         return
diff --git a/cgbeacon2/utils/update.py b/cgbeacon2/utils/update.py
index 9284df36..b4e1f6e3 100644
--- a/cgbeacon2/utils/update.py
+++ b/cgbeacon2/utils/update.py
@@ -23,7 +23,7 @@ def update_dataset(database, dataset_id, samples, add):
         n_variants = update_dataset_variant_count(database, dataset_id)
 
     # Update number of allele calls for this dataset
-    n_alleles = update_dataset_allele_count(database, dataset_obj)
+    n_alleles = update_dataset_allele_count(database, dataset_id, list(updated_samples))
 
     result = database["dataset"].find_one_and_update(
         {"_id": dataset_id},
@@ -82,12 +82,13 @@ def update_dataset_variant_count(database, dataset_id):
     return n_variants
 
 
-def update_dataset_allele_count(database, dataset_obj):
+def update_dataset_allele_count(database, dataset_id, samples):
     """Count how many allele calls are present for a dataset and update dataset object with this number
 
    Accepts:
        database(pymongo.database.Database)
-        dataset_obj(dict): a dataset object
+        dataset_id(str): id of dataset to be updated
+        samples(list): list of dataset samples
 
    Returns:
        updated_dataset(obj): the updated dataset
@@ -106,22 +107,22 @@ def update_dataset_allele_count(database, dataset_obj):
 
     # Else count calls for each sample of this dataset in variant collection and sum them up
     else:
-        allele_count = _samples_calls(variant_collection, dataset_obj)
+        allele_count = _samples_calls(variant_collection, dataset_id, samples)
 
     return allele_count
 
 
-def _samples_calls(variant_collection, dataset_obj):
+def _samples_calls(variant_collection, dataset_id, samples):
     """Count all allele calls for a dataset in variants collection
 
     Accepts:
         variant_collection(pymongo.database.Database.Collection)
-        dataset_obj(dict): a dataset object
+        dataset_id(str): id of dataset to be updated
+        samples(list): list of dataset samples
 
     Returns:
-
+        allele_count(int)
     """
     allele_count = 0
-    samples = dataset_obj.get("samples", [])
 
     for sample in samples:
         pipe = [
@@ -129,11 +130,12 @@ def _samples_calls(variant_collection, dataset_obj):
                 "$group": {
                     "_id": None,
                     "alleles": {
-                        "$sum": f"$datasetIds.test_public.samples.{sample}.allele_count"
+                        "$sum": f"$datasetIds.{dataset_id}.samples.{sample}.allele_count"
                     },
                 }
             }
         ]
+        LOG.error(f"PIPE IS {pipe}")
         aggregate_res = variant_collection.aggregate(pipeline=pipe)
         for res in aggregate_res:
             allele_count += res.get("alleles")
diff --git a/tests/cli/add/test_add_variants.py b/tests/cli/add/test_add_variants.py
index 49f7482e..3c089bab 100644
--- a/tests/cli/add/test_add_variants.py
+++ b/tests/cli/add/test_add_variants.py
@@ -315,8 +315,6 @@ def test_add_same_variant_different_datasets(
     hit_dset2 = {".".join(["datasetIds", registered_dataset["_id"]]): {"$exists": True}}
     test_variant = database["variant"].find_one({"$and": [hit_dset1, hit_dset2]})
 
-    assert test_variant is not None
-
     # Variant should countain callCount for each sample
     callCount1 = test_variant["datasetIds"][public_dataset["_id"]]["samples"][
         samples[0]
@@ -328,6 +326,13 @@ def test_add_same_variant_different_datasets(
     # And a cumulative call count as well
     assert test_variant["call_count"] == callCount1 + callCount2
 
+    # Both dataset objects should be updated with the right number of samples, variants and calls:
+    for ds in datasets:
+        updated_dataset = database["dataset"].find_one({"_id": ds["_id"]})
+        assert len(updated_dataset["samples"]) == 1
+        assert updated_dataset["variant_count"] > 0
+        assert updated_dataset["allele_count"] > 0
+
 
 def test_add_sv_variants(mock_app, public_dataset, database):
     """Test adding SV variants for one sample"""