Skip to content

Commit

Permalink
fix an error and increase coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed Jun 4, 2020
1 parent 5a6f9bf commit 354c547
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 13 deletions.
1 change: 0 additions & 1 deletion cgbeacon2/cli/add.py
Expand Up @@ -203,7 +203,6 @@ def variants(ds, vcf, sample, panel):
)

if vcf_obj is None:
click.echo(f"Coundn't extract variants from provided VCF file")
raise click.Abort()

nr_variants = count_variants(vcf_obj)
Expand Down
10 changes: 9 additions & 1 deletion cgbeacon2/utils/parse.py
Expand Up @@ -37,7 +37,15 @@ def extract_variants(vcf_file, samples=None, filter=None):
temp_intersections_file.close()

else:
vcf_obj = VCF(vcf_file, samples=list(samples))
try:
vcf_obj = VCF(vcf_file, samples=list(samples))
except Exception as ex:
vcf_obj = VCF(vcf_file)
LOG.error(
f"Invalid VCF or samples. Valid samples are:{vcf_obj.samples}"
)
return

except Exception as err:
LOG.error(f"Error while creating VCF iterator from variant file:{err}")
return
Expand Down
20 changes: 11 additions & 9 deletions cgbeacon2/utils/update.py
Expand Up @@ -23,7 +23,7 @@ def update_dataset(database, dataset_id, samples, add):
n_variants = update_dataset_variant_count(database, dataset_id)

# Update number of allele calls for this dataset
n_alleles = update_dataset_allele_count(database, dataset_obj)
n_alleles = update_dataset_allele_count(database, dataset_id, list(updated_samples))

result = database["dataset"].find_one_and_update(
{"_id": dataset_id},
Expand Down Expand Up @@ -82,12 +82,13 @@ def update_dataset_variant_count(database, dataset_id):
return n_variants


def update_dataset_allele_count(database, dataset_obj):
def update_dataset_allele_count(database, dataset_id, samples):
"""Count how many allele calls are present for a dataset and update dataset object with this number
Accepts:
database(pymongo.database.Database)
dataset_obj(dict): a dataset object
dataset_id(str): id of dataset to be updated
samples(list): list of dataset samples
Returns:
updated_dataset(obj): the updated dataset
Expand All @@ -106,34 +107,35 @@ def update_dataset_allele_count(database, dataset_obj):

# Else count calls for each sample of this dataset in variant collection and sum them up
else:
allele_count = _samples_calls(variant_collection, dataset_obj)
allele_count = _samples_calls(variant_collection, dataset_id, samples)

return allele_count


def _samples_calls(variant_collection, dataset_obj):
def _samples_calls(variant_collection, dataset_id, samples):
"""Count all allele calls for a dataset in variants collection
Accepts:
variant_collection(pymongo.database.Database.Collection)
dataset_obj(dict): a dataset object
dataset_id(str): id of dataset to be updated
samples(list): list of dataset samples
Returns:
allele_count(int)
"""
allele_count = 0
samples = dataset_obj.get("samples", [])

for sample in samples:
pipe = [
{
"$group": {
"_id": None,
"alleles": {
"$sum": f"$datasetIds.test_public.samples.{sample}.allele_count"
"$sum": f"$datasetIds.{dataset_id}.samples.{sample}.allele_count"
},
}
}
]
LOG.error(f"PIPE IS {pipe}")
aggregate_res = variant_collection.aggregate(pipeline=pipe)
for res in aggregate_res:
allele_count += res.get("alleles")
Expand Down
9 changes: 7 additions & 2 deletions tests/cli/add/test_add_variants.py
Expand Up @@ -315,8 +315,6 @@ def test_add_same_variant_different_datasets(
hit_dset2 = {".".join(["datasetIds", registered_dataset["_id"]]): {"$exists": True}}
test_variant = database["variant"].find_one({"$and": [hit_dset1, hit_dset2]})

assert test_variant is not None

# Variant should contain callCount for each sample
callCount1 = test_variant["datasetIds"][public_dataset["_id"]]["samples"][
samples[0]
Expand All @@ -328,6 +326,13 @@ def test_add_same_variant_different_datasets(
# And a cumulative call count as well
assert test_variant["call_count"] == callCount1 + callCount2

# Both dataset objects should be updated with the right number of samples, variants and calls:
for ds in datasets:
updated_dataset = database["dataset"].find_one({"_id": ds["_id"]})
assert len(updated_dataset["samples"]) == 1
assert updated_dataset["variant_count"] > 0
assert updated_dataset["allele_count"] > 0


def test_add_sv_variants(mock_app, public_dataset, database):
"""Test adding SV variants for one sample"""
Expand Down

0 comments on commit 354c547

Please sign in to comment.