Skip to content

Commit

Permalink
decouple genotype api (#772)
Browse files Browse the repository at this point in the history
* Better tests for genotype API
  • Loading branch information
Måns Magnusson committed Sep 29, 2020
1 parent 64d4bfd commit ef7cb25
Show file tree
Hide file tree
Showing 27 changed files with 734 additions and 337 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Expand Up @@ -13,6 +13,12 @@ Try to use the following format:
### Changed
### Fixed

## [13.2.0]

### Changed
- Only talk to genotype via subprocess and CLI


## [13.1.0]

### Changed
Expand Down
4 changes: 1 addition & 3 deletions Pipfile
Expand Up @@ -14,6 +14,7 @@ docutils = "*"
# black
black = "*"
pre-commit = "*"
Cython = "*"

[packages]
alchy = "*"
Expand All @@ -36,14 +37,11 @@ cyvcf2 = "<0.10.0"
pymongo = "<3.7"
colorclass = "*"
cairocffi = "*"
pysam = "*"
genologics = "*"
trailblazer = "*"
housekeeper = "*"
genotype = "*"
scout-browser = "*"
cgstats = "*"
Cython = "*"
SQLAlchemy = "*"
Flask = "*"
Flask-Admin = "*"
Expand Down
229 changes: 131 additions & 98 deletions Pipfile.lock

Large diffs are not rendered by default.

96 changes: 49 additions & 47 deletions cg/apps/gt.py
@@ -1,18 +1,15 @@
"""Interactions with the genotype tool"""

import logging

from subprocess import CalledProcessError
import subprocess

from alchy import Manager
from genotype.store import api, models
from genotype.load.vcf import load_vcf
from cg.exc import CaseNotFoundError
from cg.utils.commands import Process

LOG = logging.getLogger(__name__)


class GenotypeAPI(Manager):

class GenotypeAPI:
"""Interface with Genotype app.
The config should contain a 'genotype' key:
Expand All @@ -21,60 +18,65 @@ class GenotypeAPI(Manager):
"""

def __init__(self, config: dict):
alchy_config = dict(SQLALCHEMY_DATABASE_URI=config["genotype"]["database"])
super(GenotypeAPI, self).__init__(config=alchy_config, Model=models.Model)
self.process = Process(
binary=config["genotype"]["binary_path"], config=config["genotype"]["config_path"]
)
self.dry_run = False

self.genotype_config = config["genotype"]["config_path"]
self.genotype_binary = config["genotype"]["binary_path"]
self.base_call = [self.genotype_binary, "--config", self.genotype_config]
def set_dry_run(self, dry_run: bool) -> None:
"""Set the dry run state"""
self.dry_run = dry_run

def upload(self, bcf_path: str, samples_sex: dict, force: bool = False):
def upload(self, bcf_path: str, samples_sex: dict, force: bool = False) -> None:
"""Upload genotypes for a family of samples."""
snps = api.snps()
analyses = load_vcf(bcf_path, snps)
for analysis_obj in analyses:
LOG.debug("loading VCF genotypes for sample: %s", analysis_obj.sample_id)
is_saved = api.add_analysis(self, analysis_obj, replace=force)
if is_saved:
LOG.info("loaded VCF genotypes for sample: %s", analysis_obj.sample_id)
else:
LOG.warning("skipped, found previous analysis: %s", analysis_obj.sample_id)

if is_saved or force:
analysis_obj.sex = samples_sex[analysis_obj.sample_id]["analysis"]
analysis_obj.sample.sex = samples_sex[analysis_obj.sample_id]["pedigree"]
self.commit()
upload_parameters = ["load", str(bcf_path)]
if force:
upload_parameters.append("--force")

LOG.info("loading VCF genotypes for sample(s): %s", ", ".join(samples_sex.keys()))
self.process.run_command(parameters=upload_parameters, dry_run=self.dry_run)

for sample_id in samples_sex:
# This is the sample sex specified by the customer
sample_sex = samples_sex[sample_id]["pedigree"]
self.update_sample_sex(sample_id, sample_sex)
# This is the predicted sex based on variant calls from the pipeline
analysis_predicted_sex = samples_sex[sample_id]["analysis"]
self.update_analysis_sex(sample_id, sex=analysis_predicted_sex)

def update_sample_sex(self, sample_id: str, sex: str) -> None:
"""Update the sex for a sample in the genotype tool"""
sample_sex_parameters = ["add-sex", sample_id, "-s", sex]
LOG.debug("Set sex for sample %s to %s", sample_id, sex)
self.process.run_command(parameters=sample_sex_parameters, dry_run=self.dry_run)

def update_analysis_sex(self, sample_id: str, sex: str) -> None:
"""Update the predicted sex for a sample based on genotype analysis in the genotype tool"""
analysis_sex_parameters = ["add-sex", sample_id, "-a", "sequence", sex]
LOG.debug("Set predicted sex for sample %s to %s for the sequence analysis", sample_id, sex)
self.process.run_command(parameters=analysis_sex_parameters, dry_run=self.dry_run)

def export_sample(self, days: int = 0) -> str:
"""Export sample info."""
trending_call = self.base_call[:]
trending_call.extend(["export-sample", "-d", str(days)])
try:
LOG.info("Running Genotype API to get data.")
LOG.debug(trending_call)
output = subprocess.check_output(trending_call)
except CalledProcessError as error:
LOG.critical("Could not run command: %s", " ".join(trending_call))
raise error
output = output.decode("utf-8")
export_sample_parameters = ["export-sample", "-d", str(days)]

self.process.run_command(parameters=export_sample_parameters, dry_run=self.dry_run)
output = self.process.stdout
# If sample not in genotype db, stdout of genotype command will be empty.
if not output:
raise CaseNotFoundError("samples not found in genotype db")
return output

def export_sample_analysis(self, days: int = 0) -> str:
"""Export analysis."""
trending_call = self.base_call[:]
trending_call.extend(["export-sample-analysis", "-d", str(days)])
try:
LOG.info("Running Genotype API to get data.")
LOG.debug(trending_call)
output = subprocess.check_output(trending_call)
except CalledProcessError as error:
LOG.critical("Could not run command: %s", " ".join(trending_call))
raise error
output = output.decode("utf-8")
export_sample_analysis_parameters = ["export-sample-analysis", "-d", str(days)]

self.process.run_command(parameters=export_sample_analysis_parameters, dry_run=self.dry_run)
output = self.process.stdout
# If sample not in genotype db, stdout of genotype command will be empty.
if not output:
raise CaseNotFoundError("samples not found in genotype db")
return output

def __str__(self):
return f"GenotypeAPI(dry_run: {self.dry_run})"
4 changes: 2 additions & 2 deletions cg/cli/upload/base.py
Expand Up @@ -27,7 +27,7 @@
from .mutacc import process_solved, processed_solved
from .observations import observations
from .scout import scout, upload_case_to_scout
from .utils import _suggest_cases_to_upload
from .utils import suggest_cases_to_upload
from .validate import validate

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -127,7 +127,7 @@ def upload(context, family_id, force_restart):
return

if not family_id:
_suggest_cases_to_upload(context)
suggest_cases_to_upload(context)
context.abort()

family_obj = context.obj["status"].family(family_id)
Expand Down
4 changes: 2 additions & 2 deletions cg/cli/upload/coverage.py
Expand Up @@ -5,7 +5,7 @@
from cg.apps import coverage as coverage_app
from cg.meta.upload.coverage import UploadCoverageApi

from .utils import _suggest_cases_to_upload
from .utils import suggest_cases_to_upload


@click.command()
Expand All @@ -18,7 +18,7 @@ def coverage(context, re_upload, family_id):
click.echo(click.style("----------------- COVERAGE --------------------"))

if not family_id:
_suggest_cases_to_upload(context)
suggest_cases_to_upload(context)
context.abort()

chanjo_api = coverage_app.ChanjoAPI(context.obj)
Expand Down
6 changes: 3 additions & 3 deletions cg/cli/upload/delivery_report.py
Expand Up @@ -8,7 +8,7 @@
from cg.apps import hk, scoutapi
from cg.exc import DeliveryReportError, CgError

from .utils import _suggest_cases_delivery_report
from .utils import suggest_cases_delivery_report

LOG = logging.getLogger(__name__)
SUCCESS = 0
Expand Down Expand Up @@ -173,7 +173,7 @@ def _update_delivery_report_date(status_api, case_id):
report_api = context.obj["report_api"]

if not family_id:
_suggest_cases_delivery_report(context)
suggest_cases_delivery_report(context)
context.abort()

if print_console:
Expand Down Expand Up @@ -234,7 +234,7 @@ def _get_delivery_report_from_hk(hk_api: hk.HousekeeperAPI, family_id):
return uploaded_delivery_report_files[0].full_path

if not case_id:
_suggest_cases_delivery_report(context)
suggest_cases_delivery_report(context)
context.abort()

hk_api = context.obj["housekeeper_api"]
Expand Down
14 changes: 8 additions & 6 deletions cg/cli/upload/genotype.py
@@ -1,9 +1,12 @@
"""Code for uploading genotype data via CLI"""
import click
import logging

from cg.meta.upload.gt import UploadGenotypesAPI

from .utils import _suggest_cases_to_upload
from .utils import suggest_cases_to_upload

LOG = logging.getLogger(__name__)


@click.command()
Expand All @@ -16,16 +19,15 @@ def genotypes(context, re_upload, family_id):
click.echo(click.style("----------------- GENOTYPES -------------------"))

if not family_id:
_suggest_cases_to_upload(context)
suggest_cases_to_upload(context)
context.abort()

tb_api = context.obj["tb_api"]
gt_api = context.obj["genotype_api"]
hk_api = context.obj["housekeeper_api"]
status_api = context.obj["status"]
family_obj = status_api.family(family_id)
upload_genotypes_api = UploadGenotypesAPI(hk_api=hk_api, gt_api=gt_api)
results = upload_genotypes_api.data(family_obj.analyses[0])

api = UploadGenotypesAPI(status_api, hk_api, tb_api, gt_api)
results = api.data(family_obj.analyses[0])
if results:
api.upload(results, replace=re_upload)
upload_genotypes_api.upload(results, replace=re_upload)
4 changes: 2 additions & 2 deletions cg/cli/upload/scout.py
Expand Up @@ -7,7 +7,7 @@
from cg.apps import hk
from cg.meta.upload.scoutapi import UploadScoutAPI

from .utils import _suggest_cases_to_upload
from .utils import suggest_cases_to_upload

LOG = logging.getLogger(__name__)

Expand All @@ -23,7 +23,7 @@ def scout(context, re_upload, print_console, case_id):
click.echo(click.style("----------------- SCOUT -----------------------"))

if not case_id:
_suggest_cases_to_upload(context)
suggest_cases_to_upload(context)
context.abort()

tb_api = context.obj["tb_api"]
Expand Down
4 changes: 2 additions & 2 deletions cg/cli/upload/utils.py
Expand Up @@ -28,14 +28,14 @@ def all_samples_list_analyses(links: List[models.FamilySample]) -> list:
return [link.sample.application_version.application.analysis_type for link in links]


def _suggest_cases_to_upload(context):
def suggest_cases_to_upload(context):
LOG.warning("provide a case, suggestions:")
records = context.obj["status"].analyses_to_upload()[:50]
for family_obj in records:
click.echo(family_obj)


def _suggest_cases_delivery_report(context):
def suggest_cases_delivery_report(context):
LOG.error("provide a case, suggestions:")
records = context.obj["status"].analyses_to_delivery_report()[:50]
for family_obj in records:
Expand Down
4 changes: 2 additions & 2 deletions cg/cli/upload/validate.py
Expand Up @@ -4,7 +4,7 @@

from cg.apps import coverage as coverage_app

from .utils import _suggest_cases_to_upload
from .utils import suggest_cases_to_upload


@click.command()
Expand All @@ -16,7 +16,7 @@ def validate(context, family_id):
click.echo(click.style("----------------- VALIDATE --------------------"))

if not family_id:
_suggest_cases_to_upload(context)
suggest_cases_to_upload(context)
context.abort()

family_obj = context.obj["status"].family(family_id)
Expand Down

0 comments on commit ef7cb25

Please sign in to comment.