Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand CLI #208

Merged
merged 15 commits into from Feb 5, 2020
1 change: 1 addition & 0 deletions .travis.yml
Expand Up @@ -13,6 +13,7 @@ install:
- mv sambamba_v0.5.8 ./bin/sambamba
- chmod +x ./bin/sambamba
- export PATH="${PATH}:${PWD}/bin"
- pip install six==1.12.0
- pip install -r requirements.txt -r requirements-dev.txt .

before_script:
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Expand Up @@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## Unreleased

### Added
- added CLI commands

## [4.0.0] - 2016-08-02
Version 4 slims down Chanjo quite a bit.

Expand Down
56 changes: 41 additions & 15 deletions chanjo/calculate.py
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
"""Module for calculate operations on database"""

from sqlalchemy.sql import func

from chanjo.store.models import Sample, Transcript, TranscriptStat
from chanjo.store.models import Transcript, TranscriptStat


class CalculateMixin:
Expand All @@ -10,23 +11,48 @@ class CalculateMixin:

def mean(self, sample_ids=None):
"""Calculate the mean values of all metrics per sample."""
sql_query = (self.query(TranscriptStat.sample_id,
func.avg(TranscriptStat.mean_coverage),
func.avg(TranscriptStat.completeness_10),
func.avg(TranscriptStat.completeness_15),
func.avg(TranscriptStat.completeness_20),
func.avg(TranscriptStat.completeness_50),
func.avg(TranscriptStat.completeness_100))
.group_by(TranscriptStat.sample_id))
sql_query = self.query(
TranscriptStat.sample_id,
func.avg(TranscriptStat.mean_coverage),
func.avg(TranscriptStat.completeness_10),
func.avg(TranscriptStat.completeness_15),
func.avg(TranscriptStat.completeness_20),
func.avg(TranscriptStat.completeness_50),
func.avg(TranscriptStat.completeness_100),
).group_by(TranscriptStat.sample_id)
if sample_ids:
sql_query = sql_query.filter(TranscriptStat.sample_id.in_(sample_ids))
return sql_query

def gene_metrics(self, *genes):
"""Calculate gene statistics."""
query = (self.mean()
.add_column(Transcript.gene_id)
.join(TranscriptStat.transcript)
.filter(Transcript.gene_id.in_(genes))
.group_by(Transcript.gene_id))
query = (
self.mean()
.add_column(Transcript.gene_id)
.join(TranscriptStat.transcript)
.filter(Transcript.gene_id.in_(genes))
.group_by(Transcript.gene_id)
)
return query

def sample_coverage(self, sample_ids: list, genes: list) -> dict:
    """Summarise coverage per sample over a set of genes.

    Averages ``mean_coverage`` and ``completeness_10`` over the transcript
    statistics rows that belong to one of the requested samples and whose
    joined transcript has a gene id among the requested genes.

    Args:
        sample_ids: sample ids to restrict the statistics to.
        genes: gene ids used to select transcripts.

    Returns:
        Mapping of sample id to a dict with the keys ``mean_coverage`` and
        ``mean_completeness``.
    """
    columns = (
        TranscriptStat.sample_id.label('sample_id'),
        func.avg(TranscriptStat.mean_coverage).label('mean_coverage'),
        func.avg(TranscriptStat.completeness_10).label('mean_completeness'),
    )
    rows = self.query(*columns).join(Transcript)
    rows = rows.filter(
        Transcript.gene_id.in_(genes),
        TranscriptStat.sample_id.in_(sample_ids),
    )
    # One aggregated row per sample.
    rows = rows.group_by(TranscriptStat.sample_id)

    coverage_by_sample = {}
    for row in rows:
        coverage_by_sample[row.sample_id] = {
            "mean_coverage": row.mean_coverage,
            "mean_completeness": row.mean_completeness,
        }
    return coverage_by_sample
18 changes: 16 additions & 2 deletions chanjo/cli/calculate.py
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
"""Functions for calculating operations on database"""
import json
import logging

import click

from chanjo.store.api import ChanjoDB
from chanjo.store.constants import STAT_COLUMNS
from chanjo.store.constants import STAT_COLUMNS, OMIM_GENE_IDS

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -37,3 +37,17 @@ def mean(context, sample, pretty):
for result in query:
row = {column: value for column, value in zip(columns, result)}
click.echo(dump_json(row, pretty=pretty))


@calculate.command()
@click.option('-p', '--pretty', is_flag=True, help="Print in pretty format")
@click.option('-s', '--sample', multiple=True, type=str, help="Sample to get coverage for")
@click.option('-o', '--omim', is_flag=True, help="Use genes in the OMIM panel")
@click.argument('genes', nargs=-1)
@click.pass_context
def coverage(context, pretty, sample, omim, genes):
    """Calculate coverage for sample on specified genes"""
    # The --omim flag replaces any gene ids given on the command line with
    # the OMIM_GENE_IDS constant.
    gene_ids = list(OMIM_GENE_IDS if omim else genes)
    store = context.obj['db']
    coverage_by_sample = store.sample_coverage(sample_ids=sample, genes=gene_ids)
    click.echo(dump_json(coverage_by_sample, pretty=pretty))
75 changes: 62 additions & 13 deletions chanjo/cli/db.py
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
"""CLI commands to interact with db"""

import json
import logging

import click
Expand All @@ -9,36 +11,83 @@
LOG = logging.getLogger(__name__)


@click.group('db')
@click.group("db")
@click.pass_context
def db_cmd(context):
"""Interact with the database for maintenance tasks."""
context.obj['db'] = ChanjoDB(uri=context.obj['database'])
context.obj["db"] = ChanjoDB(uri=context.obj["database"])


@db_cmd.command()
@click.option('--reset', is_flag=True, help='tear down existing db')
@click.option("--reset", is_flag=True, help="tear down existing db")
@click.pass_context
def setup(context, reset):
"""Initialize a new database from scratch."""
if reset:
LOG.info('tearing down existing database')
context.obj['db'].tear_down()
LOG.info('setting up new database')
context.obj['db'].set_up()
LOG.info("tearing down existing database")
context.obj["db"].tear_down()
LOG.info("setting up new database")
context.obj["db"].set_up()


@db_cmd.command()
@click.argument('sample_id', type=str)
@click.argument("sample_id", type=str)
@click.pass_context
def remove(context, sample_id):
"""Remove all traces of a sample from the database."""
store = context.obj['db']
LOG.debug('find sample in database with id: %s', sample_id)
store = context.obj["db"]
LOG.debug("find sample in database with id: %s", sample_id)
sample_obj = Sample.query.get(sample_id)
if sample_obj is None:
LOG.warning('sample (%s) not found in database', sample_id)
LOG.warning("sample (%s) not found in database", sample_id)
context.abort()
LOG.info('delete sample (%s) from database', sample_id)
LOG.info("delete sample (%s) from database", sample_id)
store.session.delete(sample_obj)
store.save()


@db_cmd.command()
@click.option("--group-id", "-g", help="Display samples in a group")
@click.option("--sample-id", "-s", help="Display sample")
@click.option("--pretty", "-p", is_flag=True, help="Print in pretty format")
@click.pass_context
def samples(context, group_id, sample_id, pretty):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if samples=None or if the sample does not exist? Could you add a test or two?

"""Display samples from database"""

store = context.obj["db"]
query = store.fetch_samples(sample_id=sample_id, group_id=group_id)
indent = None
if pretty:
indent = 4
click.echo(
json.dumps([dict(result) for result in query], default=str, indent=indent)
)


@db_cmd.command()
@click.option("--sample-id", "-s", help="Samples to limit query to")
@click.option("--pretty", "-p", is_flag=True, help="Print in pretty format")
@click.pass_context
def transcripts(context, sample_id, pretty):
    """Display transcripts from database"""
    rows = context.obj["db"].fetch_transcripts(sample_id=sample_id)
    # Pretty output indents by four spaces; otherwise json.dumps emits a
    # single compact line.
    indent = 4 if pretty else None
    # Each result becomes a plain dict; values json cannot encode natively
    # are passed through str().
    records = [dict(row) for row in rows]
    click.echo(json.dumps(records, default=str, indent=indent))


@db_cmd.command()
@click.option("--group-id", "-g", help="Delete entire group")
@click.option("--sample-id", "-s", help="Delete sample")
@click.pass_context
def delete(context, group_id, sample_id):
    """Delete from database"""
    store = context.obj["db"]
    # A sample id takes precedence: the group is only deleted when no
    # sample id was supplied.
    if sample_id:
        store.delete_sample(sample_id=sample_id)
    elif group_id:
        store.delete_group(group_id=group_id)
    else:
        # Previously the command exited silently without touching the
        # database; make the no-op visible to the operator.
        LOG.warning("no sample or group id given, nothing deleted")
5 changes: 3 additions & 2 deletions chanjo/store/api.py
Expand Up @@ -4,14 +4,15 @@
import os

from alchy import Manager

from chanjo.calculate import CalculateMixin
from .models import BASE
from .fetch import FetchMixin
from .delete import DeleteMixin

log = logging.getLogger(__name__)


class ChanjoDB(Manager, CalculateMixin):
class ChanjoDB(Manager, CalculateMixin, DeleteMixin, FetchMixin):
"""SQLAlchemy-based database object.

Bundles functionality required to setup and interact with various
Expand Down