Skip to content

Commit

Permalink
Merge pull request #19 from thobalose/dev
Browse files Browse the repository at this point in the history
Added phenotype and antibiotic options, and updated CLI setup.
  • Loading branch information
thobalose committed Jun 28, 2019
2 parents 0a16d27 + c07a5a0 commit 3012064
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 121 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ install:

before_script:
- sudo apt-get update
- wget https://repo.continuum.io/miniconda/Miniconda3-4.3.31-Linux-x86_64.sh -O miniconda.sh
- wget https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
Expand All @@ -36,6 +36,7 @@ before_script:
- conda config --add channels conda-forge
- conda config --add channels bioconda
- conda config --add channels https://conda.anaconda.org/thoba
- conda update --all
- conda info -a
- conda install conda-build anaconda-client

Expand Down
5 changes: 4 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ verify_ssl = true
vcf2neo = {editable = true,path = "."}
autopep8 = "*"
pylint = "*"
pydocstyle = "*"
pep8 = "*"
flake8 = "*"

[packages]
beautifulsoup4 = "==4.5.3"
Expand All @@ -38,7 +41,7 @@ tqdm = "==4.10.0"
wrapt = "==1.10.8"
xmltodict = "==0.10.2"
coverage = "*"
combattbmodel = "*"
combattbmodel = "==0.0.8"

[requires]
python_version = "3.7"
202 changes: 125 additions & 77 deletions Pipfile.lock

Large diffs are not rendered by default.

16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,20 @@ You can change the default database location (`localhost`) by setting the
`DATABASE_URL` environment variable to `remote`.

```sh
$ vcf2neo --help
Usage: vcf2neo [OPTIONS] COMMAND [ARGS]...
$ vcf2neo load_vcf --help
Usage: vcf2neo load_vcf [OPTIONS] VCF_DIR [OWNER] [HISTORY_ID] [OUTPUT_DIR]

Load SnpEff annotated VCF files to genes and drugs in NeoDb.

Options:
-p, --phenotype [XDR|MDR|SUSCEPTIBLE|UNKNOWN]
Specify phenotype. [required]
-a, --antibiotic TEXT Specify antibiotic. E.g. Rifampicin
[required]
--help Show this message and exit.

...
$ vcf2neo load_vcf PATH/TO/VCF_DIR
$ vcf2neo load_vcf -p UNKNOWN -a UNKNOWN PATH/TO/VCF_DIR
```

**Exploring variant data**:
Expand Down
15 changes: 7 additions & 8 deletions dc/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@ LABEL Maintainer="thoba@sanbi.ac.za"

RUN apk update \
&& apk upgrade \
&& mkdir /data \
&& apk add --no-cache ca-certificates openssl curl \
&& update-ca-certificates
&& mkdir -p /data/guides \
&& apk add --no-cache openssl wget

ENV DB_VERSION="3057235" \
# ZENODO_URL="https://zenodo.org/record/${DB_VERSION}/files/neodb-db-data.tar.bz2" \
FIGSHARE_URL="https://ndownloader.figshare.com/files/15377531"
ENV DB_VERSION="3258008"

RUN wget "${FIGSHARE_URL}" \
RUN wget "https://zenodo.org/record/${DB_VERSION}/files/neodb-db-data.tar.bz2" \
-O neodb_db_data.tar.bz2 \
&& tar xvfj neodb_db_data.tar.bz2 \
&& rm -rf neodb_db_data.tar.bz2 \
&& cp -r databases /data/
&& cp -r databases /data/ \
&& wget https://raw.githubusercontent.com/COMBAT-TB/combat-tb-neodb/master/dc/guides/combattb_neodb.html \
-P /data/guides/

VOLUME /data
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ services:
NEO4J_ACCEPT_LICENSE_AGREEMENT: 'yes'
NEO4J_dbms_allow__format__migration: 'true'
NEO4J_dbms_allow__upgrade: 'true'
NEO4J_browser_post__connect__cmd: "config; play http://localhost:7474/guides/combattb_neodb.html"
ports:
- "7687:7687"
- "7474:7474"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name='vcf2neo',
version='0.0.8',
version='0.0.9',
url='https://github.com/COMBAT-TB/vcf2neo',
bugtrack_url='https://github.com/COMBAT-TB/vcf2neo/issues',
description='Parses SnpEff annotated VCF files and builds a graph '
Expand Down
5 changes: 4 additions & 1 deletion test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ def cli_runner():


def test_load_vcf(cli_runner):
result = cli_runner.invoke(load_vcf, [TEST_DATA_DIR])
result = cli_runner.invoke(
load_vcf,
['-p', 'MDR', '-a', 'Rifampicin', '-a', 'Isoniazid', TEST_DATA_DIR]
)
assert result.exit_code == 0
56 changes: 31 additions & 25 deletions vcf2neo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from vcf2neo.docker import Docker
from vcf2neo.vcfproc import process_vcf_files

HOST = os.environ.get("DATABASE_URL", "localhost")


@click.group()
def cli():
Expand All @@ -19,45 +21,49 @@ def cli():
pass


try:
# Python 2
u_str = unicode
except NameError:
# Python 3
u_str = str


@cli.command()
@click.argument('vcf_dir', type=click.Path(exists=True, dir_okay=True),
required=True)
@click.argument('owner', type=u_str, required=False)
@click.argument('history_id', type=u_str, required=False)
@click.argument('output_dir', type=click.Path(exists=True, dir_okay=True),
required=False)
@click.option('-d/-D', default=False, help='Run Neo4j docker container.')
def load_vcf(vcf_dir, owner, history_id, d, output_dir=None):
@click.option('--owner', required=True, default=os.environ.get('USER', ''),
show_default='Current $USER', help='Specify owner.')
# @click.argument('history_id', type=u_str, required=False)
# @click.argument('output_dir', type=click.Path(exists=True, dir_okay=True),
# required=False)
# @click.option('--docker/--no-docker', default=False,
# help='Run Combat-TB-NeoDB container.')
@click.option('--phenotype', '-p', required=True,
type=click.Choice(['XDR', 'MDR', 'SUSCEPTIBLE', 'UNKNOWN']),
help='Specify phenotype.')
@click.option('--antibiotic', '-a', multiple=True, required=True,
help='Specify antibiotic. E.g. Rifampicin')
def load_vcf(vcf_dir, owner, phenotype=None, antibiotic=None):
"""
Load SnpEff annotated VCF files to genes and drugs in NeoDb.
"""
docker = None
if d:

# TODO: Look into docker implementation
container, docker, output_dir, history_id = None, None, None, None
if docker:
if output_dir is None:
exit("When running in Docker spawn mode we need an output dir.")
docker = Docker(output_dir)
docker.run()
http_port = docker.http_port
bolt_port = docker.bolt_port
container = Docker(output_dir)
container.run()
http_port = container.http_port
bolt_port = container.bolt_port
else:
http_port = 7474
bolt_port = 7687

neo_db = NeoDb(host=os.environ.get("DATABASE_URL", "localhost"), password="",
neo_db = NeoDb(host=HOST, password="",
use_bolt=True, bolt_port=bolt_port, http_port=http_port)
start = time.time()
process_vcf_files(neo_db, vcf_dir=vcf_dir,
owner=owner, history_id=history_id)
if d:
docker.stop()
antibiotic = '\t'.join(antibiotic)
process_vcf_files(neo_db, vcf_dir=vcf_dir, phenotype=phenotype,
antibiotic=antibiotic, owner=owner,
history_id=history_id)
if docker:
container.stop()
end = time.time()
sys.stdout.write(f"\nDone in {end - start} ms.\n")

Expand Down
5 changes: 3 additions & 2 deletions vcf2neo/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,13 @@ def create_variant_site_nodes(self, record, known_sites,
self.graph.push(v_set)
return known_sites

def create_call_set_nodes(self, set_name, v_set):
def create_call_set_nodes(self, set_name, phenotype, antibiotic, v_set):
"""
Create CallSet Nodes
:return:
"""
c_set = CallSet(name=set_name)
c_set = CallSet(name=set_name, phenotype=phenotype,
antibiotic=antibiotic)
c_set.belongs_to_vset.add(v_set)
self.graph.create(c_set)
return c_set
6 changes: 4 additions & 2 deletions vcf2neo/vcfproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import vcf


def process_vcf_files(db, vcf_dir, owner=None, history_id=None):
def process_vcf_files(db, vcf_dir, phenotype, antibiotic, owner=None,
history_id=None):
owner = owner if owner else getpass.getuser()
known_sites = dict()
if os.path.isdir(vcf_dir):
Expand All @@ -31,7 +32,8 @@ def process_vcf_files(db, vcf_dir, owner=None, history_id=None):
c_set_name = os.path.basename(
os.path.abspath(_file))
c_set = db.create_call_set_nodes(
set_name=c_set_name, v_set=v_set
set_name=c_set_name, v_set=v_set,
phenotype=phenotype, antibiotic=antibiotic
)
known_sites = get_variant_sites(
db, known_sites, vcf_reader, v_set=v_set,
Expand Down

0 comments on commit 3012064

Please sign in to comment.