Skip to content

Commit

Permalink
Merge pull request #23 from MayroseLab/IA_bowtie2
Browse files Browse the repository at this point in the history
IA bowtie2
  • Loading branch information
soungalo committed May 23, 2023
2 parents 4ccc48b + eeee4fb commit 81cc93b
Show file tree
Hide file tree
Showing 4 changed files with 256 additions and 23 deletions.
2 changes: 1 addition & 1 deletion conda_env/EVM.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- bioconda
- conda-forge
dependencies:
- evidencemodeler
- evidencemodeler=1.1.1
- perl-db_file
- perl-uri=1.76
- perl-dbi=1.642
6 changes: 6 additions & 0 deletions conda_env/bowtie2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Conda environment that provides the Bowtie2 aligner (bowtie2, bowtie2-build).
name: bowtie2
# NOTE(review): Bioconda's documentation recommends listing conda-forge ahead
# of bioconda; the order is kept as-is here - confirm whether it is intended.
channels:
  - bioconda
  - conda-forge
dependencies:
  # Exact version pin.
  - bowtie2=2.5.1
236 changes: 231 additions & 5 deletions conda_env/edta.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,236 @@
name: EDTA
channels:
- conda-forge
- bioconda
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- _tflow_select=2.3.0=mkl
- abseil-cpp=20211102.0=h93e1e8c_3
- absl-py=1.2.0=pyhd8ed1ab_0
- aiohttp=3.8.1=py38h0a891b7_1
- aiosignal=1.2.0=pyhd8ed1ab_0
- alsa-lib=1.2.3.2=h166bdaf_0
- astunparse=1.6.3=pyhd8ed1ab_0
- async-timeout=4.0.2=pyhd8ed1ab_0
- attrs=22.1.0=pyh71513ae_1
- biopython=1.79=py38h0a891b7_2
- blast=2.5.0=hc0b0e79_3
- blinker=1.4=py_1
- boost=1.73.0=py38hd103949_1
- boost-cpp=1.73.0=h7f8727e_12
- brotlipy=0.7.0=py38h0a891b7_1004
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2022.9.14=ha878542_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cachetools=5.2.0=pyhd8ed1ab_0
- cairo=1.16.0=h18b612c_1001
- cd-hit=4.8.1=h5b5514e_7
- certifi=2022.9.14=pyhd8ed1ab_0
- cffi=1.15.1=py38h4a40e3a_0
- charset-normalizer=2.1.1=pyhd8ed1ab_0
- click=8.1.3=py38h578d9bd_0
- coreutils=9.1=h166bdaf_0
- cryptography=37.0.4=py38h2b5fc30_0
- curl=7.84.0=h5eee18b_0
- dill=0.3.5.1=pyhd8ed1ab_0
- drmaa=0.7.9=py_1000
- edta=1.9.6=0
- entrez-direct=16.2=he881be0_1
- expat=2.4.8=h27087fc_0
- flatbuffers=2.0.7=h27087fc_0
- fontconfig=2.14.0=hc2a2eb6_1
- freetype=2.12.1=hca18f0e_0
- fribidi=1.0.10=h36c2ea0_0
- frozenlist=1.3.1=py38h0a891b7_0
- gast=0.5.3=pyhd8ed1ab_0
- genericrepeatfinder=1.0=h9f5acd7_2
- genometools-genometools=1.6.1=py38h23571c4_2
- gettext=0.19.8.1=h73d1719_1008
- giflib=5.2.1=h36c2ea0_2
- glib=2.72.1=h6239696_0
- glib-tools=2.72.1=h6239696_0
- glob2=0.7=py_0
- gmp=6.1.2=hf484d3e_1000
- gnutls=3.5.19=h2a4e5f8_1
- google-auth=2.11.0=pyh6c4a22f_0
- google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
- google-pasta=0.2.0=pyh8c360ce_0
- graphite2=1.3.14=h295c915_1
- grpc-cpp=1.46.4=h6fc47f4_3
- grpcio=1.46.4=py38h148feb7_3
- h5py=3.7.0=nompi_py38h045baee_101
- harfbuzz=2.8.1=h6f93f22_0
- hdf5=1.12.2=nompi_h2386368_100
- hmmer=3.3.2=h87f3376_2
- icu=58.2=hf484d3e_1000
- idna=3.4=pyhd8ed1ab_0
- importlib-metadata=4.11.4=py38h578d9bd_0
- joblib=1.2.0=pyhd8ed1ab_0
- jpeg=9e=h166bdaf_2
- keras=2.9.0=pyhd8ed1ab_0
- keyutils=1.6.1=h166bdaf_0
- krb5=1.19.3=h3790be6_0
- lcms2=2.12=hddcbb42_0
- ld_impl_linux-64=2.36.1=hea4e1c9_2
- lerc=4.0.0=h27087fc_0
- libabseil=20211102.0=cxx17_h48a1fff_3
- libblas=3.9.0=16_linux64_openblas
- libboost=1.73.0=h28710b8_12
- libcblas=3.9.0=16_linux64_openblas
- libcurl=7.84.0=h91b91d3_0
- libdeflate=1.14=h166bdaf_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=h516909a_1
- libffi=3.4.2=h7f98852_5
- libgcc=7.2.0=h69d50b8_2
- libgcc-ng=12.1.0=h8d9b700_16
- libgd=2.3.3=h695aa2c_1
- libgfortran-ng=12.1.0=h69a702a_16
- libgfortran5=12.1.0=hdcd56e2_16
- libglib=2.72.1=h2d90d5f_0
- libgomp=12.1.0=h8d9b700_16
- libiconv=1.16=h516909a_0
- libidn2=2.3.3=h166bdaf_0
- liblapack=3.9.0=16_linux64_openblas
- libnghttp2=1.47.0=hdcd2b5c_1
- libnsl=2.0.0=h7f98852_0
- libopenblas=0.3.21=pthreads_h78a6416_3
- libpng=1.6.38=h753d276_0
- libprotobuf=3.20.1=h6239696_4
- libsqlite=3.39.3=h753d276_0
- libssh2=1.10.0=haa6b8db_3
- libstdcxx-ng=12.1.0=ha89aaad_16
- libtiff=4.4.0=h55922b4_4
- libunistring=0.9.10=h7f98852_0
- libuuid=2.32.1=h7f98852_1000
- libwebp-base=1.2.4=h166bdaf_0
- libxcb=1.13=h7f98852_1004
- libzlib=1.2.12=h166bdaf_3
- ltr_finder=1.07=h9f5acd7_2
- ltr_retriever=2.9.0=hdfd78af_1
- markdown=3.4.1=pyhd8ed1ab_0
- markupsafe=2.1.1=py38h0a891b7_1
- mdust=2006.10.17=hec16e2b_4
- multidict=6.0.2=py38h0a891b7_1
- multiprocess=0.70.13=py38h0a891b7_1
- muscle=5.1=h9f5acd7_1
- ncurses=6.3=h27087fc_1
- nettle=3.3=0
- nseg=1.0.1=hec16e2b_2
- numpy=1.23.3=py38h3a7f9d9_0
- oauthlib=3.2.1=pyhd8ed1ab_0
- openjdk=11.0.9.1=h5cc2fde_1
- openssl=1.1.1q=h166bdaf_0
- opt_einsum=3.3.0=pyhd8ed1ab_1
- packaging=21.3=pyhd8ed1ab_0
- pandas=1.5.0=py38h8f669ce_0
- pango=1.42.4=h69149e4_5
- pcre=8.45=h9c3ff4c_0
- perl=5.22.0.1=0
- perl-app-cpanminus=1.7043=pl5.22.0_0
- perl-archive-tar=2.18=pl5.22.0_2
- perl-carp=1.38=pl5.22.0_0
- perl-common-sense=3.74=0
- perl-compress-raw-bzip2=2.069=1
- perl-compress-raw-zlib=2.069=3
- perl-constant=1.33=pl5.22.0_0
- perl-data-dumper=2.161=pl5.22.0_0
- perl-devel-symdump=2.18=pl5.22.0_0
- perl-exporter=5.72=pl5.22.0_0
- perl-exporter-tiny=0.042=1
- perl-extutils-makemaker=7.24=pl5.22.0_1
- perl-file-path=2.12=pl5.22.0_0
- perl-file-temp=0.2304=pl5.22.0_0
- perl-file-which=1.20=0
- perl-gd=2.56=pl522h470a237_9
- perl-io-compress=2.069=pl5.22.0_2
- perl-io-zlib=1.10=1
- perl-json=2.90=1
- perl-json-xs=2.34=0
- perl-list-moreutils=0.428=pl5.22.0_0
- perl-parent=0.236=pl5.22.0_0
- perl-pathtools=3.73=h470a237_2
- perl-scalar-list-utils=1.45=2
- perl-test-more=1.001002=pl5.22.0_0
- perl-text-soundex=3.05=pl5.22.0.1_0
- perl-threaded=5.32.1=hdfd78af_1
- pip=22.2.2=pyhd8ed1ab_0
- pixman=0.38.0=h516909a_1003
- pp=1.6.4.4=py_0
- protobuf=3.20.1=py38hfa26641_0
- pthread-stubs=0.4=h36c2ea0_1001
- pyasn1=0.4.8=py_0
- pyasn1-modules=0.2.7=py_0
- pycparser=2.21=pyhd8ed1ab_0
- pyjwt=2.5.0=pyhd8ed1ab_0
- pyopenssl=22.0.0=pyhd8ed1ab_0
- pyparsing=3.0.9=pyhd8ed1ab_0
- pysocks=1.7.1=pyha2e5f31_6
- python=3.8.13=h582c2e5_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-flatbuffers=2.0=pyhd8ed1ab_0
- python_abi=3.8=2_cp38
- pytz=2022.2.1=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_0
- re2=2022.06.01=h27087fc_0
- readline=8.1.2=h0f457ee_0
- recon=1.08=hec16e2b_4
- regex=2022.9.13=py38h0a891b7_0
- repeatmasker=4.0.6=6
- blast=2.5.0
- genometools-genometools=1.6.1
- repeatmodeler=1.0.8
- edta=1.9.6
- repeatmodeler=1.0.8=pl5.22.0_1
- repeatscout=1.0.6=hec16e2b_3
- requests=2.28.1=pyhd8ed1ab_1
- requests-oauthlib=1.3.1=pyhd8ed1ab_0
- rmblast=2.11.0=h6200dbe_0
- rsa=4.9=pyhd8ed1ab_0
- scikit-learn=1.1.2=py38h0b08f9b_0
- scipy=1.9.1=py38hea3f02b_0
- setuptools=65.3.0=pyhd8ed1ab_1
- six=1.16.0=pyh6c4a22f_0
- snappy=1.1.9=hbd366e4_1
- sqlite=3.39.3=h4ff8645_0
- tensorboard=2.9.0=pyhd8ed1ab_0
- tensorboard-data-server=0.6.0=py38h2b5fc30_2
- tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
- tensorflow=2.9.1=mkl_py38h96f9fba_0
- tensorflow-base=2.9.1=mkl_py38h353358b_0
- tensorflow-estimator=2.9.0=py38h06a4308_0
- termcolor=2.0.1=pyhd8ed1ab_1
- tesorter=1.3.0=py_0
- threadpoolctl=3.1.0=pyh8a188c0_0
- tk=8.6.12=h27826a3_0
- trf=4.09.1=hec16e2b_2
- typing-extensions=4.3.0=hd8ed1ab_0
- typing_extensions=4.3.0=pyha770c72_0
- urllib3=1.26.11=pyhd8ed1ab_0
- werkzeug=2.2.2=pyhd8ed1ab_0
- wget=1.20.3=ha56f1ee_1
- wheel=0.37.1=pyhd8ed1ab_0
- wrapt=1.14.1=py38h0a891b7_0
- xorg-fixesproto=5.0=h7f98852_1002
- xorg-inputproto=2.3.2=h7f98852_1002
- xorg-kbproto=1.0.7=h7f98852_1002
- xorg-libice=1.0.10=h7f98852_0
- xorg-libsm=1.2.3=hd9c2040_1000
- xorg-libx11=1.7.2=h7f98852_0
- xorg-libxau=1.0.9=h7f98852_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xorg-libxext=1.3.4=h7f98852_1
- xorg-libxfixes=5.0.3=h7f98852_1004
- xorg-libxi=1.7.10=h7f98852_0
- xorg-libxrender=0.9.10=h7f98852_1003
- xorg-libxtst=1.2.3=h7f98852_1002
- xorg-recordproto=1.14.2=h7f98852_1002
- xorg-renderproto=0.11.1=h7f98852_1002
- xorg-xextproto=7.3.0=h7f98852_1002
- xorg-xproto=7.0.31=h7f98852_1007
- xz=5.2.6=h166bdaf_0
- yarl=1.7.2=py38h0a891b7_2
- zipp=3.8.1=pyhd8ed1ab_0
- zlib=1.2.12=h166bdaf_3
- zstd=1.5.2=h6239696_4
# The machine-specific `prefix:` entry has been intentionally removed: it
# pointed at one user's local `.snakemake/conda/<hash>` directory, which is not
# portable and does not belong in a shared environment definition.
35 changes: 18 additions & 17 deletions iterative_assembly/PGC_iterative_assembly.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -264,21 +264,21 @@ rule copy_reference:

rule index_reference:
"""
Index reference genome for BWA
Index reference genome for Bowtie2
"""
input:
config["out_dir"] + "/all_samples/ref/" + config['reference_name'] + '_genome.fasta'
output:
config["out_dir"] + "/all_samples/ref/" + config['reference_name'] + '_genome.fasta.bwt'
config["out_dir"] + "/all_samples/ref/" + config['reference_name'] + '_genome.fasta.4.bt2'
params:
queue=config['queue'],
priority=config['priority'],
logs_dir=LOGS_DIR
conda:
CONDA_ENV_DIR + '/bwa.yml'
CONDA_ENV_DIR + '/bowtie2.yml'
shell:
"""
bwa index {input}
bowtie2-build {input} {input}
"""

rule map_reads_to_ref:
Expand All @@ -287,7 +287,7 @@ rule map_reads_to_ref:
"""
input:
ref_genome=config["out_dir"] + "/all_samples/ref/" + config['reference_name'] + '_genome.fasta',
ref_genome_index=config["out_dir"] + "/all_samples/ref/" + config['reference_name'] + '_genome.fasta.bwt',
ref_genome_index=config["out_dir"] + "/all_samples/ref/" + config['reference_name'] + '_genome.fasta.4.bt2',
r1_paired=config["out_dir"] + "/per_sample/{sample}/RPP_{ena_ref}/{ena_ref}_1_clean_paired.fastq.gz",
r1_unpaired=config["out_dir"] + "/per_sample/{sample}/RPP_{ena_ref}/{ena_ref}_1_clean_unpaired.fastq.gz",
r2_paired=config["out_dir"] + "/per_sample/{sample}/RPP_{ena_ref}/{ena_ref}_2_clean_paired.fastq.gz",
Expand All @@ -302,12 +302,12 @@ rule map_reads_to_ref:
logs_dir=LOGS_DIR,
ppn=config['ppn']
conda:
CONDA_ENV_DIR + '/bwa.yml'
CONDA_ENV_DIR + '/bowtie2.yml'
shell:
"""
bwa mem {input.ref_genome} {input.r1_paired} {input.r2_paired} -t {params.ppn} > {output.paired_map}
bwa mem {input.ref_genome} {input.r1_unpaired} -t {params.ppn} > {output.r1_unpaired_map}
bwa mem {input.ref_genome} {input.r2_unpaired} -t {params.ppn} > {output.r2_unpaired_map}
bowtie2 -x {input.ref_genome} -1 {input.r1_paired} -2 {input.r2_paired} -p {params.ppn} > {output.paired_map}
bowtie2 -x {input.ref_genome} -U {input.r1_unpaired} -p {params.ppn} > {output.r1_unpaired_map}
bowtie2 -x {input.ref_genome} -U {input.r2_unpaired} -p {params.ppn} > {output.r2_unpaired_map}
"""

rule extract_unmapped:
Expand Down Expand Up @@ -486,12 +486,13 @@ elif config['assembler'] == 'minia':
params:
out_dir=config["out_dir"] + "/per_sample/{sample}/assembly_{ena_ref}",
ppn=config['ppn'],
ppn_minus5=config['ppn']-5,
queue=config['queue'],
priority=config['priority'],
logs_dir=LOGS_DIR
shell:
"""
{input.minia} -1 {input.r1_paired} -2 {input.r2_paired} -s {input.single_reads_list} --nb-cores {params.ppn} --no-scaffolding -o {params.out_dir}/assembly --cleanup
{input.minia} -1 {input.r1_paired} -2 {input.r2_paired} -s {input.single_reads_list} --nb-cores {params.ppn_minus5} --no-scaffolding -o {params.out_dir}/assembly --cleanup
ln {params.out_dir}/assembly_final.contigs.fa {output}
"""

Expand Down Expand Up @@ -910,21 +911,21 @@ rule create_pan_annotation:

rule index_pan_genome:
"""
Index pan genome for BWA runs
Index pan genome for Bowtie2 runs
"""
input:
config["out_dir"] + "/all_samples/pan_genome/pan_genome.fasta"
output:
config["out_dir"] + "/all_samples/pan_genome/pan_genome.fasta.bwt"
config["out_dir"] + "/all_samples/pan_genome/pan_genome.fasta.4.bt2"
params:
queue=config['queue'],
priority=config['priority'],
logs_dir=LOGS_DIR,
conda:
CONDA_ENV_DIR + '/bwa.yml'
CONDA_ENV_DIR + '/bowtie2.yml'
shell:
"""
bwa index {input}
bowtie2-build {input} {input}
"""

rule map_reads_to_pan:
Expand All @@ -936,7 +937,7 @@ rule map_reads_to_pan:
r1_paired=config["out_dir"] + "/per_sample/{sample}/RPP_{ena_ref}/{ena_ref}_1_clean_paired.fastq.gz",
r2_paired=config["out_dir"] + "/per_sample/{sample}/RPP_{ena_ref}/{ena_ref}_2_clean_paired.fastq.gz",
pan_genome=config["out_dir"] + "/all_samples/pan_genome/pan_genome.fasta",
pan_genome_index=config["out_dir"] + "/all_samples/pan_genome/pan_genome.fasta.bwt"
pan_genome_index=config["out_dir"] + "/all_samples/pan_genome/pan_genome.fasta.4.bt2"
output:
config["out_dir"] + "/per_sample/{sample}/map_to_pan_{ena_ref}/{ena_ref}_map_to_pan.sam"
params:
Expand All @@ -945,10 +946,10 @@ rule map_reads_to_pan:
logs_dir=LOGS_DIR,
ppn=config['ppn']
conda:
CONDA_ENV_DIR + '/bwa.yml'
CONDA_ENV_DIR + '/bowtie2.yml'
shell:
"""
bwa mem -t {params.ppn} {input.pan_genome} {input.r1_paired} {input.r2_paired} > {output}
bowtie2 -p {params.ppn} -x {input.pan_genome} -1 {input.r1_paired} -2 {input.r2_paired} > {output}
"""

rule sam_to_sorted_bam:
Expand Down

0 comments on commit 81cc93b

Please sign in to comment.