Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Helixer #44

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.1
current_version = 0.7.1
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion annotation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from Mikado.parsers import parser_factory
from Mikado.transcripts import Gene, Transcript

VERSION = '0.6.1'
VERSION = '0.7.1'
RUN_METADATA = "run_details.json"

UTR_SELECTION_OPTIONS = ('augustus', 'gold', 'silver', 'bronze', 'all', 'hq_assembly', 'lq_assembly')
Expand Down
26 changes: 24 additions & 2 deletions annotation/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def combine_arguments_prediction(cli_arguments):
if cli_arguments.repeats:
cromwell_inputs['ei_prediction.repeats_gff'] = cli_arguments.repeats.name

if cli_arguments.helixer_model:
cromwell_inputs['ei_prediction.helixer_model'] = cli_arguments.helixer_model

if cli_arguments.force_train:
cromwell_inputs['ei_prediction.force_train'] = cli_arguments.force_train

Expand Down Expand Up @@ -89,6 +92,9 @@ def combine_arguments_prediction(cli_arguments):
if cli_arguments.mikado_utr_files:
cromwell_inputs['ei_prediction.mikado_utr_files'] = ' '.join(cli_arguments.mikado_utr_files)

if cli_arguments.temp_dir:
cromwell_inputs['ei_prediction.temp_dir'] = cli_arguments.temp_dir

if cli_arguments.do_glimmer:
cromwell_inputs['ei_prediction.do_glimmer'] = 'true'
if cli_arguments.do_glimmer is not True and os.access(cli_arguments.do_glimmer, os.R_OK):
Expand All @@ -103,7 +109,10 @@ def combine_arguments_prediction(cli_arguments):
cromwell_inputs['ei_prediction.do_codingquarry'] = 'true'
if cli_arguments.do_codingquarry is not True and os.access(cli_arguments.do_codingquarry, os.R_OK):
cromwell_inputs['ei_prediction.codingquarry_training'] = cli_arguments.do_codingquarry


if cli_arguments.do_helixer:
cromwell_inputs['ei_prediction.do_helixer'] = 'true'

if cli_arguments.no_augustus and cli_arguments.no_augustus is False:
cromwell_inputs['ei_prediction.do_augustus'] = 'false'

Expand Down Expand Up @@ -222,8 +231,11 @@ def combine_arguments_prediction(cli_arguments):
cromwell_inputs['ei_prediction.snap_extra_params'] = cli_arguments.snap_extra_params
if cli_arguments.augustus_extra_params:
cromwell_inputs['ei_prediction.augustus_extra_params'] = cli_arguments.augustus_extra_params
if cli_arguments.helixer_extra_params:
cromwell_inputs['ei_prediction.helixer_extra_params'] = cli_arguments.helixer_extra_params
if cli_arguments.evm_extra_params:
cromwell_inputs['ei_prediction.evm_extra_params'] = cli_arguments.evm_extra_params


if cli_arguments.mikado_config:
cromwell_inputs['ei_prediction.mikado_config'] = cli_arguments.mikado_config.name
Expand All @@ -246,7 +258,8 @@ def collect_prediction_output(run_metadata):

if outputs['ei_prediction.glimmer']:
symlink(outputs_path, outputs['ei_prediction.glimmer'])

if outputs['ei_prediction.helixer']:
symlink(outputs_path, outputs['ei_prediction.helixer'])
if outputs['ei_prediction.snap']:
symlink(outputs_path, outputs['ei_prediction.snap'])
if outputs['ei_prediction.codingquarry']:
Expand All @@ -271,6 +284,7 @@ def collect_prediction_output(run_metadata):
snap_prediction_path = os.path.join(predictions_path, "SNAP")
codingquarry_prediction_path = os.path.join(predictions_path, "CodingQuarry")
augustus_prediction_path = os.path.join(predictions_path, "Augustus")
helixer_prediction_path = os.path.join(predictions_path, "Helixer")
if not os.path.exists(predictions_path):
os.mkdir(predictions_path)

Expand Down Expand Up @@ -303,6 +317,14 @@ def collect_prediction_output(run_metadata):
else:
if os.path.exists(glimmer_prediction_path):
shutil.rmtree(glimmer_prediction_path)

if outputs['ei_prediction.predictions_helixer']:
if not os.path.exists(helixer_prediction_path):
os.mkdir(helixer_prediction_path)
symlink(helixer_prediction_path, outputs['ei_prediction.predictions_helixer'])
else:
if os.path.exists(helixer_prediction_path):
shutil.rmtree(helixer_prediction_path)

if outputs['ei_prediction.predictions_augustus']:
if not os.path.exists(augustus_prediction_path):
Expand Down
79 changes: 78 additions & 1 deletion annotation/prediction_module/main.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ workflow ei_prediction {
Boolean do_glimmer = false
Directory? glimmer_training
Boolean do_snap = false
Boolean do_helixer = false
File? snap_training
String? temp_dir

Boolean do_augustus = true
Array[File]? transcriptome_models # Classify and divide into Gold, Silver and Bronze
Array[File]? homology_models # Take as is
Expand All @@ -31,6 +34,8 @@ workflow ei_prediction {

File? repeats_gff # These are passed through to augustus
File? extra_training_models # These models are taken as-is directly as results from the training model selection
File? helixer_model #model used for helixer gene model prediction


Int flank = 200
Int kfold = 8
Expand All @@ -51,7 +56,7 @@ workflow ei_prediction {
String? snap_extra_params
String? augustus_extra_params
String? evm_extra_params

String? helixer_extra_params
RuntimeAttr augustus_resources
}

Expand Down Expand Up @@ -262,6 +267,18 @@ workflow ei_prediction {
Boolean train_utr = select_first([train_utr_, false])
Int total_models = select_first([num_models, 0])

# Run Helixer gene predictions only when explicitly enabled through do_helixer.
# NOTE(review): nothing in this condition checks for GPU availability;
# presumably the Helixer task's runtime queue is expected to provide one - confirm.
# NOTE(review): the Helixer task must declare an `extra_params` input for this
# call to validate, and `helixer_model` / `temp_dir` are optional workflow inputs.
if (do_helixer) {
call Helixer {
input:
genome = def_reference_genome,
model = helixer_model,
temp_dir = temp_dir,
species = species,
extra_params = helixer_extra_params
}
}

# Generate CodingQuarry predictions
# Considers lowercase as masked by default
if (total_models > 1500 && do_codingquarry) {
Expand Down Expand Up @@ -421,6 +438,7 @@ workflow ei_prediction {
augustus_predictions = def_augustus_predictions,
snap_predictions = SNAP.predictions,
glimmer_predictions = GlimmerHMM.predictions,
helixer_predictions = Helixer.predictions,
codingquarry_predictions = CodingQuarry.predictions,
codingquarry_fresh_predictions = CodingQuarryFresh.predictions,
hq_protein_alignments = hq_protein.processed_gff,
Expand Down Expand Up @@ -514,6 +532,7 @@ workflow ei_prediction {
File? codingquarry = EVM.formatted_codingquarry_predictions
File? codingquarry_fresh = EVM.formatted_codingquarry_fresh_predictions
File? augustus_abinitio = EVM.formatted_augustus_abinitio_predictions
File? helixer = EVM.formatted_helixer_predictions
Array[File]? augustus = EVM.formatted_augustus_runs_predictions
File evm_predictions = CombineEVM.predictions
File mikado_loci = MikadoPick.loci
Expand Down Expand Up @@ -1157,6 +1176,56 @@ task GlimmerHMM {
>>>
}


# Run Helixer ab initio gene prediction on the reference genome.
#
# Inputs:
#   genome          - indexed reference; only its `fasta` member is used here.
#   model           - optional trained model file, passed as --model-filepath.
#   sequence_length - optional; forwarded as --subsequence-length.
#                     NOTE(review): mapping inferred from the input name - confirm.
#   species         - species label passed to --species.
#   temp_dir        - optional scratch directory, passed as --temporary-dir.
#   extra_params    - optional free-form flags appended verbatim to the command
#                     (e.g. --lineage when no model file is given).
#   resources       - optional runtime override; defaults are defined below.
#
# Output:
#   predictions     - GFF3 written by Helixer (helixer.predictions.gff3).
task Helixer {
    input {
        IndexedReference genome
        # Optional so that the optional workflow-level inputs can be wired in
        # directly; each flag is emitted only when its value is defined.
        File? model
        Int? sequence_length
        String species
        String? temp_dir
        # Name matches the `extra_params = ...` binding used by the workflow call.
        String? extra_params
        RuntimeAttr? resources
    }

    RuntimeAttr default_attr = object {
        constraints: "avx|avx2|sse4",
        cpu_cores: 8,
        mem_gb: 64,
        max_retries: 1,
        queue: "ei-a100,ei-gpu"
    }

    RuntimeAttr runtime_attr = select_first([resources, default_attr])

    Int cpus = select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
    # default_attr does not set `gpu`, so fall back to a literal 1; without it
    # select_first fails whenever the caller supplies no GPU count.
    Int gpus = select_first([runtime_attr.gpu, default_attr.gpu, 1])

    runtime {
        cpu: cpus
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GB"
        constraints: select_first([runtime_attr.constraints, default_attr.constraints])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        queue: select_first([runtime_attr.queue, default_attr.queue])
        gpu: gpus
    }

    output {
        File predictions = "helixer.predictions.gff3"
    }

    command <<<
        set -euxo pipefail
        ln -s ~{genome.fasta}
        # Optional flags use the "prefix + value" form so they vanish entirely
        # when the value is undefined instead of leaving a flag with no argument.
        Helixer.py ~{"--model-filepath " + model} \
            ~{"--temporary-dir " + temp_dir} \
            ~{"--subsequence-length " + sequence_length} \
            --species ~{species} \
            --fasta-path ~{genome.fasta} \
            ~{extra_params} \
            --gff-output-path helixer.predictions.gff3
    >>>
}

task GenerateModelProteins {
input {
IndexedReference genome
Expand Down Expand Up @@ -1393,6 +1462,7 @@ task EVM {
File? augustus_abinitio
File? snap_predictions
File? glimmer_predictions
File? helixer_predictions
File? codingquarry_predictions
File? codingquarry_fresh_predictions
File? hq_protein_alignments
Expand Down Expand Up @@ -1426,13 +1496,15 @@ task EVM {
File? formatted_codingquarry_fresh_predictions = "codingquarry_fresh.predictions.gff"
File? formatted_augustus_abinitio_predictions = "augustus_abinitio.predictions.gff"
Array[File]? formatted_augustus_runs_predictions = glob("augustus_*.predictions.gff")
File? formatted_helixer_predictions = "helixer.predictions.gff"

File? formatted_snap_predictions_stats = "snap.predictions.stats"
File? formatted_glimmer_predictions_stats = "glimmer.predictions.stats"
File? formatted_codingquarry_predictions_stats = "codingquarry.predictions.stats"
File? formatted_codingquarry_fresh_predictions_stats = "codingquarry_fresh.predictions.stats"
File? formatted_augustus_abinitio_predictions_stats = "augustus_abinitio.predictions.stats"
Array[File]? formatted_augustus_runs_predictions_stats = glob("augustus_*.predictions.stats")
# Must name the file that `mikado util stats` writes for Helixer in the command
# section (helixer.predictions.stats); a mismatched name silently yields null.
File? formatted_helixer_predictions_stats = "helixer.predictions.stats"
}

command <<<
Expand Down Expand Up @@ -1487,6 +1559,11 @@ task EVM {
mikado util stats augustus_abinitio.predictions.gff augustus_abinitio.predictions.stats
fi

if [ "~{helixer_predictions}" != "" ]; then
cat ~{helixer_predictions} | gff_to_evm glimmer | tee helixer.predictions.gff >> predictions.gff
mikado util stats helixer.predictions.gff helixer.predictions.stats
fi

augustus_run=0
for i in ~{sep=" " augustus_predictions}; do
((augustus_run++))
Expand Down
10 changes: 10 additions & 0 deletions cromwell_configuration/slurm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ backend {
Int memory_mb = 8000
String? constraints
String? queue = "ei-medium"
Int? gpu
"""

submit = """
Expand All @@ -84,6 +85,15 @@ backend {
--mem ${memory_mb} \
--wrap "/bin/bash
${script}"
# GPU partitions: request GPUs through --gres.
# Each comparison needs its own test command; a bare || inside one [ ] is a
# syntax error. The combined partition list is matched as well because the
# Helixer task uses it as its default queue.
elif [ "${queue}" == "ei-a100" ] || [ "${queue}" == "ei-gpu" ] || [ "${queue}" == "ei-a100,ei-gpu" ]
then
sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \
-p "${queue}" \
${"-c " + cpu} \
--mem ${memory_mb} \
${"--gres=gpu:" + gpu} \
--wrap "/bin/bash
${script}"
else
sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \
-p ${queue} \
Expand Down
1 change: 1 addition & 0 deletions reat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ dependencies:
- codingquarry=2.0
- augustus=3.4.0
- evidencemodeler=1.1.1

6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="reat",
version="0.6.1",
version="0.7.1",
packages=find_packages(".", exclude=["tests"]),
url="https://github.com/ei-corebioinformatics/reat",
classifiers=[
Expand All @@ -19,8 +19,8 @@
"Programming Language :: Python :: 3.8",
],
license="MIT",
author="Luis Yanes, Gemy Kaithakottil",
author_email="luis.yanes@earlham.ac.uk, gemy.kaithakottil@earlham.ac.uk",
author="Luis Yanes, Gemy Kaithakottil, Mariano Olivera Fedi",
author_email="luis.yanes@earlham.ac.uk, gemy.kaithakottil@earlham.ac.uk, mariano.olivera-fedi@earlham.ac.uk",
description="Robust Eukaryotic Annotation Toolkit",
zip_safe=False,
keywords="gene annotation WDL pipeline workflow cromwell transcriptome homology",
Expand Down
2 changes: 1 addition & 1 deletion singularity/build_image.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
set -euxo
version=0.6.1
version=0.7.1
rundir=$(dirname "$(realpath "$0")")
cd "$(mktemp -d)"
cp "${rundir}"/reat_singularity.def reat.def
Expand Down
Loading