In [1]:
import bioluigi.tasks as tasks
from bioluigi.slurm import SlurmExecutableTask
from bioluigi.decorators import requires, inherits
from bioluigi.utils import CheckTargetNonEmpty
import luigi
import os,re
from luigi import LocalTarget, Parameter
import pandas as pd
import numpy as np
import luigi.tools.deps_tree

In [2]:
# MAKER options for the first round.
maker_opts_1 = {
    'organism_type': 'eukaryotic',
    'model_org': 'fungi',
    'repeat_protein': '/tgac/software/testing/maker/2.31.8/x86_64/data/te_proteins.fasta',
    'est2genome': 1,
    'protein2genome': 1,
    'TMP': '/tmp',
}

# MAKER options for the later rounds: a copy of the round-1 options with
# est2genome/protein2genome set to 0 and the three *_pass flags appended.
# Existing keys keep their position, so the key order is the same as if the
# dict had been written out in full.
maker_opts_2 = dict(
    maker_opts_1,
    est2genome=0,
    protein2genome=0,
    est_pass=1,
    protein_pass=1,
    rm_pass=1,
)

In [14]:
# Root of the PST104 annotation project; both paths below hang off it.
_pst104_dir = '/usr/users/JIC_a1/buntingd/GenomeAnnotation/PST104'

# Reference assembly FASTA passed to the pipeline as `reference`.
reference = _pst104_dir + '/reference/Pst_104E_v13_p_ctg.fa'
# Data directory of the project.
base_dir = _pst104_dir + "/data"

class ReferenceFasta(tasks.ReferenceFasta):
    """Notebook-local subclass of ``tasks.ReferenceFasta``; nothing is overridden."""

@requires(ReferenceFasta)
class AddGenome(tasks.jbrowse.AddGenome):
    """``tasks.jbrowse.AddGenome`` wired via ``@requires`` to depend on ReferenceFasta."""

@requires(ReferenceFasta)
class RepeatMasker(tasks.repeatmodeler.RepeatMasker):
    """``tasks.repeatmodeler.RepeatMasker`` wired via ``@requires`` to depend on ReferenceFasta."""

class StringTieMerge(luigi.ExternalTask):
    """External input task pointing at a fixed ``stringtie.gff3`` file on disk."""

    def output(self):
        """Return a LocalTarget for the hard-coded StringTie GFF3 path."""
        # NOTE(review): the directory part repeats the notebook's base_dir value.
        gff_path = "/usr/users/JIC_a1/buntingd/GenomeAnnotation/PST104/data/stringtie.gff3"
        return LocalTarget(gff_path)
    
class UniProt(luigi.ExternalTask):
    """External input task pointing at a fixed ``uniprot_PST78.fasta`` file on disk."""

    def output(self):
        """Return a LocalTarget for the hard-coded UniProt FASTA path."""
        fasta_path = "/usr/users/JIC_a1/buntingd/GenomeAnnotation/uniprot_PST78.fasta"
        return LocalTarget(fasta_path)

    
## ----------- Initial MAKER run ------------- ##

@inherits(ReferenceFasta,StringTieMerge,UniProt,tasks.maker.MAKER)
class MAKER1(luigi.WrapperTask):
    """First MAKER round.

    Wraps ``tasks.maker.MAKER`` with ReferenceFasta as ``genome``,
    StringTieMerge as ``est_gff`` and UniProt as ``protein``, run with
    ``maker_opts_1``.
    """
    # Plain None class attributes. Presumably these mask the like-named
    # parameters pulled in by @inherits so callers need not supply them --
    # TODO confirm against bioluigi.decorators.inherits.
    maker_opts = None
    maker_prefix = None

    def requires(self):
        """Return the round-1 MAKER task, cloned from this task's parameters."""
        # NOTE(review): the decorator is re-applied to tasks.maker.MAKER on
        # every call (output() calls this too). Whether that subclasses or
        # mutates the shared class depends on bioluigi.decorators.requires --
        # confirm.
        round1_maker = requires(genome=ReferenceFasta,
                                est_gff=StringTieMerge,
                                protein=UniProt)(tasks.maker.MAKER)
        # NOTE(review): this prefix has a double underscore before "round1",
        # unlike the round 2 and round 3 prefixes; left as-is because it is a
        # runtime string.
        return self.clone(round1_maker,
                          maker_opts=maker_opts_1,
                          maker_prefix="Pst_104E_v13_p_ctg__round1")

    def output(self):
        """Expose the wrapped MAKER task's output as this task's own output."""
        wrapped = self.requires()
        return wrapped.output()

@requires(MAKER1)
class GFFMerge1(tasks.maker.GFFMerge):
    """``tasks.maker.GFFMerge`` wired via ``@requires`` to depend on MAKER1."""

    
@requires(gff=GFFMerge1, genome=AddGenome)
class AddGFF1(tasks.jbrowse.AddGFF):
    """``tasks.jbrowse.AddGFF`` with ``gff`` bound to GFFMerge1 and ``genome`` to AddGenome."""

@requires(MAKER1)
class Maker2Jbrowse1(tasks.maker.Maker2Jbrowse):
    """``tasks.maker.Maker2Jbrowse`` wired via ``@requires`` to depend on MAKER1."""

##----------- SNAP Training -------------##

@requires(MAKER1)
class Maker2ZFF1(tasks.maker.Maker2ZFF):
    """``tasks.maker.Maker2ZFF`` wired via ``@requires`` to depend on MAKER1."""

@requires(Maker2ZFF1)
class TrainSNAP1(tasks.maker.TrainSNAP):
    """``tasks.maker.TrainSNAP`` wired via ``@requires`` to depend on Maker2ZFF1."""

@requires(Maker2ZFF1)
class FathomStats1(tasks.maker.FathomStats):
    """``tasks.maker.FathomStats`` wired via ``@requires`` to depend on Maker2ZFF1."""

##----------- Genemark Training -------------##

@requires(RepeatMasker)
class GenemarkESTrain(tasks.genemark.GenemarkESTrain):
    """``tasks.genemark.GenemarkESTrain`` wired via ``@requires`` to depend on RepeatMasker."""

##----------- Second MAKER -------------##

@inherits(TrainSNAP1, GenemarkESTrain)
class MAKER2(luigi.WrapperTask):
    """Second MAKER round.

    Wraps ``tasks.maker.MAKER`` with ReferenceFasta as ``genome``, GFFMerge1
    as ``maker_gff``, TrainSNAP1 as ``snaphmm`` and GenemarkESTrain as
    ``gmhmm``, run with ``maker_opts_2``.
    """
    # Plain None class attributes. Presumably these mask like-named inherited
    # parameters so callers need not supply them -- TODO confirm against
    # bioluigi.decorators.inherits.
    maker_opts = None
    maker_prefix = None

    def requires(self):
        """Return the round-2 MAKER task, cloned from this task's parameters."""
        # NOTE(review): the decorator is re-applied to tasks.maker.MAKER on
        # every call (output() calls this too). Whether that subclasses or
        # mutates the shared class depends on bioluigi.decorators.requires --
        # confirm.
        round2_maker = requires(genome=ReferenceFasta,
                                maker_gff=GFFMerge1,
                                snaphmm=TrainSNAP1,
                                gmhmm=GenemarkESTrain)(tasks.maker.MAKER)
        return self.clone(round2_maker,
                          maker_opts=maker_opts_2,
                          maker_prefix="Pst_104E_v13_p_ctg_round2")

    def output(self):
        """Expose the wrapped MAKER task's output as this task's own output."""
        wrapped = self.requires()
        return wrapped.output()

@requires(MAKER2)
class GFFMerge2(tasks.maker.GFFMerge):
    """``tasks.maker.GFFMerge`` wired via ``@requires`` to depend on MAKER2."""

@requires(gff=GFFMerge2, genome=AddGenome)
class AddGFF2(tasks.jbrowse.AddGFF):
    """``tasks.jbrowse.AddGFF`` with ``gff`` bound to GFFMerge2 and ``genome`` to AddGenome."""

@requires(MAKER2)
class Maker2Jbrowse2(tasks.maker.Maker2Jbrowse):
    """``tasks.maker.Maker2Jbrowse`` wired via ``@requires`` to depend on MAKER2."""

##----------- Second SNAP Training -------------##

@requires(MAKER2)
class Maker2ZFF2(tasks.maker.Maker2ZFF):
    """``tasks.maker.Maker2ZFF`` wired via ``@requires`` to depend on MAKER2."""

@requires(Maker2ZFF2)
class TrainSNAP2(tasks.maker.TrainSNAP):
    """``tasks.maker.TrainSNAP`` wired via ``@requires`` to depend on Maker2ZFF2."""

@requires(Maker2ZFF2)
class FathomStats2(tasks.maker.FathomStats):
    """``tasks.maker.FathomStats`` wired via ``@requires`` to depend on Maker2ZFF2."""

##----------- Third MAKER -------------##

@inherits(MAKER2, TrainSNAP2)
class MAKER3(luigi.WrapperTask):
    """Third MAKER round.

    Wraps ``tasks.maker.MAKER`` with ReferenceFasta as ``genome``, GFFMerge2
    as ``maker_gff``, TrainSNAP2 as ``snaphmm`` and GenemarkESTrain as
    ``gmhmm``, run with ``maker_opts_2``.
    """
    # Plain None class attributes. Presumably these mask like-named inherited
    # parameters so callers need not supply them -- TODO confirm against
    # bioluigi.decorators.inherits.
    maker_opts = None
    maker_prefix = None

    def requires(self):
        """Return the round-3 MAKER task, cloned from this task's parameters."""
        # NOTE(review): the decorator is re-applied to tasks.maker.MAKER on
        # every call (output() calls this too). Whether that subclasses or
        # mutates the shared class depends on bioluigi.decorators.requires --
        # confirm.
        round3_maker = requires(genome=ReferenceFasta,
                                maker_gff=GFFMerge2,
                                snaphmm=TrainSNAP2,
                                gmhmm=GenemarkESTrain)(tasks.maker.MAKER)
        return self.clone(round3_maker,
                          maker_opts=maker_opts_2,
                          maker_prefix="Pst_104E_v13_p_ctg_round3")

    def output(self):
        """Expose the wrapped MAKER task's output as this task's own output."""
        wrapped = self.requires()
        return wrapped.output()
    
@requires(MAKER3)
class GFFMerge3(tasks.maker.GFFMerge):
    """``tasks.maker.GFFMerge`` wired via ``@requires`` to depend on MAKER3."""

@requires(gff=GFFMerge3, genome=AddGenome)
class AddGFF3(tasks.jbrowse.AddGFF):
    """``tasks.jbrowse.AddGFF`` with ``gff`` bound to GFFMerge3 and ``genome`` to AddGenome."""

@requires(MAKER3)
class Maker2Jbrowse3(tasks.maker.Maker2Jbrowse):
    """``tasks.maker.Maker2Jbrowse`` wired via ``@requires`` to depend on MAKER3."""


##----------- Pipeline Wrapper -------------##
@requires(Maker2Jbrowse1, Maker2Jbrowse2, Maker2Jbrowse3,
          AddGFF1, AddGFF2, AddGFF3,
          FathomStats1, FathomStats2)
class MAKERPipelineWrapper(luigi.WrapperTask):
    """Top-level wrapper that requires the end-point tasks of the pipeline.

    These are the three Maker2Jbrowse tasks, the three AddGFF tasks and the
    two FathomStats tasks; the wrapper adds no work of its own.
    """

In [15]:
# Run the whole pipeline against the remote luigi scheduler.
# base_dir reuses the notebook-level variable rather than repeating the literal
# path, so the data directory is defined in exactly one place.
luigi.build([MAKERPipelineWrapper(reference=reference,
                    base_dir=base_dir,
                    jbrowse_dir='/usr/users/JIC_a1/buntingd/GenomeAnnotation/Jbrowse/JBrowse-1.15.0')],
            workers=25,
            local_scheduler=False,
            scheduler_host='j512n1',
            log_level='INFO')

INFO: Informed scheduler that task   MAKERPipelineWrapper__usr_users_JIC_a__usr_users_JIC_a__usr_users_JIC_a_7b759e266d   has status   PENDING
INFO: Informed scheduler that task   FathomStats2__usr_users_JIC_a__usr_users_JIC_a_2b875e6c10   has status   DONE
INFO: Informed scheduler that task   FathomStats1__usr_users_JIC_a__usr_users_JIC_a_2b875e6c10   has status   DONE
INFO: Informed scheduler that task   AddGFF3__usr_users_JIC_a__usr_users_JIC_a__usr_users_JIC_a_7b759e266d   has status   PENDING
INFO: Informed scheduler that task   GFFMerge3__usr_users_JIC_a__usr_users_JIC_a_2b875e6c10   has status   DONE
INFO: Informed scheduler that task   AddGenome__usr_users_JIC_a__usr_users_JIC_a_5f48dcba3f   has status   PENDING
INFO: Informed scheduler that task   ReferenceFasta__usr_users_JIC_a_2c99a42331   has status   DONE
INFO: Informed scheduler that task   AddGFF2__usr_users_JIC_a__usr_users_JIC_a__usr_users_JIC_a_7b759e266d   has status   PENDING
INFO: Informed scheduler that task   GFF

True

In [None]:
# Print the dependency tree of the full pipeline.
# base_dir reuses the notebook-level variable rather than repeating the literal
# path, so the data directory is defined in exactly one place.
print(luigi.tools.deps_tree.print_tree(MAKERPipelineWrapper(reference=reference,
                      base_dir=base_dir,
                      jbrowse_dir='/usr/users/JIC_a1/buntingd/GenomeAnnotation/Jbrowse/JBrowse-1.15.0')))

In [18]:
# Show the task id luigi derives for AddGFF3 with the pipeline's parameters.
# base_dir reuses the notebook-level variable rather than repeating the literal
# path, so the data directory is defined in exactly one place.
AddGFF3(reference=reference,
       jbrowse_dir='/usr/users/JIC_a1/buntingd/GenomeAnnotation/Jbrowse/JBrowse-1.15.0',
       base_dir=base_dir).task_id

'AddGFF3__usr_users_JIC_a__usr_users_JIC_a__usr_users_JIC_a_7b759e266d'