## Implémentation dans Cosmos

In [2]:
#!/usr/bin/env python

#------------------------
# IMPORTATION DES MODULES

import sys
sys.path.append('./modules') #-> Pour implémenter les modules dans le répertoire 'modules' 

    #--------------------
    # Modules perso
import Sequence                                                                       
from modules.tools import quality_treatment, length_treatment, format_files, r_call, fastq2fasta 

    #--------------------
    # Modules cosmos
import os
import subprocess
from cosmos.api import Cosmos, draw_stage_graph, draw_task_graph, pygraphviz_available

#------------------------
# DEFINITION DU WORKFLOW 'run_test'


def _add_plot_tasks(execution, fastq_path, formatted_prefix, results_prefix, parents=None):
    """Register the formatting tasks and the two R plotting tasks for one FASTQ file.

    One `format_files` task is added per ftype ('quality', 'length'), followed by
    one `r_call` task per plotting script; every plotting task depends on both
    formatting tasks.

    Args:
        execution: object exposing `add_task` (the execution handed to `run_test`).
        fastq_path: value of the `path2input` tag given to the formatting tasks.
        formatted_prefix: value of the `path2output` tag given to the formatting
            tasks. The plotting tasks read `<formatted_prefix>_quality.txt` and
            `<formatted_prefix>_length.txt` (presumably written by `format_files`;
            confirm against modules.tools).
        results_prefix: prefix of the PDF paths passed to the plotting tasks.
        parents: tasks the formatting tasks depend on, or None for no dependency.
    """
    # Forward `parents` only when an upstream task exists, so that the
    # dependency-free case relies on add_task's own default.
    dependency = {} if parents is None else {'parents': parents}

    formatted = [
        execution.add_task(
            format_files,
            tags=dict(path2input=fastq_path, path2output=formatted_prefix, ftype=ftype),
            **dependency
        )
        for ftype in ['quality', 'length']
    ]

    plots = (
        ("plot_quality_scores.R", "_quality.txt", "_quality_scores.pdf"),
        ("plot_sequence_length.R", "_length.txt", "_sequence_length.pdf"),
    )
    for script, input_suffix, output_suffix in plots:
        execution.add_task(
            r_call,
            tags=dict(path2script=script,
                      path2input=formatted_prefix + input_suffix,
                      path2output=results_prefix + output_suffix),
            parents=formatted
        )


def run_test(execution):
    """Build the 'run_test' workflow on `execution` and run it.

    Tasks are registered in this order:
      1. formatting + plots for the raw FASTQ file;
      2. quality treatment (threshold=15), then formatting + plots of its output (AQT);
      3. length treatment (threshold=60) of the quality-treated file, then
         formatting + plots of its output (ALT);
      4. FASTQ to FASTA conversion of the length-treated file.
    The stage and task graphs are drawn when pygraphviz is available, and the
    execution is finally run with max_attempts=1 and max_cores=10.

    Args:
        execution: the Cosmos execution returned by `cosmos.start(...)`.
    """
    raw_fastq = "testing/data/4.fastq"
    quality_fastq = "testing/data/quality_treatment_results.fastq"
    length_fastq = "testing/data/length_treatment_results.fastq"

    #--------------------
    # Initial graphs (before any treatment)
    _add_plot_tasks(execution, raw_fastq,
                    "testing/data/formated_initial", "testing/results/initial")

    #--------------------
    # Quality treatment and graphs After Quality Treatment (AQT)
    quality_tasks = [
        execution.add_task(
            quality_treatment,
            tags=dict(path2file=raw_fastq, path2output=quality_fastq, threshold=15)
        )
    ]
    _add_plot_tasks(execution, quality_fastq,
                    "testing/data/formated_AQT", "testing/results/AQT",
                    parents=quality_tasks)

    #--------------------
    # Sequence length treatment and graphs After Length Treatment (ALT)
    length_tasks = [
        execution.add_task(
            length_treatment,
            tags=dict(path2file=quality_fastq, path2output=length_fastq, threshold=60),
            parents=quality_tasks
        )
    ]
    _add_plot_tasks(execution, length_fastq,
                    "testing/data/formated_ALT", "testing/results/ALT",
                    parents=length_tasks)

    #--------------------
    # FASTA output file
    execution.add_task(
        fastq2fasta,
        tags=dict(path2input=length_fastq, path2output="testing/results/Final.fasta"),
        parents=length_tasks
    )

    #--------------------
    # Workflow diagrams (only if pygraphviz is installed)
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface.
        # Each file name matches the graph it contains (they used to be swapped).
        draw_stage_graph(execution.stage_graph(), 'testing/workflow_info/test_stage_graph.png', format='png')
        draw_task_graph(execution.task_graph(), 'testing/workflow_info/test_task_graph.png', format='png')
    else:
        # Call form prints the same text under Python 2 and Python 3.
        print('Pygraphviz is not available :(')

    execution.run(max_attempts=1, max_cores=10)
    
#------------------------
# WORKFLOW EXECUTION

#--------------------
# Connect to the Cosmos services (SQL database)
cosmos = Cosmos('sqlite:///sqlite.db')
cosmos.initdb()

#--------------------
# Create the result sub-directories and stage the input FASTQ file.
# Argument lists are used so that these fixed commands do not go through a shell.
subprocess.check_call(['mkdir', '-p', 'testing', 'testing/data', 'testing/results', 'testing/workflow_info'])
subprocess.check_call(['cp', 'extdata/4.fastq', 'testing/data'])

#--------------------
# Define the execution and run it
# NOTE(review): the captured run log reports the output_dir being deleted when
# the execution is restarted; confirm that the directories and the FASTQ file
# staged above still exist when the tasks run.
execution = cosmos.start('Test1', 'testing', restart=True, skip_confirm=True)
run_test(execution)

Initializing sql database for Cosmos v1.7.3...
INFO: 2016-04-25 10:02:57: Deleting <Execution[1] Test1>, output_dir=/home/bulteaur/Documents/Modules/src/testing, delete_files=False
Deleting output_dir: /home/bulteaur/Documents/Modules/src/testing...
<Execution[1] Test1> Deleting from SQL...
<Execution[1] Test1> Deleted
INFO: 2016-04-25 10:02:58: Preparing to run <Execution[1] Test1> using DRM `local`, output_dir: `/home/bulteaur/Documents/Modules/src/testing`
INFO: 2016-04-25 10:02:58: <Stage[3] Quality_Treatment> Has not been attempted
INFO: 2016-04-25 10:02:58: <Stage[4] Length_Treatment> Has not been attempted
INFO: 2016-04-25 10:02:58: <Stage[1] Format_Files> Has not been attempted
INFO: 2016-04-25 10:02:58: <Stage[2] R_Call> Has not been attempted
INFO: 2016-04-25 10:02:58: <Stage[5] Fastq2Fasta> Has not been attempted
INFO: 2016-04-25 10:02:58: Skipping 0 successful tasks...
INFO: 2016-04-25 10:02:58: Ensuring there are enough cores...
INFO: 2016-04-25 10:02:58: Committing to SQL