In [1]:
import subprocess
import os
import logging

In [2]:
__author__='Maggie Ruimin Sun'

This notebook is adapted from alignReads.py, written by Martin Aryee.
The original source code is available at https://github.com/aryeelab/guideseq/tree/master/guideseq

In [3]:
# Logger obtained under the name 'root'; its records are not propagated to the
# handlers of ancestor loggers.
# NOTE(review): no visible code in this notebook calls this logger -- status
# messages are emitted with print instead. Confirm whether it is still needed.
logger = logging.getLogger('root')
logger.propagate = False

In [5]:
def alignReads(BWA_path, HG19_ref_seq, HG19_ref_index, read1, read2, outfile):
    """Align paired-end reads with ``bwa mem`` and write its stdout to ``outfile``.

    If any of the BWA index files (``.pac``, ``.amb``, ``.ann``, ``.bwt``,
    ``.sa``) is missing for the prefix ``HG19_ref_index``, the index is first
    built from ``HG19_ref_seq`` with ``bwa index -p``.

    Args:
        BWA_path: Path to the bwa executable.
        HG19_ref_seq: Reference sequence file; only used when the index has
            to be generated.
        HG19_ref_index: Prefix of the BWA index files (the extensions listed
            above are appended to it).
        read1: First reads file handed to ``bwa mem``.
        read2: Second reads file handed to ``bwa mem``.
        outfile: File that receives the stdout of ``bwa mem``. Its parent
            directory is created when it does not exist yet.

    Raises:
        subprocess.CalledProcessError: If ``bwa index`` or ``bwa mem`` exits
            with a non-zero status.
    """
    # A bare file name has an empty dirname, and os.makedirs('') raises, so
    # only create the folder when there actually is a directory component.
    output_folder = os.path.dirname(outfile)
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)

    index_files_extensions = ['.pac', '.amb', '.ann', '.bwt', '.sa']
    genome_indexed = all(os.path.isfile(HG19_ref_index + extension)
                         for extension in index_files_extensions)

    # Commands are built as argument lists (not split strings) so that paths
    # containing whitespace reach bwa as a single argument.
    # print is called with one parenthesised string so the statements behave
    # the same under Python 2 and Python 3.
    if not genome_indexed:
        print('Genome index files not detected. Running BWA to generate indices.')
        bwa_index_command = [BWA_path, 'index', '-p', HG19_ref_index, HG19_ref_seq]
        print("Running bwa command: " + ' '.join(bwa_index_command))
        # check_call raises on failure instead of silently reporting success.
        subprocess.check_call(bwa_index_command)
        print('BWA genome index generated.')
    else:
        print('BWA genome index found.')

    print('Running paired end mapping.')
    bwa_alignment_command = [BWA_path, 'mem', HG19_ref_index, read1, read2]
    print(' '.join(bwa_alignment_command))

    with open(outfile, 'w') as f:
        subprocess.check_call(bwa_alignment_command, stdout=f)
    print('Paired end mapping completed.')

In [6]:
def main():
    """Run alignReads on the hard-coded DHS-002Z reference and consolidated read files."""
    # Every path below is an absolute, machine-specific location.
    alignReads(
        BWA_path='/home/yaneng/RSun/Softwares/bwa/bwa',
        HG19_ref_seq='/home/yaneng/RSun/Data/qiagen-colon/DHS-002Z.refSeq.fa',
        HG19_ref_index='/home/yaneng/RSun/Softwares/bwa/QIANGEN_DHS_002Z',
        read1='/home/yaneng/RSun/Data/qiagen-colon/consolidated/QIAGEN-2959YJ_S2_L001_R1_001_consolidated.fq',
        read2='/home/yaneng/RSun/Data/qiagen-colon/consolidated/QIAGEN-2959YJ_S2_L001_R2_001_consolidated.fq',
        outfile='/home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.sam',
    )

In [8]:
# Entry point: runs the alignment when this code is executed directly
# (the captured output below shows this guard firing in the notebook).
if __name__ == '__main__':
    main()

Genome index files not detected. Running BWA to generate indices.
Running bwa command: /home/yaneng/RSun/Softwares/bwa/bwa index -p /home/yaneng/RSun/Softwares/bwa/QIANGEN_DHS_002Z /home/yaneng/RSun/Data/qiagen-colon/DHS-002Z.refSeq.fa
BWA genome index generated.
Running paired end mapping.
/home/yaneng/RSun/Softwares/bwa/bwa mem /home/yaneng/RSun/Softwares/bwa/QIANGEN_DHS_002Z /home/yaneng/RSun/Data/qiagen-colon/consolidated/QIAGEN-2959YJ_S2_L001_R1_001_consolidated.fq /home/yaneng/RSun/Data/qiagen-colon/consolidated/QIAGEN-2959YJ_S2_L001_R2_001_consolidated.fq
Paired end mapping completed.
