In [1]:
import subprocess
import os
import logging

In [2]:
# Notebook authorship metadata.
__author__ = 'Maggie Ruimin Sun'

In [3]:
# Logger registered under the name 'root'. With propagate switched off, records
# logged through it are not passed on to the handlers of ancestor loggers.
# NOTE(review): none of the functions visible in this notebook use `logger`.
logger = logging.getLogger('root')
logger.propagate = False

In [4]:
def sortSAM(sam_file, samtool_dir, out_bam_file, out_sort_file):
    """Convert a SAM alignment file to BAM and sort it with samtools.

    Runs ``samtools view -b`` followed by ``samtools sort``, echoing each
    command before it is executed.

    Args:
        sam_file: Path to the input SAM file.
        samtool_dir: Path to the samtools executable (it is used as the
            program to run, not as a directory).
        out_bam_file: Path where the unsorted BAM is written.
        out_sort_file: Path where the sorted BAM is written.

    Returns:
        True when both samtools steps exit with status 0; False when the
        input file is missing or a step fails (later steps are skipped).
    """
    if not os.path.isfile(sam_file):
        print('Reads have not been aligned yet, or the path to SAM file is WRONG!')
        # Nothing to convert: running samtools on a missing file can only fail.
        return False
    print('Alignment file is: ' + sam_file)

    # Both outputs need an existing parent folder. dirname() returns '' for a
    # bare file name, which os.makedirs() rejects, so that case is skipped.
    for target in (out_bam_file, out_sort_file):
        folder = os.path.dirname(target)
        if folder and not os.path.exists(folder):
            os.makedirs(folder)

    print('Sorting the alignment file...')
    # Explicit argument lists (rather than str.split()) keep paths that
    # contain whitespace as a single argument.
    steps = (
        [samtool_dir, 'view', '-b', sam_file, '-o', out_bam_file],
        [samtool_dir, 'sort', out_bam_file, '-o', out_sort_file],
    )
    for argv in steps:
        print(' '.join(argv))
        status = subprocess.call(argv)
        if status != 0:
            print('Command failed with exit status {0}: {1}'.format(status, ' '.join(argv)))
            return False

    print('The alignment file ' + sam_file + ' has been successfully sorted.')
    return True

In [5]:
def callVar(in_sort_file, ref_FA_file, samtool_dir, varscan_dir, min_MAPQ, pileup_file, output_vcf):
    """Pile up a sorted BAM against a reference and call SNVs with VarScan.

    Steps: index the reference with ``samtools faidx`` if ``<ref>.fai`` is
    absent, write ``samtools mpileup`` output to ``pileup_file``, then write
    ``VarScan mpileup2snp --output-vcf`` output to ``output_vcf``. Each
    command is echoed before it runs.

    Args:
        in_sort_file: Path to the sorted BAM file.
        ref_FA_file: Path to the reference FASTA file.
        samtool_dir: Path to the samtools executable (used as the program to
            run, not as a directory).
        varscan_dir: Path to the VarScan jar (passed to ``java -jar``).
        min_MAPQ: Value appended to samtools mpileup's ``-q`` option.
        pileup_file: Path where the mpileup output is written.
        output_vcf: Path where VarScan's standard output is written.

    Returns:
        True when every executed command exits with status 0; False when the
        sorted BAM is missing or a command fails (later steps are skipped).
    """

    def _run(argv, stdout=None):
        """Echo and execute one command; return True if it exits with 0."""
        print(' '.join(argv))
        status = subprocess.call(argv, stdout=stdout)
        if status != 0:
            print('Command failed with exit status {0}: {1}'.format(status, ' '.join(argv)))
        return status == 0

    if not os.path.isfile(in_sort_file):
        print(in_sort_file + ' does not exist!')
        # Without the sorted BAM every later step would fail.
        return False
    print('Sorted alignment file is: ' + in_sort_file)

    # Both outputs need an existing parent folder. dirname() returns '' for a
    # bare file name, which os.makedirs() rejects, so that case is skipped.
    for target in (pileup_file, output_vcf):
        folder = os.path.dirname(target)
        if folder and not os.path.exists(folder):
            os.makedirs(folder)

    if not os.path.isfile(ref_FA_file + '.fai'):
        print('Reference sequence file is not faidx indexed.')
        if not _run([samtool_dir, 'faidx', ref_FA_file]):
            return False
        print('Reference sequence file is faidx indexed.\n')
    else:
        print('Reference sequence file has been faidx indexed.\n')

    print('\nPiling alignment results up with the reference genome sequence...')
    # Explicit argument lists (rather than str.split()) keep paths that
    # contain whitespace as a single argument.
    pileup_argv = [samtool_dir, 'mpileup', '-f', ref_FA_file,
                   '-q{0}'.format(min_MAPQ), in_sort_file]
    with open(pileup_file, 'w') as pileup_out:
        piled_up = _run(pileup_argv, stdout=pileup_out)
    if not piled_up:
        return False

    print('\nCalling SNVs...')
    snv_argv = ['java', '-jar', varscan_dir, 'mpileup2snp', pileup_file, '--output-vcf']
    with open(output_vcf, 'w') as vcf_out:
        called = _run(snv_argv, stdout=vcf_out)
    if not called:
        return False

    print('SNV calling completed.')
    return True

In [11]:
def main():
    """Run the pipeline on one hard-coded sample: sort the SAM, then call variants."""
    # Tool locations.
    samtools_bin = '/home/yaneng/RSun/Softwares/samtools/samtools'
    varscan_jar = '/home/yaneng/RSun/Softwares/VarScan.v2.3.9.jar'

    # Inputs.
    aligned_sam = '/home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.sam'
    reference_fasta = '/home/yaneng/RSun/Data/qiagen-colon/DHS-002Z.refSeq.fa'
    mapq_cutoff = 30

    # Outputs, in the order the pipeline produces them.
    unsorted_bam = '/home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.bam'
    sorted_bam = '/home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_sorted.bam'
    pileup_path = '/home/yaneng/RSun/Data/qiagen-colon/variance_called/QIANGEN-2959YJ_S2_L001.pileup'
    vcf_path = '/home/yaneng/RSun/Data/qiagen-colon/variance_called/QIANGEN-2959YJ_S2_L001_var.vcf'

    sortSAM(
        sam_file=aligned_sam,
        samtool_dir=samtools_bin,
        out_bam_file=unsorted_bam,
        out_sort_file=sorted_bam,
    )
    # The sorted BAM written by the first stage is the input of the second.
    callVar(
        in_sort_file=sorted_bam,
        ref_FA_file=reference_fasta,
        samtool_dir=samtools_bin,
        varscan_dir=varscan_jar,
        min_MAPQ=mapq_cutoff,
        pileup_file=pileup_path,
        output_vcf=vcf_path,
    )
    

In [12]:
# Entry point: run the pipeline when this code executes as the main module.
if __name__ == '__main__':
    main()

Alignment file is: /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.sam
Sorting the alignment file...
/home/yaneng/RSun/Softwares/samtools/samtools view -b /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.sam -o /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.bam
/home/yaneng/RSun/Softwares/samtools/samtools sort /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.bam -o /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_sorted.bam
The alignment file /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_aligned.sam has been successfully sorted.
Sorted alignment file is: /home/yaneng/RSun/Data/qiagen-colon/aligned/QIANGEN-2959YJ_S2_L001_sorted.bam
Reference sequence file has been faidx indexed.


Piling alignment results up with the reference genome sequence...
/home/yaneng/RSun/Softwares/samtools/samtools mpileup -f /home/yaneng/RSun/Data/qiagen-colon/DHS-0