# Generate config files for new SAILOR pipeline (snakemake), hg38, with default settings

### This script will make sailor.qsub, sailor_snakemake.sh and hg38_sailor_config_ct.json

In [1]:
import os
cwd = os.getcwd()

main_directory = cwd
print(main_directory)

/tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks


In [2]:
import subprocess

config_path = main_directory
qsub_file = main_directory + "/sailor.qsub"
sh_file = main_directory + "/sailor_snakemake.sh"
json_file = main_directory + "/hg38_sailor_config_ct.json"
samples_path = main_directory + "/genome_align_se/results"
output_dir = main_directory + "/sailor_output/"

subprocess.run(["mkdir", output_dir])
print(config_path, '\n', qsub_file, '\n', sh_file, '\n', json_file, '\n', samples_path, '\n', output_dir)

/tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks 
 /tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks/sailor.qsub 
 /tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks/sailor_snakemake.sh 
 /tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks/hg38_sailor_config_ct.json 
 /tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks/genome_align_se/results 
 /tscc/lustre/ddn/scratch/q2liang/isSTAMP_publication_scripts/example_analysis_notebooks/sailor_output/


In [3]:
import os
from glob import glob

samples_to_keep = [f.split('/')[-1] for f in glob(samples_path + '/*fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam')]

print(len(samples_to_keep))
print(samples_to_keep)


18
['isSTAMP10_07_S7_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_13_S13_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_06_S6_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_10_S10_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_05_S5_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_04_S4_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_15_S15_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_02_S2_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_18_S18_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_03_S3_L004_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted.bam', 'isSTAMP10_16_S16_L004

In [4]:
qsub_header = [
    '#!/bin/bash',
    '#PBS -N sailor',
    '#PBS -o sailor.sh.out',
    '#PBS -e sailor.sh.err',
    '#PBS -V',
    '#PBS -l walltime=48:00:00', 
    '#PBS -l nodes=1:ppn=8',
    '#PBS -A yeo-group',
    '#PBS -q home',
    ''
    '# USE FULL PATHS TO YOUR CONFIG FILE AND LOG'
]

In [5]:

cmd_line = 'source .bashrc\n' + 'module load snakemake\n' + 'cd ' + config_path + '\n' + 'chmod +x ' + sh_file + '\n' + 'sh ' + sh_file


with open(qsub_file, 'w') as f:
    for l in qsub_header:
        f.write('{}\n'.format(l))
    f.write('\n')
    f.write(cmd_line)
    f.write('\n')


In [6]:
sailor_sh_lines = [
'snakemake \\',
'--profile /tscc/projects/ps-yeolab4/software/stamp/0.99.0/bin/stamp/profiles/tscc_sailor \\',
'--configfile ' + json_file + ' \\',
'--rerun-incomplete \\',
'-j 30'
]

with open(sh_file, 'w') as f:
    for h in sailor_sh_lines:
        f.write('{}\n'.format(h))



In [7]:
import json 

input_json_ct = {
    'samples_path': samples_path,
    'samples': samples_to_keep,
    'remove_duplicates':False,
    'junction_overhang':10,
    'edge_mutation':5,
    'non_ag':1,
    'reverse_stranded':True,
    'reference_fasta':"/tscc/projects/ps-yeolab3/bay001/annotations/hg38/hg38.fa",
    'dp':"DP4",
    'min_variant_coverage':5,
    'known_snps':"/tscc/projects/ps-yeolab3/ekofman/ReferenceData/hg38/human_9606_b151_GRCh38p7_dbsnp/b151_GRCh38p7_common.with_chr.bed3",
    'edit_type':"ct",
    'edit_fraction':0.01,
    'alpha':0,
    'beta':0,
    'keep_all_edited':False,
    'output_dir':output_dir,
    'snakemake_dir_path':"/tscc/projects/ps-yeolab4/software/stamp/0.99.0/bin/stamp/"
}

with open(json_file, 'w') as f:
    json.dump(input_json_ct, f)
