Skip to content

Commit

Permalink
mapping to NCTC assembly done
Browse files Browse the repository at this point in the history
  • Loading branch information
govinda-kamath committed Aug 6, 2016
1 parent 6ca0094 commit 3739f8a
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 0 deletions.
47 changes: 47 additions & 0 deletions map_to_nctc_assemblies.py
@@ -0,0 +1,47 @@
#!/usr/bin/python

import sys
import os
import subprocess

# Command-line arguments:
#   argv[1]  dataset identifier; also names its working directory data/<id>/
#   argv[2]  prefix prepended to "scripts/..." to locate the helper scripts
#   argv[3]  optional stage number to resume from (default 0 runs every stage)
bact_id, path_to_hinge = sys.argv[1], sys.argv[2]
st_point = int(sys.argv[3]) if len(sys.argv) > 3 else 0

# Every external command below is run with this directory as its cwd.
base_path = 'data/{0}/'.format(bact_id)

# File and database names, all derived from the dataset identifier.
db_name = bact_id
hgap_db_name = '{0}_hgap'.format(bact_id)
old_fasta_name = hgap_db_name + '.fasta'
fasta_name = hgap_db_name + '_pb.fasta'
laname = '{0}.{1}.las'.format(hgap_db_name, db_name)

# Stage 1: run scripts/correct_head.py on the HGAP FASTA to produce
# fasta_name, then load that file into the hgap_db_name database with fasta2DB.
if st_point <= 1:
    # os.path.join works whether or not path_to_hinge ends with a slash;
    # plain string concatenation produced "<prefix>scripts/..." otherwise.
    # A relative path_to_hinge is still resolved from base_path, because the
    # command runs with cwd=base_path.
    correct_head_script = os.path.join(path_to_hinge, "scripts/correct_head.py")
    correct_head_cmd = ("python " + correct_head_script + " " + old_fasta_name
                        + " " + fasta_name + " hgapgt.txt")
    # check_call aborts here if the helper fails, rather than letting fasta2DB
    # run on a missing or partial file.
    subprocess.check_call(correct_head_cmd, shell=True, cwd=base_path)

    # Best-effort removal of a previous database; the shell expands the glob.
    subprocess.call("rm -f *_hgap.db", shell=True, cwd=base_path)

    fasta2DB_cmd = "fasta2DB " + hgap_db_name + ' ' + fasta_name
    print(fasta2DB_cmd)
    # Raises subprocess.CalledProcessError if fasta2DB exits non-zero.
    subprocess.check_output(fasta2DB_cmd.split(), cwd=base_path)

# Stage 2: capture the stdout of HPCmapper into mapper_cmdf.sh and then execute
# that generated script with csh.
if st_point <= 2:
    # Best-effort removal of alignment files left by a previous run.
    subprocess.call("rm -f *_hgap.*.las", shell=True, cwd=base_path)

    mapper_cmd = "HPCmapper " + hgap_db_name + " " + db_name
    print(mapper_cmd)
    mapper_shell_cmd = "csh -v mapper_cmdf.sh"

    # The with-block closes the script file before csh reads it. check_call
    # stops the pipeline if HPCmapper fails, so a truncated script is never
    # executed.
    with open(base_path + 'mapper_cmdf.sh', 'w') as script_file:
        subprocess.check_call(mapper_cmd.split(), stdout=script_file,
                              cwd=base_path)

    # Raises subprocess.CalledProcessError if the generated script fails.
    subprocess.check_output(mapper_shell_cmd.split(), cwd=base_path)

# Stage 3: merge the .las files matching the glob below into the single
# alignment file `laname`.
if st_point <= 3:
    # The glob is expanded by the shell (hence shell=True below).
    # NOTE(review): with both wildcards empty this pattern equals `laname`
    # itself. Stage 2 deletes old .las files, but it is skipped when resuming
    # with st_point == 3, so a stale merged file would be passed to LAmerge as
    # an input as well as the output -- confirm whether that is a problem.
    las_glob = "{0}*.{1}*.las".format(hgap_db_name, db_name)
    LAmerge_cmd = " ".join(["LAmerge", laname, las_glob])
    print(LAmerge_cmd)
    # Raises subprocess.CalledProcessError if LAmerge exits non-zero.
    subprocess.check_output(LAmerge_cmd, cwd=base_path, shell=True)

# Stage 4: run scripts/run_mapping2.py on the two databases and the merged
# alignment file, with the trailing arguments "1-$" and "4".
if st_point <= 4:
    # os.path.join works whether or not path_to_hinge ends with a slash;
    # plain string concatenation produced "<prefix>scripts/..." otherwise.
    run_mapping_script = os.path.join(path_to_hinge, "scripts/run_mapping2.py")
    mp_cmd = ("python " + run_mapping_script + " " + hgap_db_name + " "
              + db_name + " " + laname + ' 1-$ 4')
    print(mp_cmd)
    # The command's stdout is captured and discarded; a non-zero exit raises
    # subprocess.CalledProcessError.
    subprocess.check_output(mp_cmd, cwd=base_path, shell=True)

26 changes: 26 additions & 0 deletions map_wrapper.py
@@ -0,0 +1,26 @@
import numpy as np
import os
import multiprocessing as mp
import subprocess
import sys

base_path = 'data/'

#file_list = np.loadtxt('/data/pacbio_assembly/notebook/files_assembled.txt',dtype='str')

# One dataset name per entry. ndmin=1 keeps the result one-dimensional even
# when the file holds a single name; without it loadtxt returns a 0-d array,
# which pool.map() cannot take the length of.
file_list = np.loadtxt('NCTC_names.txt', dtype='str', ndmin=1)

# argv[1]: path prefix handed on unchanged to map_to_nctc_assemblies.py.
path_to_hinge = sys.argv[1]

# argv[2]: optional number of worker processes (defaults to 1).
num_proc = 1
if len(sys.argv) > 2:
    num_proc = int(sys.argv[2])

def run_pipeline(flname):
    """Run map_to_nctc_assemblies.py for one dataset.

    flname is passed as the script's first argument and the module-level
    path_to_hinge as its second. The command line is printed before it is
    executed through the shell.

    Returns the exit status reported by subprocess.call, so a caller that
    collects the pool.map() results can see which datasets failed.
    """
    base_cmd = 'python map_to_nctc_assemblies.py '+ flname +' '+ path_to_hinge
    print(base_cmd)
    return subprocess.call(base_cmd, shell=True)

# Fan the datasets out over num_proc worker processes; map() blocks until
# every dataset has been processed.
pool = mp.Pool(processes=num_proc)
pool.map(run_pipeline,file_list)
# Shut the workers down explicitly instead of relying on interpreter exit.
pool.close()
pool.join()

0 comments on commit 3739f8a

Please sign in to comment.