SciLifeLab · J35P312 · Jun 7, 2022 · Jun 7, 2022 · Jun 7, 2022
diff --git a/README.md b/README.md
@@ -8,10 +8,14 @@ On a 30X human genome, the TIDDIT SV module typically completetes within 5 hours
 
 INSTALLATION
 ==============
-TIDDIT requires python3, cython, pysam, and Numpy; as well as bwa and fermikit (fermi2 and ropebwt2).
+TIDDIT requires python3, cython, pysam, and Numpy.
+
+By default, tiddit will require, bwa, fermi2 and ropebwt2 for local assembly; local assembly may be disabled through the "--skip_assembly" parameter.
 
 Installation
 
+Cloning from Git Hub:
+
 ```
 git clone https://github.com/SciLifeLab/TIDDIT.git
 ```
@@ -22,39 +26,38 @@ cd tiddit
 pip install -e .
 ```
 
-Next install fermikit, I recommend using conda:
+Next install fermi2, ropebwt2, and bwa, I recommend using conda:
 
-```
-conda install fermikit
-```
+conda install fermi2 ropebwt2 bwa
 
 You may also compile bwa, fermi2, and ropebwt2 yourself. Remember to add executables to path, or provide path through the command line parameters.
-
 ```
-
 tiddit --help
-tiddit  --sv --help
-tiddit  --cov --help
+tiddit --sv --help
+tiddit --cov --help
 ```
 
 TIDDIT may be installed using bioconda:
-
-	conda install tiddit
+```
+conda install tiddit
+```
 
 Next, you may run TIDDIT like this:
-
-	tiddit --help
-	tiddit --sv
-	tiddit --cov
+```
+tiddit --help
+tiddit --sv
+tiddit --cov
+```
 
 TIDDIT is also distributed with a Docker container (http://singularity.lbl.gov/index.html). Type the following command to download the container:
-
-    singularity pull --name TIDDIT.simg 
+```
+singularity pull --name TIDDIT.simg 
+```
 
 Type the following to run tiddit:
-
-    singularity exec TIDDIT.simg tiddit
-
+```
+singularity exec TIDDIT.simg tiddit
+```
 
 The SV module
 =============
@@ -80,6 +83,7 @@ TIDDIT may be fine-tuned by altering these optional parameters:
 	-z 	minimum variant size (default=50), variants smaller than this will not be printed ( z < 10 is not recomended)
 	--force_ploidy	force the ploidy to be set to -n across the entire genome (i.e skip coverage normalisation of chromosomes)
 	--n_mask	exclude regions from coverage calculation if they contain more than this fraction of N (default = 0.5)
+	--skip_assembly	Skip running local assembly, tiddit will perform worse, but wont require fermi2, bwa, ropebwt and bwa indexed ref
 	--bwa	path to bwa executable file(default=bwa)
 	--fermi2	path to fermi2 executable file (default=fermi2)
 	--ropebwt2	path to ropebwt2 executable file (default=ropebwt2)

diff --git a/setup.py b/setup.py
@@ -20,7 +20,7 @@
 
 setup(
     name = 'tiddit',
-    version = '3.0.0',
+    version = '3.1.0',
     url = "https://github.com/J35P312/SVDB",
     author = "Jesper Eisfeldt",
     author_email= "jesper.eisfeldt@scilifelab.se",

diff --git a/tiddit/__main__.py b/tiddit/__main__.py
@@ -16,7 +16,7 @@
 import tiddit.tiddit_contig_analysis as tiddit_contig_analysis
 
 def main():
-	version="3.0.0"
+	version="3.1.0"
 	parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
 	parser.add_argument("--sv"	 , help="call structural variation", required=False, action="store_true")
 	parser.add_argument("--cov"        , help="generate a coverage bed file", required=False, action="store_true")
@@ -45,6 +45,7 @@ def main():
 		parser.add_argument('--bwa', type=str,default="bwa", help="path to bwa executable file(default=bwa)")
 		parser.add_argument('--fermi2', type=str,default="fermi2", help="path to fermi2 executable file (default=fermi2)")
 		parser.add_argument('--ropebwt2', type=str , default="ropebwt2", help="path to ropebwt2 executable file (default=ropebwt2)")
+		parser.add_argument('--skip_assembly', action="store_true", help="Skip running local assembly, tiddit will perform worse, but wont require fermi2, bwa, ropebwt and bwa indexed ref")
 		parser.add_argument('--p_ratio', type=float,default=0.1, help="minimum discordant pair/normal pair ratio at the breakpoint junction(default=0.1)")
 		parser.add_argument('--r_ratio', type=float,default=0.1, help="minimum split read/coverage ratio at the breakpoint junction(default=0.1)")
 		parser.add_argument('--max_coverage', type=float,default=4, help="filter call if X times higher than chromosome average coverage (default=4)")
@@ -55,24 +56,29 @@ def main():
 			print ("error, too low --l value!")
 			quit()
 
-		if not os.path.isfile(args.bwa) and not shutil.which(args.bwa):
-			print("error, BWA executable missing, add BWA to path, or specify using --bwa")
-
-		if not os.path.isfile(args.fermi2) and not shutil.which(args.fermi2):
-			print("error, fermi2 executable missing, add fermi2 to path, or specify using --fermi2")
+		if not args.skip_assembly:
+			if not os.path.isfile(args.bwa) and not shutil.which(args.bwa):
+				print("error, BWA executable missing, add BWA to path, or specify using --bwa")
+				quit()
 
-		if not os.path.isfile(args.ropebwt2) and not shutil.which(args.ropebwt2):
-			print("error, ropebwt2 executable missing, add ropebwt2 to path, or specify using --ropebwt2")
+			if not os.path.isfile(args.fermi2) and not shutil.which(args.fermi2):
+				print("error, fermi2 executable missing, add fermi2 to path, or specify using --fermi2")
+				quit()
 
-		if args.ref:
-			if not os.path.isfile(args.ref):
-				print ("error,  could not find the reference file")
+			if not os.path.isfile(args.ropebwt2) and not shutil.which(args.ropebwt2):
+				print("error, ropebwt2 executable missing, add ropebwt2 to path, or specify using --ropebwt2")
 				quit()
 
 			if not os.path.isfile(args.ref+".bwt") and not os.path.isfile(args.ref+".64.bwt"):
 				print ("error, The reference must be indexed using bwa index")
 				quit()
 
+
+		if not os.path.isfile(args.ref):
+			print ("error,  could not find the reference file")
+			quit()
+
+
 		if not (args.bam.endswith(".bam") or args.bam.endswith(".cram")):
 			print ("error, the input file is not a bam file, make sure that the file extension is .bam or .cram")
 			quit()
@@ -134,18 +140,20 @@ def main():
 		print(time.time()-t)
 
 
-		t=time.time()
-		tiddit_contig_analysis.main(prefix,sample_id,library,contigs,coverage_data,args)
-		print("Clip read assembly in:")
-		print(time.time()-t)
+		if not args.skip_assembly:
+
+			t=time.time()
+			tiddit_contig_analysis.main(prefix,sample_id,library,contigs,coverage_data,args)
+			print("Clip read assembly in:")
+			print(time.time()-t)
 
 		vcf_header=tiddit_vcf_header.main( bam_header,library,sample_id,version )
 
 		if not args.e:
 			args.e=int(library["avg_insert_size"]/2.0)
 
 		t=time.time()
-		sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig)
+		sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig,args.skip_assembly)
 
 		print("generated clusters in")
 		print(time.time()-t)

diff --git a/tiddit/tiddit_cluster.pyx b/tiddit/tiddit_cluster.pyx
@@ -37,7 +37,7 @@ def find_discordant_pos(fragment,is_mp):
 
 	return(posA,posB)
 
-def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,min_contig):
+def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,min_contig,skip_assembly):
 
 	discordants={}
 	contigs=set([])
@@ -103,38 +103,39 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
 			positions[chrA][chrB].append([int(posA),int(posB),i])
 			i+=1
 
-		for line in open("{}_tiddit/contigs_{}.tab".format(prefix,sample)):
-			content=line.rstrip().split("\t")
-			chrA=content[1]
-			chrB=content[2]
+		if not skip_assembly:
+			for line in open("{}_tiddit/contigs_{}.tab".format(prefix,sample)):
+				content=line.rstrip().split("\t")
+				chrA=content[1]
+				chrB=content[2]
 
-			if contig_length[chrA] < min_contig or contig_length[chrB] < min_contig:
-				continue
+				if contig_length[chrA] < min_contig or contig_length[chrB] < min_contig:
+					continue
 
 
-			if not chrA in positions:
-				positions[chrA]={}
-			if not chrB in positions[chrA]:
-				positions[chrA][chrB]=[]
+				if not chrA in positions:
+					positions[chrA]={}
+				if not chrB in positions[chrA]:
+					positions[chrA][chrB]=[]
 
-			if not chrA in discordants:
-				discordants[chrA]={}
-			if not chrB in discordants[chrA]:
-				discordants[chrA][chrB]=[]
+				if not chrA in discordants:
+					discordants[chrA]={}
+				if not chrB in discordants[chrA]:
+					discordants[chrA][chrB]=[]
 
 
-			posA=content[3]
-			posB=content[5]
+				posA=content[3]
+				posB=content[5]
 
-			if int(posA) > contig_length[chrA]:
-				posA=contig_length[chrA]
-			if int(posB) > contig_length[chrB]:
-				posB=contig_length[chrB]
+				if int(posA) > contig_length[chrA]:
+					posA=contig_length[chrA]
+				if int(posB) > contig_length[chrB]:
+					posB=contig_length[chrB]
 
-			discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i])
-			positions[chrA][chrB].append([int(posA),int(posB),i])
-			contigs.add(i)
-			i+=1
+				discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i])
+				positions[chrA][chrB].append([int(posA),int(posB),i])
+				contigs.add(i)
+				i+=1
 
 	candidates={}
 	for chrA in chromosomes:

diff --git a/tiddit/tiddit_variant.pyx b/tiddit/tiddit_variant.pyx
@@ -176,15 +176,16 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
 
 	contig_seqs={}
 	new_seq=False
-	for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):			
+	if not args.skip_assembly:
+		for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):			
 
-		if not new_seq and line[0] == "@" and "\t" in line:
-			name=line.split("\t")[0][1:]
-			new_seq=True
+			if not new_seq and line[0] == "@" and "\t" in line:
+				name=line.split("\t")[0][1:]
+				new_seq=True
 
-		elif new_seq:
-			contig_seqs[name]=line.strip("\n")
-			new_seq=False
+			elif new_seq:
+				contig_seqs[name]=line.strip("\n")
+				new_seq=False
 
 	for chrA in sv_clusters:
 		variants[chrA]=[]