Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 24 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ On a 30X human genome, the TIDDIT SV module typically completetes within 5 hours

INSTALLATION
==============
TIDDIT requires python3, cython, pysam, and Numpy; as well as bwa and fermikit (fermi2 and ropebwt2).
TIDDIT requires python3, cython, pysam, and Numpy.

By default, tiddit will require, bwa, fermi2 and ropebwt2 for local assembly; local assembly may be disabled through the "--skip_assembly" parameter.

Installation

Cloning from Git Hub:

```
git clone https://github.com/SciLifeLab/TIDDIT.git
```
Expand All @@ -22,39 +26,38 @@ cd tiddit
pip install -e .
```

Next install fermikit, I recommend using conda:
Next install fermi2, ropebwt2, and bwa, I recommend using conda:

```
conda install fermikit
```
conda install fermi2 ropebwt2 bwa

You may also compile bwa, fermi2, and ropebwt2 yourself. Remember to add executables to path, or provide path through the command line parameters.

```

tiddit --help
tiddit --sv --help
tiddit --cov --help
tiddit --sv --help
tiddit --cov --help
```

TIDDIT may be installed using bioconda:

conda install tiddit
```
conda install tiddit
```

Next, you may run TIDDIT like this:

tiddit --help
tiddit --sv
tiddit --cov
```
tiddit --help
tiddit --sv
tiddit --cov
```

TIDDIT is also distributed with a Docker container (http://singularity.lbl.gov/index.html). Type the following command to download the container:

singularity pull --name TIDDIT.simg
```
singularity pull --name TIDDIT.simg
```

Type the following to run tiddit:

singularity exec TIDDIT.simg tiddit

```
singularity exec TIDDIT.simg tiddit
```

The SV module
=============
Expand All @@ -80,6 +83,7 @@ TIDDIT may be fine-tuned by altering these optional parameters:
-z minimum variant size (default=50), variants smaller than this will not be printed ( z < 10 is not recomended)
--force_ploidy force the ploidy to be set to -n across the entire genome (i.e skip coverage normalisation of chromosomes)
--n_mask exclude regions from coverage calculation if they contain more than this fraction of N (default = 0.5)
--skip_assembly Skip running local assembly, tiddit will perform worse, but wont require fermi2, bwa, ropebwt and bwa indexed ref
--bwa path to bwa executable file(default=bwa)
--fermi2 path to fermi2 executable file (default=fermi2)
--ropebwt2 path to ropebwt2 executable file (default=ropebwt2)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name = 'tiddit',
version = '3.0.0',
version = '3.1.0',
url = "https://github.com/J35P312/SVDB",
author = "Jesper Eisfeldt",
author_email= "jesper.eisfeldt@scilifelab.se",
Expand Down
40 changes: 24 additions & 16 deletions tiddit/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import tiddit.tiddit_contig_analysis as tiddit_contig_analysis

def main():
version="3.0.0"
version="3.1.0"
parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
Expand Down Expand Up @@ -45,6 +45,7 @@ def main():
parser.add_argument('--bwa', type=str,default="bwa", help="path to bwa executable file(default=bwa)")
parser.add_argument('--fermi2', type=str,default="fermi2", help="path to fermi2 executable file (default=fermi2)")
parser.add_argument('--ropebwt2', type=str , default="ropebwt2", help="path to ropebwt2 executable file (default=ropebwt2)")
parser.add_argument('--skip_assembly', action="store_true", help="Skip running local assembly, tiddit will perform worse, but wont require fermi2, bwa, ropebwt and bwa indexed ref")
parser.add_argument('--p_ratio', type=float,default=0.1, help="minimum discordant pair/normal pair ratio at the breakpoint junction(default=0.1)")
parser.add_argument('--r_ratio', type=float,default=0.1, help="minimum split read/coverage ratio at the breakpoint junction(default=0.1)")
parser.add_argument('--max_coverage', type=float,default=4, help="filter call if X times higher than chromosome average coverage (default=4)")
Expand All @@ -55,24 +56,29 @@ def main():
print ("error, too low --l value!")
quit()

if not os.path.isfile(args.bwa) and not shutil.which(args.bwa):
print("error, BWA executable missing, add BWA to path, or specify using --bwa")

if not os.path.isfile(args.fermi2) and not shutil.which(args.fermi2):
print("error, fermi2 executable missing, add fermi2 to path, or specify using --fermi2")
if not args.skip_assembly:
if not os.path.isfile(args.bwa) and not shutil.which(args.bwa):
print("error, BWA executable missing, add BWA to path, or specify using --bwa")
quit()

if not os.path.isfile(args.ropebwt2) and not shutil.which(args.ropebwt2):
print("error, ropebwt2 executable missing, add ropebwt2 to path, or specify using --ropebwt2")
if not os.path.isfile(args.fermi2) and not shutil.which(args.fermi2):
print("error, fermi2 executable missing, add fermi2 to path, or specify using --fermi2")
quit()

if args.ref:
if not os.path.isfile(args.ref):
print ("error, could not find the reference file")
if not os.path.isfile(args.ropebwt2) and not shutil.which(args.ropebwt2):
print("error, ropebwt2 executable missing, add ropebwt2 to path, or specify using --ropebwt2")
quit()

if not os.path.isfile(args.ref+".bwt") and not os.path.isfile(args.ref+".64.bwt"):
print ("error, The reference must be indexed using bwa index")
quit()


if not os.path.isfile(args.ref):
print ("error, could not find the reference file")
quit()


if not (args.bam.endswith(".bam") or args.bam.endswith(".cram")):
print ("error, the input file is not a bam file, make sure that the file extension is .bam or .cram")
quit()
Expand Down Expand Up @@ -134,18 +140,20 @@ def main():
print(time.time()-t)


t=time.time()
tiddit_contig_analysis.main(prefix,sample_id,library,contigs,coverage_data,args)
print("Clip read assembly in:")
print(time.time()-t)
if not args.skip_assembly:

t=time.time()
tiddit_contig_analysis.main(prefix,sample_id,library,contigs,coverage_data,args)
print("Clip read assembly in:")
print(time.time()-t)

vcf_header=tiddit_vcf_header.main( bam_header,library,sample_id,version )

if not args.e:
args.e=int(library["avg_insert_size"]/2.0)

t=time.time()
sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig)
sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig,args.skip_assembly)

print("generated clusters in")
print(time.time()-t)
Expand Down
51 changes: 26 additions & 25 deletions tiddit/tiddit_cluster.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def find_discordant_pos(fragment,is_mp):

return(posA,posB)

def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,min_contig):
def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,min_contig,skip_assembly):

discordants={}
contigs=set([])
Expand Down Expand Up @@ -103,38 +103,39 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
positions[chrA][chrB].append([int(posA),int(posB),i])
i+=1

for line in open("{}_tiddit/contigs_{}.tab".format(prefix,sample)):
content=line.rstrip().split("\t")
chrA=content[1]
chrB=content[2]
if not skip_assembly:
for line in open("{}_tiddit/contigs_{}.tab".format(prefix,sample)):
content=line.rstrip().split("\t")
chrA=content[1]
chrB=content[2]

if contig_length[chrA] < min_contig or contig_length[chrB] < min_contig:
continue
if contig_length[chrA] < min_contig or contig_length[chrB] < min_contig:
continue


if not chrA in positions:
positions[chrA]={}
if not chrB in positions[chrA]:
positions[chrA][chrB]=[]
if not chrA in positions:
positions[chrA]={}
if not chrB in positions[chrA]:
positions[chrA][chrB]=[]

if not chrA in discordants:
discordants[chrA]={}
if not chrB in discordants[chrA]:
discordants[chrA][chrB]=[]
if not chrA in discordants:
discordants[chrA]={}
if not chrB in discordants[chrA]:
discordants[chrA][chrB]=[]


posA=content[3]
posB=content[5]
posA=content[3]
posB=content[5]

if int(posA) > contig_length[chrA]:
posA=contig_length[chrA]
if int(posB) > contig_length[chrB]:
posB=contig_length[chrB]
if int(posA) > contig_length[chrA]:
posA=contig_length[chrA]
if int(posB) > contig_length[chrB]:
posB=contig_length[chrB]

discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i])
positions[chrA][chrB].append([int(posA),int(posB),i])
contigs.add(i)
i+=1
discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i])
positions[chrA][chrB].append([int(posA),int(posB),i])
contigs.add(i)
i+=1

candidates={}
for chrA in chromosomes:
Expand Down
15 changes: 8 additions & 7 deletions tiddit/tiddit_variant.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,16 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl

contig_seqs={}
new_seq=False
for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):
if not args.skip_assembly:
for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):

if not new_seq and line[0] == "@" and "\t" in line:
name=line.split("\t")[0][1:]
new_seq=True
if not new_seq and line[0] == "@" and "\t" in line:
name=line.split("\t")[0][1:]
new_seq=True

elif new_seq:
contig_seqs[name]=line.strip("\n")
new_seq=False
elif new_seq:
contig_seqs[name]=line.strip("\n")
new_seq=False

for chrA in sv_clusters:
variants[chrA]=[]
Expand Down