Skip to content

Commit

Permalink
feat: allow multi-fasta reference input
Browse files Browse the repository at this point in the history
  • Loading branch information
florianzwagemaker committed Jul 14, 2023
1 parent 2889232 commit cd4f413
Showing 1 changed file with 51 additions and 45 deletions.
96 changes: 51 additions & 45 deletions AmpliGone/fasta2bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,52 +91,58 @@ def CoordListGen(primerfile, referencefile, err_rate=0.1):

primers = list(SeqIO.parse(primerfile, "fasta"))

ref_file = SeqIO.read(referencefile, "fasta")
ref_seq = str(ref_file.seq)

for primer in primers:
seq = str(primer.seq)
revcomp = Seq.reverse_complement(seq)

coords = get_coords(seq, ref_seq, err_rate)
rev_coords = get_coords(revcomp, ref_seq, err_rate)
if coords and rev_coords:
log.warning(
f"Primer [yellow underline]{primer.id}[/yellow underline] found on both forward and reverse strand of '[yellow]{ref_file.name}[/yellow]'.\n[yellow bold]Check to see if this is intended.[/yellow bold]"
)
if not coords and not rev_coords:
log.warning(
f"Skipping [yellow underline]{primer.id}[/yellow underline] as it is found on neither forward or reverse strand of [yellow underline]{ref_file.name}[/yellow underline].\n[yellow bold]Check to see if this is intended.[/yellow bold]"
)
continue
if coords and len(coords) > 1:
log.warning(
f"Primer [yellow underline]{primer.id}[/yellow underline] found on multiple locations on [underline]forward strand[/underline]: \n\t[yellow]{coords}\n[bold]Check to see if this is intended.[/yellow][/bold]"
)
if rev_coords and len(rev_coords) > 1:
log.warning(
f"Primer [yellow underline]{primer.id}[/yellow underline] found on multiple locations on [underline]reverse strand[/underline]: {rev_coords}.\nCheck to see if this is intended."
)
for start, end in coords.union(rev_coords):
if any(o in primer.id for o in keyl):
strand = "+"
elif any(o in primer.id for o in keyr):
strand = "-"
else:
log.error(
f"Primer name {primer.id} does not contain orientation (e.g. {primer.id}_RIGHT). Consider suffixing with {keyl + keyr}"
ref_file = list(SeqIO.parse(referencefile, "fasta"))
ref_seq = [str(ref.seq) for ref in ref_file]
ref_id = [ref.id for ref in ref_file]

# The loop in a loop here is not a particularly efficient way of doing this.
# But this is the easiest implementation for now, and it's not like this is a particularly
# cpu or time intensive process anyway.
# Might come back to this when there's more time to create a better solution.
for ref_seq, ref_id in zip(ref_seq, ref_id):
for primer in primers:
seq = str(primer.seq)
revcomp = Seq.reverse_complement(seq)

coords = get_coords(seq, ref_seq, err_rate)
rev_coords = get_coords(revcomp, ref_seq, err_rate)
if coords and rev_coords:
log.warning(
f"Primer [yellow underline]{primer.id}[/yellow underline] found on both forward and reverse strand of '[yellow]{ref_id}[/yellow]'.\n[yellow bold]Check to see if this is intended.[/yellow bold]"
)
if not coords and not rev_coords:
log.warning(
f"Skipping [yellow underline]{primer.id}[/yellow underline] as it is found on neither forward or reverse strand of [yellow underline]{ref_id}[/yellow underline].\n[yellow bold]Check to see if this is intended.[/yellow bold]"
)
continue
if coords and len(coords) > 1:
log.warning(
f"Primer [yellow underline]{primer.id}[/yellow underline] found on multiple locations on [underline]forward strand[/underline]: \n\t[yellow]{coords}\n[bold]Check to see if this is intended.[/yellow][/bold]"
)
if rev_coords and len(rev_coords) > 1:
log.warning(
f"Primer [yellow underline]{primer.id}[/yellow underline] found on multiple locations on [underline]reverse strand[/underline]: {rev_coords}.\nCheck to see if this is intended."
)
for start, end in coords.union(rev_coords):
if any(o in primer.id for o in keyl):
strand = "+"
elif any(o in primer.id for o in keyr):
strand = "-"
else:
log.error(
f"Primer name {primer.id} does not contain orientation (e.g. {primer.id}_RIGHT). Consider suffixing with {keyl + keyr}"
)
continue
yield dict(
ref=ref_id,
start=start,
end=end,
name=primer.id,
score=".",
strand=strand,
seq=seq,
revcomp=revcomp,

Check notice on line 144 in AmpliGone/fasta2bed.py

View check run for this annotation

codefactor.io / CodeFactor

AmpliGone/fasta2bed.py#L75-L144

Complex Method
)
exit(1)
yield dict(
ref=ref_file.name,
start=start,
end=end,
name=primer.id,
score=".",
strand=strand,
seq=seq,
revcomp=revcomp,
)


def CoordinateListsToBed(df, outfile):
Expand Down

0 comments on commit cd4f413

Please sign in to comment.