# Deleting 3 copies of the rrn operon from WHO_F_2024

In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [2]:
# Read the WHO_F_2024 reference genome from its FASTA file into `genome`.
# SeqIO.read expects the file to hold exactly one record.
genome_fasta = "ref_full_genome/who_f_2024.fa"
genome = SeqIO.read(genome_fasta, "fasta")

In [3]:
# Removing 3 copies of rrn operon.
# Each (start, end) pair is used as a 0-based, end-exclusive Python slice of
# the ORIGINAL genome: the bases genome.seq[start:end] are dropped.
rrn_copies_to_delete = [
    (2_000_757, 2_006_159),
    (1_778_506, 1_783_908),
    (1_354_814, 1_360_216),
]

genome_seq = genome.seq
# Cut from the highest coordinate downwards so that removing one interval
# never shifts the coordinates of the intervals that are still to be removed.
for start, end in sorted(rrn_copies_to_delete, reverse=True):
    genome_seq = genome_seq[:start] + genome_seq[end:]

# Sanity check: the sequence must shrink by exactly the total deleted length
# (catches out-of-range or overlapping intervals, which slicing would
# otherwise silently tolerate).
deleted_len = sum(end - start for start, end in rrn_copies_to_delete)
assert len(genome_seq) == len(genome.seq) - deleted_len, (
    "unexpected genome length after deleting rrn copies"
)

print(len(genome.seq))
print(len(genome_seq))

2292467
2276261


In [4]:
# Build a SeqRecord for the shortened sequence, reusing the identifying
# metadata of the original genome record.
# NOTE(review): `features` and `annotations` are passed through as-is from the
# full-length record; any feature coordinates would still refer to the
# original genome. A FASTA-derived record normally has none — TODO confirm.
inherited_metadata = {
    "id": genome.id,
    "name": genome.name,
    "description": genome.description,
    "dbxrefs": genome.dbxrefs,
    "features": genome.features,
    "annotations": genome.annotations,
}
shortened_genome = SeqRecord(Seq(genome_seq), **inherited_metadata)

In [5]:
# Write the shortened genome record as FASTA into the working directory.
# The call is left as the last expression so the cell displays its return value.
short_fasta = "who_f_2024_short.fa"
SeqIO.write(shortened_genome, short_fasta, "fasta")

1

# Searching for the relevant SNP sites in the reverse-complemented genome

In [6]:
# [start, end] coordinates before reverse-complementing. The arithmetic below
# treats them as 1-based and inclusive.
pos_before = [1_212_242, 1_215_133]  # rrl gene
pos_rrn_before = [1_211_877, 1_217_278]  # rrn operon

# Load the reverse-complemented short genome; only its length is needed here.
short_genome = SeqIO.read("ref_short_genome/revc_who_f_2024.fa", "fasta")
genome_len = len(short_genome.seq)

# SNP sites in rrl gene. {E. coli numbering: N. gonorrhoeae numbering}.
mut_sites = {2611: 2599, 2058: 2046, 2059: 2047}

# Reverse-complementing maps position p to (genome_len + 1 - p). Walking the
# old [start, end] pair backwards keeps the new pair in ascending order.
pos_new = [genome_len + 1 - coord for coord in reversed(pos_before)]
pos_rrn_new = [genome_len + 1 - coord for coord in reversed(pos_rrn_before)]

# Genome coordinate of each SNP site: offset within rrl counted from the new
# rrl start (pos_new[0]).
# NOTE(review): assumes rrl reads left-to-right in the reverse-complemented
# genome — confirm against the annotation.
mut_sites_in_genome = {
    ecoli_pos: ngo_pos - 1 + pos_new[0] for ecoli_pos, ngo_pos in mut_sites.items()
}

print(f"New coordinates of rrl gene: {pos_new}")

print(f"New coordinates of rrn operon: {pos_rrn_new}")

print(f"New coordinates of SNP sites in 23S rRNA gene: {mut_sites_in_genome}")

New coordinates of rrl gene: [1061129, 1064020]
New coordinates of rrn operon: [1058984, 1064385]
New coordinates of SNP sites in 23S rRNA gene: {2611: 1063727, 2058: 1063174, 2059: 1063175}
