In [14]:
def load_sequence(file_path: str) -> str:
    with open(file_path, 'r') as file:
        sequence = file.read().strip()
    return sequence

In [12]:
def digestion_enzyme(sequence: str, cleaving_enzyme: callable, missed_cleavages: int=0) -> list:
    oligo = set()
    cut_sites = [0]
    
    for i in range(0, len(sequence)-2):
        if cleaving_enzyme(sequence[i], sequence[i+1]):
            cut_sites.append(i+1)
    
    cut_sites.append(len(sequence))

    if len(cut_sites) > 2 and missed_cleavages in [0, 1, 2]:
        if  missed_cleavages == 0:
            for j in range(0,len(cut_sites)-1):
                oligo.add(sequence[cut_sites[j]:cut_sites[j+1]])

        elif missed_cleavages == 1:
            for j in range(0,len(cut_sites)-2):
                oligo.add(sequence[cut_sites[j]:cut_sites[j+1]])
                oligo.add(sequence[cut_sites[j]:cut_sites[j+2]])
            
            oligo.add(sequence[cut_sites[-2]:cut_sites[-1]])

        elif missed_cleavages == 2:
            for j in range(0,len(cut_sites)-3):
                oligo.add(sequence[cut_sites[j]:cut_sites[j+1]])
                oligo.add(sequence[cut_sites[j]:cut_sites[j+2]])
                oligo.add(sequence[cut_sites[j]:cut_sites[j+3]])
            
            oligo.add(sequence[cut_sites[-3]:cut_sites[-2]])
            oligo.add(sequence[cut_sites[-3]:cut_sites[-1]])
            oligo.add(sequence[cut_sites[-2]:cut_sites[-1]])
    else:
        oligo.add(sequence)
    
    oligo_list = list(oligo)
    oligo_list.sort(key=len)

    return oligo_list


In the next cell we define the different RNAses that we might use by their rules.
In details, `rnases` is a dictionary holding functions that return `True` if the enzyme should cut

In [7]:
rnases = {}

rnases["demo"] = lambda c, n: c == "A" and n in ["G", "U"]  # demo RNase that cleaves after A if followed by U or G
rnases["T1"] = lambda c, n: c == "G" and n in ["A", "C", "G", "U"] # RNase T1 cleaves after G
rnases["4"] = lambda c, n: c == "U" and n in ["A", "G"] # RNase 4 cleaves after U if followed by A or G 
rnases["MC1"] = lambda c, n: c in ["A", "C", "U"] and n == "U" # RNase MC1 cleaves after A, C, or U if followed by U  
rnases["Cusativin"] = lambda c, n: (c == "C" and n in ["A", "G", "U"]) or (c =="U" and n == "A") or (c in ["A", "U"] and n == "U") # Cusativin cleaves after C if followed by A, G, or U; after U if followed by A; after A or U if followed by U

Then, we can run the digestion simulation with some example data.

In [19]:
# seq = 'GAUAUCAAAGACACGACGGUGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUU'
seq = load_sequence('example-sequence.txt')
enzyme = rnases["demo"]

oligo_list = digestion_enzyme(seq, enzyme)
print(oligo_list)

['GA', 'GGA', 'GAA', 'GCA', 'GTA', 'GCCA', 'GACA', 'GCAA', 'GGAA', 'GAAA', 'GTCA', 'GGGA', 'GCTA', 'GTTA', 'GATA', 'GCTAA', 'GACAA', 'GTACA', 'GGAAA', 'GAAAA', 'GTCCA', 'GGGTA', 'GCCAA', 'GTGGA', 'GATGA', 'GTGAA', 'GGCTTA', 'GGGCCA', 'GTCTGA', 'GCGGAA', 'GATAAA', 'GAAATA', 'GCTTTA', 'GAATTA', 'GATGAA', 'GCTAAA', 'GCACAA', 'GTTCAA', 'GTCCTA', 'GTTTTA', 'GTTGAA', 'GCATAA', 'GGCAAA', 'GGGCGA', 'GTTCTTA', 'GGTTAAA', 'GCTCGTA', 'GGGATAA', 'GCTGGTA', 'GTTACTA', 'GGTTTGA', 'GATGATA', 'GTATAAA', 'GAAAACA', 'GGCGATA', 'GTCTGAA', 'GGAAATGA', 'GTCAAAAA', 'GACGATTA', 'GTACGTGA', 'GGTGAACA', 'GTGCCGGA', 'GGATATAA', 'GCAAATCA', 'ATGGATAA', 'GACAATAA', 'GATTATTGA', 'GAAAATGAA', 'GTAAATCAA', 'GTTAAAAAA', 'GGTGACTGA', 'GTGCGCAAA', 'GGTATCAAA', 'GATATCTTA', 'GATGATTGA', 'GCTTATTCA', 'GGTGCTTCA', 'GATGCTAAA', 'GACTGTAAAA', 'GATATCCTAA', 'GATTCTTAAA', 'GATGGCGAAA', 'GCATTATTTA', 'GACCGCCACA', 'GCAATTAAAA', 'GCTTTCAAAA', 'GAATTTATCA', 'GGTGGAAAAA', 'GTCTTTTTGA', 'GTTTCCTTAAA', 'GTCCAACGGTA', 'GTGGTTGCTAA',

In [None]:
with open("output.txt", mode="w") as data:
    data.write("\n".join(oligo_list) + "\n")