In [1]:
# This program generates crRNA spacer sequences that fold into a specified stem-loop structure.


import RNA
import random
import collections
import math

# Direct repeat sequence for AsCas12a; it is prepended to every spacer candidate.
crRNA_header = 'UAAUUUCUACUCUUGUAGAU'
# Alternative direct repeat sequence for LbCas12a (disabled):
# crRNA_header = 'UAAUUUCUACUAAGUGUAGAU'
def _random_spacer(length, GC_content):
    """Return a random RNA spacer with a fixed number of G/C bases.

    ``int(length * GC_content)`` bases are drawn from "GC" and the remaining
    ones from "AU"; the bases are then shuffled so that G/C and A/U positions
    are randomly distributed along the spacer.
    """
    gc_count = int(length * GC_content)
    bases = [random.choice('GC') for _ in range(gc_count)]
    bases += [random.choice('AU') for _ in range(length - gc_count)]
    random.shuffle(bases)
    return ''.join(bases)


def _passes_sequence_filters(spacer):
    """Return True if the spacer has no homopolymer >= 4 nt and no 6-nt
    window made of at most two base types."""
    # Avoid homopolymers >= 4
    if any(base * 4 in spacer for base in 'AUCG'):
        return False
    # Avoid 6 consecutive bases built from only two base types; this reduces
    # synthesis error and increases the randomness of the base distribution.
    for start in range(len(spacer) - 5):
        if len(set(spacer[start:start + 6])) <= 2:
            return False
    return True


def find_crRNA_hairpin(length, GC_content, n_pairs=8):
    """Design a crRNA whose spacer folds into a stem-loop of ``n_pairs`` pairs.

    The algorithm generates random spacers with the requested length and GC
    content and screens them until one satisfies all sequence and structural
    criteria (MFE structure equal to the target, and thresholds on the
    base-pairing probabilities of the partition function).

    Args:
        length: Spacer length in nt. The target structures are written for a
            40-nt crRNA, so ``len(crRNA_header) + length`` must equal 40.
        GC_content: Fraction of G/C bases in the spacer, between 0 and 1. The
            number of G/C bases is ``int(length * GC_content)``.
        n_pairs: Number of base pairs in the spacer stem (3 to 8). The default
            of 8 selects the same target structure as before.

    Returns:
        The complete crRNA sequence (direct repeat followed by the spacer).

    Raises:
        ValueError: If ``n_pairs`` is unsupported, ``GC_content`` is outside
            [0, 1], or the crRNA length does not match the target structure.
            Previously such inputs made the search loop forever.

    Note:
        The search only ends when a candidate passes every filter, so it can
        run for a long time (or never finish) for parameters that make the
        criteria hard or impossible to satisfy.
    """
    # n_pairs -> (target MFE structure,
    #             paired spacer positions i (partner is 61 - i) that must
    #             reach a probability of 95%,
    #             unpaired spacer positions).
    # Positions are 1-based, as in the ViennaRNA base-pair probability matrix.
    targets = {
        8: ('.....(((((....))))).((((((((....))))))))',
            range(22, 29), [29, 30, 31, 32]),
        7: ('.....(((((....)))))..(((((((....))))))).',
            range(22, 29), [21, 29, 30, 31, 32, 40]),
        6: ('.....(((((....)))))...((((((....))))))..',
            range(23, 29), [21, 22, 29, 30, 31, 32, 39, 40]),
        5: ('.....(((((....)))))....(((((....)))))...',
            range(24, 29), [21, 22, 23, 29, 30, 31, 32, 38, 39, 40]),
        4: ('.....(((((....))))).....((((....))))....',
            range(25, 29), [21, 22, 23, 24, 29, 30, 31, 32, 37, 38, 39, 40]),
        3: ('.....(((((....)))))......(((....))).....',
            range(26, 29),
            [21, 22, 23, 24, 25, 29, 30, 31, 32, 36, 37, 38, 39, 40]),
    }
    if n_pairs not in targets:
        raise ValueError(
            f'n_pairs must be one of {sorted(targets)}, got {n_pairs!r}')
    if not 0 <= GC_content <= 1:
        raise ValueError(
            f'GC_content must be between 0 and 1, got {GC_content!r}')
    target_structure, paired_positions, unpaired_positions = targets[n_pairs]
    if len(crRNA_header) + length != len(target_structure):
        # The MFE structure could never equal the target, so fail loudly
        # instead of looping forever.
        raise ValueError(
            f'direct repeat ({len(crRNA_header)} nt) plus spacer ({length} nt) '
            f'must be {len(target_structure)} nt long to match the target '
            'structure')

    while True:
        spacer = _random_spacer(length, GC_content)
        if not _passes_sequence_filters(spacer):
            continue

        # The complete sequence of the crRNA candidate for screening
        crRNA_candidate = crRNA_header + spacer
        fc = RNA.fold_compound(crRNA_candidate)
        ss, _mfe = fc.mfe()  # MFE structure and energy
        if ss != target_structure:
            continue

        fc.pf()  # partition function, required before reading bpp()
        basepair_probs = fc.bpp()  # base-pairing probabilities

        # In the 8-pair design the base pair at the 3' end can hardly reach
        # 95%, so its threshold is lowered to 75%. The other designs leave
        # positions 21 and 40 unpaired, so this criterion does not apply.
        if n_pairs == 8 and basepair_probs[21][40] < 0.75:
            continue

        # Other base pairs in the spacer need a probability of at least 95%
        if any(basepair_probs[i][61 - i] < 0.95 for i in paired_positions):
            continue

        # Bases 1-5 of the direct repeat must pair with every spacer base
        # (positions 21-40) with a probability lower than 5%
        if any(basepair_probs[i][j] >= 0.05
               for i in range(1, 6) for j in range(21, 41)):
            continue

        # The unpaired spacer bases must pair with every other spacer base
        # with a probability lower than 5%. ViennaRNA only fills
        # bpp()[i][j] for i < j, so the smaller index has to come first;
        # indexing [i][j] with j < i always reads 0 and checks nothing.
        if any(basepair_probs[min(i, j)][max(i, j)] >= 0.05
               for i in unpaired_positions
               for j in range(21, 41) if j != i):
            continue

        # Stability of the direct-repeat stem-loop: at least 99% for the
        # base pairs excluding the first one ...
        if any(basepair_probs[i][25 - i] < 0.99 for i in range(7, 11)):
            continue

        # ... and at least 85% for the first one
        if basepair_probs[6][19] < 0.85:
            continue

        return crRNA_candidate
                
def crRNA_reverse_complement(crRNA):
    """Return the short ssDNA sequence corresponding to a designed crRNA.

    The spacer is taken as characters 20-39 (0-based) of ``crRNA`` (at most
    20 nt). It is reversed and every RNA base is replaced by its complementary
    DNA base (A->T, C->G, G->C, U->A); any other character is kept as is.
    """
    rna_to_dna = str.maketrans('ACGU', 'TGCA')
    spacer = crRNA[20:40]
    return spacer[::-1].translate(rna_to_dna)

def DNA_reverse_complement(DNA):
    """Return the reverse complement of a DNA sequence.

    Used to create the sequence to be input to IDT for ordering. Only the
    uppercase bases A, C, G and T are complemented; any other character is
    kept as is.
    """
    dna_complement = str.maketrans('ACGT', 'TGCA')
    return DNA[::-1].translate(dna_complement)

# Design one crRNA with a hairpin spacer: 20-nt spacer, GC content 0.2.
crRNA_selected = find_crRNA_hairpin(length=20, GC_content=0.2)
print('crRNA:', crRNA_selected, sep='\n')

# Fold the selected crRNA again so that its partition-function results are
# available at module level; they are not used by the statements below.
fc = RNA.fold_compound(crRNA_selected)
propensity, ensemble_energy = fc.pf()
basepair_probs = fc.bpp()

# ssDNA sequence corresponding to the designed crRNA.
ssDNA_target_selected = crRNA_reverse_complement(crRNA_selected)
print('ssDNA target:', ssDNA_target_selected, sep='\n')

# Sequence to be input to IDT for ordering.
ssDNA_IDT_order = DNA_reverse_complement(ssDNA_target_selected)
print('IDT ordered ssDNA:', ssDNA_IDT_order, sep='\n')




crRNA:
UAAUUUCUACUCUUGUAGAUAGAUAAACUAAAGUUUAUCU
ssDNA target:
AGATAAACTTTAGTTTATCT
IDT ordered ssDNA:
AGATAAACTAAAGTTTATCT


In [2]:
# This program generates crRNAs with linear spacers; comments that would repeat those of the hairpin program above are omitted.

import RNA
import random
import collections
import math

# Direct repeat sequence for AsCas12a; it is prepended to every spacer candidate.
crRNA_header = 'UAAUUUCUACUCUUGUAGAU'
# Alternative direct repeat sequence for LbCas12a (disabled):
# crRNA_header = 'UAAUUUCUACUAAGUGUAGAU'
def _random_spacer(length, GC_content):
    """Return a random RNA spacer with a fixed number of G/C bases.

    ``int(length * GC_content)`` bases are drawn from "GC" and the remaining
    ones from "AU"; the bases are then shuffled so that G/C and A/U positions
    are randomly distributed along the spacer.
    """
    gc_count = int(length * GC_content)
    bases = [random.choice('GC') for _ in range(gc_count)]
    bases += [random.choice('AU') for _ in range(length - gc_count)]
    random.shuffle(bases)
    return ''.join(bases)


def _passes_sequence_filters(spacer):
    """Return True if the spacer has no homopolymer >= 4 nt and no 6-nt
    window made of at most two base types."""
    if any(base * 4 in spacer for base in 'AUCG'):
        return False
    for start in range(len(spacer) - 5):
        if len(set(spacer[start:start + 6])) <= 2:
            return False
    return True


def find_crRNA_linear(length, GC_content):
    """Design a crRNA whose spacer stays linear (unpaired).

    Random spacers with the requested length and GC content are generated and
    screened until one satisfies all sequence and structural criteria: the
    MFE structure contains only the direct-repeat stem-loop, the
    base-pairing probabilities stay within the thresholds below, and the MFE
    structure has a frequency of at least 0.80 in the ensemble.

    Args:
        length: Spacer length in nt. The target structure is written for a
            40-nt crRNA, so ``len(crRNA_header) + length`` must equal 40.
        GC_content: Fraction of G/C bases in the spacer, between 0 and 1. The
            number of G/C bases is ``int(length * GC_content)``.

    Returns:
        The complete crRNA sequence (direct repeat followed by the spacer).

    Raises:
        ValueError: If ``GC_content`` is outside [0, 1] or the crRNA length
            does not match the target structure. Previously such inputs made
            the search loop forever.

    Note:
        The search only ends when a candidate passes every filter, so it can
        run for a long time (or never finish) for parameters that make the
        criteria hard or impossible to satisfy.
    """
    target_structure = '.....(((((....))))).....................'
    if not 0 <= GC_content <= 1:
        raise ValueError(
            f'GC_content must be between 0 and 1, got {GC_content!r}')
    if len(crRNA_header) + length != len(target_structure):
        # The MFE structure could never equal the target, so fail loudly
        # instead of looping forever.
        raise ValueError(
            f'direct repeat ({len(crRNA_header)} nt) plus spacer ({length} nt) '
            f'must be {len(target_structure)} nt long to match the target '
            'structure')

    # Loop-invariant thermal energy used for the MFE frequency. The division
    # by 1000 presumably converts it to the unit of the energies returned by
    # mfe()/pf() -- see the ViennaRNA documentation.
    kT = RNA.exp_param().kT / 1000.

    while True:
        spacer = _random_spacer(length, GC_content)
        if not _passes_sequence_filters(spacer):
            continue

        crRNA_candidate = crRNA_header + spacer
        fc = RNA.fold_compound(crRNA_candidate)
        ss, mfe = fc.mfe()
        if ss != target_structure:
            continue

        # One partition-function run provides both the ensemble free energy
        # and the base-pairing probabilities.
        _propensity, ensemble_energy = fc.pf()
        basepair_probs = fc.bpp()

        # In the design of a linear spacer a stricter criterion is used:
        # every base in the spacer must pair with every other spacer base
        # with a probability lower than 1%
        if any(basepair_probs[i][j] >= 0.01
               for i in range(21, 40) for j in range(i + 1, 41)):
            continue

        # Bases 1-5 of the direct repeat must pair with every spacer base
        # (positions 21-40) with a probability lower than 5%
        if any(basepair_probs[i][j] >= 0.05
               for i in range(1, 6) for j in range(21, 41)):
            continue

        # Direct-repeat stem-loop: at least 99% for the base pairs excluding
        # the first one, and at least 85% for the first one
        if any(basepair_probs[i][25 - i] < 0.99 for i in range(7, 11)):
            continue
        if basepair_probs[6][19] < 0.85:
            continue

        # MFE frequency no less than 0.80 for crRNA with linear spacer
        prob_mfe = math.exp((ensemble_energy - mfe) / kT)
        if prob_mfe < 0.80:
            continue

        return crRNA_candidate
                
                    
def crRNA_reverse_complement(crRNA):
    """Return the reverse-complement ssDNA of the spacer of ``crRNA``.

    The spacer is characters 20-39 (0-based) of ``crRNA``. RNA bases are
    mapped to their complementary DNA bases (A->T, C->G, G->C, U->A); any
    other character is kept as is.
    """
    rna_to_dna = str.maketrans('ACGU', 'TGCA')
    spacer = crRNA[20:40]
    return spacer[::-1].translate(rna_to_dna)

def DNA_reverse_complement(DNA):
    """Return the reverse complement of a DNA sequence.

    Only the uppercase bases A, C, G and T are complemented; any other
    character is kept as is.
    """
    dna_complement = str.maketrans('ACGT', 'TGCA')
    return DNA[::-1].translate(dna_complement)

# Design one crRNA with a linear spacer: 20-nt spacer, GC content 0.25.
crRNA_selected = find_crRNA_linear(length=20, GC_content=0.25)
print('crRNA:', crRNA_selected, sep='\n')

# Fold the selected crRNA again so that its partition-function results are
# available at module level; they are not used by the statements below.
fc = RNA.fold_compound(crRNA_selected)
propensity, ensemble_energy = fc.pf()
basepair_probs = fc.bpp()

# ssDNA sequence corresponding to the designed crRNA.
ssDNA_target_selected = crRNA_reverse_complement(crRNA_selected)
print('ssDNA target:', ssDNA_target_selected, sep='\n')

# Reverse complement of the ssDNA target, printed as the IDT order sequence.
ssDNA_IDT_order = DNA_reverse_complement(ssDNA_target_selected)
print('IDT ordered ssDNA:', ssDNA_IDT_order, sep='\n')



crRNA:
UAAUUUCUACUCUUGUAGAUUCAACAUAACUAUCUCUAAA
ssDNA target:
TTTAGAGATAGTTATGTTGA
IDT ordered ssDNA:
TCAACATAACTATCTCTAAA
