In [1]:
# RPA primer design for plasmid containing target to be amplified

import random
import primer3
import math
from array import *
import collections
from nupack import *

# Define NUPACK model, the sodium and magnesium concentration are the actual concentrations used in the experiment 
my_model = Model(material='dna', ensemble='stacking', celsius=39, sodium=0.05, magnesium=0.014)

# pUCIDT (Amp) Golden Gate vector sequences before and after the target to be synthesized 
puCIDT_pre = 'TCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCCTAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCCGTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCAAATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCATCGCTATTACGCCAGCTGGCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCACGACGTTGTAAAACGACGGCCAGTGCAACGCGATGACGATGGATAGCGATTCATCGATGAGCTGACCCGATCGCCGCCGCCGGAGGGTTGCGTTTGAGACAGGCGACAGAT'
puCIDT_post = 'ATCAGTTCTGGACCAGCGAGCTGTGCTGCGACTCGTGGCGTAATCATGGTCATAGCTGTTTCCTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGTCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGATCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTACCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAATAGGCGTATCACGAGGCCCTTTCATC'

# To create random sequences of primers in certain length
def random_DNA(length):
    while True:
        random_primer = ''.join(random.choice('ACTG') for _ in range(length))   # Random "ACTG" combination 
        flag = 0   # If not satisfying the criteria, the flag will change to -1 and exit the loop 
        for i in range (0, len(random_primer)-3):
            if random_primer[i:i+4] == 'AAAA' or random_primer[i:i+4] =='TTTT' or random_primer[i:i+4] == 'CCCC' or random_primer[i:i+4] == 'GGGG':
                flag = -1   # Avoid homopolymers >= 4
                break
        for i in range (0, len(random_primer)-5):
            counts = collections.Counter(random_primer[i:i+6])
            if ((counts['C'] + counts['G'])/6 == 1 or (counts['A'] + counts['T'])/6 == 1):
                flag = -1  # Avoid consecutive 6 bases of two types, reduce synthesis error and increase the randomness in base distribution
                break
        if (flag == -1):
            continue
        else:
            return random_primer

def reverse_complement(DNA):
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    return ''.join(complement.get(base, base) for base in reversed(DNA))

# To calculate GC content of a DNA sequence
def GCPercent(seq):                       
    counts = collections.Counter(seq)
    percent = (counts['C'] + counts['G'])/float(len(seq)) 
    return percent

# To check if the ransomly generated primer meet the screening criteria
def primer_screen(length):
    
    forward_primer = random_DNA(length)   # Generate random forward primer
    reverse_primer = random_DNA(length)   # Generate random reverse primer
    
    while True:
        if (GCPercent(forward_primer) < 0.45 or GCPercent(forward_primer) > 0.55):   # Keep GC content of forward primer 45%-55%
            forward_primer = random_DNA(length)
            continue
        else:
            pass
        
        if (GCPercent(reverse_primer) < 0.45 or GCPercent(reverse_primer) > 0.55):   # Keep GC content of reverse primer 45%-55%
            reverse_primer = random_DNA(length)
            continue
        else:
            pass
        
        mfe_structure_forward = mfe(strands=forward_primer, model=my_model)
        if (str(mfe_structure_forward[0].structure) != '................................'):   # No secondary structure on forward primer (probability >= 25% predicted by NUPACK)
            forward_primer = random_DNA(length)
            continue
        else:
            pass

        probability_forward = structure_probability(strands=forward_primer, structure='................................',model=my_model)
        if (probability_forward < 0.25):
            forward_primer = random_DNA(length)
            continue
        else:
            pass

        mfe_structure_reverse = mfe(strands=reverse_primer, model=my_model)
        if (str(mfe_structure_reverse[0].structure) != '................................'):   # No secondary structure on reverse primer (probability >= 25% predicted by NUPACK)
            reverse_primer = random_DNA(length)
            continue
        else:
            pass

        probability_reverse = structure_probability(strands=reverse_primer, structure='................................',model=my_model)
        if (probability_reverse < 0.25):
            reverse_primer = random_DNA(length)
            continue
        else:
            pass
        
        # No hairpin structure above 20 ℃ for both forward and reverse primer predicted by NUPACK, ion, dntp, template concentration the same as used in experiment
        res_forward = primer3.calcHairpin(forward_primer, mv_conc=3.54, dv_conc=14, dntp_conc=0.24, dna_conc=480)
        if res_forward.tm >= 20:
            forward_primer = random_DNA(length)
            continue
        else:
            pass 
       
        res_reverse = primer3.calcHairpin(reverse_primer, mv_conc=3.54, dv_conc=14, dntp_conc=0.24, dna_conc=480)
        if res_reverse.tm >= 20:
            reverse_primer = random_DNA(length)
            continue
        else:
            pass 

        # No primer homodimers formed above 20 ℃ for both forward and reverse primer
        res_forward = primer3.calcHomodimer(forward_primer, mv_conc=3.54, dv_conc=14, dntp_conc=0.24, dna_conc=480)
        if res_forward.tm >= 20:
            forward_primer = random_DNA(length)
            continue
        else:
            pass   
        
        res_reverse = primer3.calcHomodimer(reverse_primer, mv_conc=3.54, dv_conc=14, dntp_conc=0.24, dna_conc=480)
        if res_reverse.tm >= 20:
            reverse_primer = random_DNA(length)
            continue
        else:
            pass      

        # No primer heterodimers between forward and reverse primer formed above 20 ℃
        res = primer3.calcHeterodimer(forward_primer, reverse_primer, mv_conc=3.54, dv_conc=14, dntp_conc=0.24, dna_conc=480)
        if res.tm >= 20:
            forward_primer = random_DNA(length)
            reverse_primer = random_DNA(length)
            continue
        else:
            pass   
    
        return forward_primer, reverse_primer
    
# To check if the target sequence for CRISPR reaction has significant overlap with the remaining sequence in the constructed plasmid that may induce interference
def similarity_check():
    while True:
        flag = 0   # If checked overlaps the flag will change to -1 and exit the loop 
        forward_primer, reverse_primer = primer_screen(32)  # Randomly generated 32-nt forward and reverse RPA primer using the previous function 
        
        # Define each concatanated targets to be amplified 
        seq_array_8_pair = [forward_primer, 'AGTATATCTATTGATATACT', 'CCATAAACTACAGTTCATGG', 'GGGATCACGTAAGCGATCCC', 'CCGTGGACGGCAGCCCACGG', 'ATAATGATGATAAAGTTTGA', 'AGTAAATGGAAATGGTGAGG', 'CTGGGATGGGATTAGCTGGG', 'CGGGAGCAGGCTGGGTCGGG', reverse_primer]
        seq_array_7_pair = [forward_primer, 'GGAATTTCAAATGAAATTCA', 'AGAGTAGTCATCACTACTCA', 'TGCAGGACAAGTGCCCTACG', 'CGTCGGGCAGCTGCCCGACC', 'ATATTGTTATTGGATTTGGA', 'AGTAAATGGAAATGGTGAGG', 'CTGGGATGGGATTAGCTGGG', 'CGGGAGCAGGCTGGGTCGGG', reverse_primer]
        seq_array_6_pair = [forward_primer, 'TTGATTTCATTTGAAATCTG', 'GAGTCACTTATCAGTAACGA', 'TGGCCAAGGCAACTTGACGG', 'GAGGCTCGTGGCCGAGCCGG', 'ATATTGTTATTGGATTTGGA', 'AGTAAATGGAAATGGTGAGG', 'CTGGGATGGGATTAGCTGGG', 'CGGGAGCAGGCTGGGTCGGG', reverse_primer]
        seq_array_5_pair = [forward_primer, 'AAACATTCAATAGAATGAGA', 'AGGGCTAGAATACTAACTTG', 'GTTGCGACCTCTGTCGCTTG', 'CGAGCGCCTGCAGGCACGGG', 'ATATTGTTATTGGATTTGGA', 'AGTAAATGGAAATGGTGAGG', 'CTGGGATGGGATTAGCTGGG', 'CGGGAGCAGGCTGGGTCGGG', reverse_primer]
        seq_array_4_pair = [forward_primer, 'TATTGTACTATAGTACTTTG', 'ATTGGCAGAATACTACGTGA', 'TTATGCCTCGCCAGGCTGGA', 'GCGTGGACCGTGGCCCTGGG', 'ATATTGTTATTGGATTTGGA', 'AGTAAATGGAAATGGTGAGG', 'CTGGGATGGGATTAGCTGGG', 'CGGGAGCAGGCTGGGTCGGG', reverse_primer]
        seq_array_3_pair = [forward_primer, 'AATTTGACATTAGTCTTTGA', 'ATATTGTCATAAGACGGTCG', 'ATGATGGCGGTTGCCGAGGA', 'CGCGTGGCAGGTGCCGTGGG', 'ATATTGTTATTGGATTTGGA', 'AGTAAATGGAAATGGTGAGG', 'CTGGGATGGGATTAGCTGGG', 'CGGGAGCAGGCTGGGTCGGG', reverse_primer]
            
        plasmid = []   # Store the constructed plasmid
        
        # Use plasmid construction for 8_pair target as an example 
        seq_array = seq_array_8_pair   # Define the target to be selected. You can customize the target to be amplified by changing the seq_array name
        plasmid = forward_primer
        for i in range (1, len(seq_array)-1):
            plasmid += seq_array[i] + 'GAAA'     
        plasmid += reverse_complement(reverse_primer)   # Concatenate sequence to be amplified
        plasmid = "".join(plasmid)
        recomb_plasmid = puCIDT_pre + plasmid + puCIDT_post + puCIDT_pre   # Be inserted in the pUCIDT (Amp) Golden Gate vector
        reverse_recomb_plasmid = reverse_complement(recomb_plasmid)   # The reverse complementary also need to be screened since the plasmid is double-stranded

        gap = 9   # Define the maximum number of continuous overlapped bases that can be tolerated
        
        # Compare repetitive sequence (maximum 10 nt) between [crRNA sequence or forward primer or reverse primer] and complete sequence of plasmid
        pointer = 0   # Refer to position of sequence segment in the seq_array
        while (pointer < len(seq_array)):
            rep = []      # Store the repetitive sequences if appeared once, since the sequence to be screened is definitely contained in the plasmid, there must be one appearance of the sequence
            rep_rep = []  # Therefore we need the real repetitive sequence appeared at least twice in the plasmid, this array stores the sequences that appeared twice
            f_rep = []    # Store the final repetitive sequence without coincidence in the array
            for i in range (0, len(recomb_plasmid)-gap):
                for j in range (0, len(seq_array[pointer])-gap):
                    if recomb_plasmid[i:i+gap+1] == seq_array[pointer][j:j+gap+1]:    
                        for k in range (0, len(rep)):
                            if seq_array[pointer][j:j+gap+1] == rep[k]:
                                rep_rep.append(seq_array[pointer][j:j+gap+1])
                            else:
                                pass
                        rep.append(seq_array[pointer][j:j+gap+1])
                    else:
                        pass
            [f_rep.append(x) for x in rep_rep if x not in f_rep]

            if f_rep != []:
                flag = -1
                break
            else:
                pointer += 1   # Move on to the next sequence segment
            
        if (flag == -1):
            continue
        else:
            pass

        # Identify repetitive sequence (maximum 10 nt) between [crRNA sequence or forward primer or reverse primer] and complete reverse complementary sequence of plasmid
        pointer = 0
        while (pointer < len(seq_array)):
            rep = []
            rep_rep = []
            f_rep = []
            for i in range (0, len(reverse_recomb_plasmid)-gap):
                for j in range (0, len(seq_array[pointer])-gap):
                    if reverse_recomb_plasmid[i:i+gap+1] == seq_array[pointer][j:j+gap+1]:
                        for k in range (0, len(rep)):
                            if seq_array[pointer][j:j+gap+1] == rep[k]:
                                rep_rep.append(seq_array[pointer][j:j+gap+1])
                            else:
                                pass
                        rep.append(seq_array[pointer][j:j+gap+1])
                    else:
                        pass
            [f_rep.append(x) for x in rep_rep if x not in f_rep]

            if f_rep != []:
                flag = -1
                break
            else:
                pointer += 1   # Move on to the next sequence segment
            
        if (flag == -1):
            continue
        else:
            pass
        
        # Calculate Hamming distance between [crRNA sequence or forward primer or reverse primer] and complete sequence of plasmid
        pointer = 0
        while (pointer < len(seq_array)):
            for i in range (0, len(recomb_plasmid)-len(seq_array[pointer])+1):
                hamming = 0
                candidate = recomb_plasmid[i:i+len(seq_array[pointer])]
                for j in range (0, len(seq_array[pointer])):
                    if candidate[j] != seq_array[pointer][j]:
                        hamming += 1   # Hamming distance increase by 1 if bases are detected different

                if 0 < hamming <= len(seq_array[pointer])-19:   # 1) One-base mismatch between CRISPR target and any 20-nt sequence segment in plasmid is not tolerated; 2) Should have more than 13 base different between RPA primers and any 32-nt sequence segment in plasmid  
                    flag = -1    
                    break
                else:
                    pass
            
            if (flag == -1):
                break
            else:
                pointer += 1   # Move on to the next sequence segment
        
        if (flag == -1):
            continue
        else:
            pass
        
        # Compare Hamming distance between [crRNA sequence or forward primer or reverse primer] and complete reverse complementary sequence of plasmid
        pointer = 0
        while (pointer < len(seq_array)):
            for i in range (0, len(reverse_recomb_plasmid)-len(seq_array[pointer])+1):
                hamming = 0
                candidate = reverse_recomb_plasmid[i:i+len(seq_array[pointer])]
                for j in range (0, len(seq_array[pointer])):
                    if candidate[j] != seq_array[pointer][j]:
                        hamming += 1

                if 0 < hamming <= len(seq_array[pointer])-19:   # 1) One-base mismatch between CRISPR target and any 20-nt sequence segment in plasmid is not tolerated; 2) Should have more than 13 base different between RPA primers and any 32-nt sequence segment in plasmid
                    flag = -1   
                    break
                else:
                    pass
            
            if (flag == -1):
                break
            else:
                pointer += 1   # Move on to the next sequence segment
        
        if (flag == -1):
            continue
        else:
            print('RPA forward primer:')
            print(forward_primer)
            print('RPA reverse primer:')
            print(reverse_primer)
            print("Plasmid:")
            print(plasmid)
            return True

similarity_check()


RPA forward primer:
ACTCATTCACACCAACCTCACGCTAGCCACTG
RPA reverse primer:
GCACATACACTTCACTCACCAACAATCTCGCG
Plasmid:
ACTCATTCACACCAACCTCACGCTAGCCACTGAGTATATCTATTGATATACTGAAACCATAAACTACAGTTCATGGGAAAGGGATCACGTAAGCGATCCCGAAACCGTGGACGGCAGCCCACGGGAAAATAATGATGATAAAGTTTGAGAAAAGTAAATGGAAATGGTGAGGGAAACTGGGATGGGATTAGCTGGGGAAACGGGAGCAGGCTGGGTCGGGGAAACGCGAGATTGTTGGTGAGTGAAGTGTATGTGC


True