In [1]:
import random
import primer3
import math
from array import *
import collections
from reedsolo import RSCodec
import itertools

def GCPercent(seq):                       # To calculate GC content of a DNA sequence
    """Return the GC content of a DNA sequence as a fraction between 0 and 1.

    Args:
        seq: Sequence of bases (e.g. a string). Only the upper-case symbols
            'G' and 'C' are counted; every element contributes to the length.

    Returns:
        (number of 'C' + number of 'G') / len(seq) as a float. An empty
        sequence yields 0.0 instead of raising ZeroDivisionError.
    """
    if not seq:
        # Nothing to count; avoid dividing by a zero length.
        return 0.0
    counts = collections.Counter(seq)
    return (counts['C'] + counts['G']) / float(len(seq))

def ternary(n):      # To convert a non-negative integer to its ternary (base-3) digit string (no leading '0's)
    """Return the base-3 representation of a non-negative integer.

    Args:
        n: Non-negative integer to convert.

    Returns:
        The base-3 digits of ``n`` as a string, most significant digit first,
        without leading zeros ('0' for ``n == 0``).

    Raises:
        ValueError: If ``n`` is negative. Floor division of a negative number
            by 3 settles at -1 and never reaches 0, so the digit loop below
            would otherwise never terminate.
    """
    if n < 0:
        raise ValueError("ternary() requires a non-negative integer, got %r" % (n,))
    if n == 0:
        return '0'
    digits = []
    while n:
        # divmod peels off the least significant base-3 digit each round.
        n, remainder = divmod(n, 3)
        digits.append(str(remainder))
    # Digits were collected least-significant first; flip them for output.
    return ''.join(reversed(digits))
    
def base3ToOligo(seq):           # Base-3 oligo conversion to map a ternary number to a DNA sequence.(Goldman et al., 2013, Nature)
    """Translate a string of ternary digits into a DNA sequence.

    Each digit ('0', '1' or '2') selects one of the three bases that differ
    from the base emitted just before it, so no base is ever repeated
    back-to-back. The first digit is encoded as if the preceding base were 'A'.

    Args:
        seq: Indexable sequence of the characters '0', '1', '2'.

    Returns:
        The encoded DNA string ('' for empty input).

    Raises:
        KeyError: If ``seq`` contains anything other than '0', '1' or '2'.
    """
    # Digit -> base lookup, keyed by the base that was emitted last.
    next_base_after = {
        'A': {'0': 'C', '1': 'T', '2': 'G'},
        'C': {'0': 'T', '1': 'G', '2': 'A'},
        'T': {'0': 'G', '1': 'A', '2': 'C'},
        'G': {'0': 'A', '1': 'C', '2': 'T'},
    }
    previous = 'A'
    oligo = []
    for position in range(len(seq)):
        previous = next_base_after[previous][seq[position]]
        oligo.append(previous)
    return ''.join(oligo)

def DNA_reverse_complement(DNA):
    """Return the reverse complement of a DNA sequence.

    The sequence is read back to front and each upper-case A/C/G/T is swapped
    for its Watson-Crick partner. Any other symbol (lower-case letters, 'N',
    ...) is copied through unchanged.

    Args:
        DNA: Reversible sequence of single-character bases (e.g. a string).

    Returns:
        The reverse-complemented sequence as a string.
    """
    pairing = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    flipped = []
    for base in reversed(DNA):
        if base in pairing:
            flipped.append(pairing[base])
        else:
            # Unknown symbols keep their identity, only their position flips.
            flipped.append(base)
    return ''.join(flipped)

# Candidate primer pool. The screening steps further down this file rebind
# `primerLibrary` to progressively smaller lists as primers are rejected.
# NOTE(review): the Hamming-distance screen below indexes positions 0-20 of
# every primer, so entries are presumably all 21 nt long — confirm when editing.
primerLibrary = ['ACTCCTCGAATGCTAGCGTTG', 'ACATCGCCATTCCAAGAGCAT', 'AGATTGCACGTGTGTCTGCCT', 'GTGGAACGCTTGACCAACTTG', 'AGATGGCATGGACACGGAGAT', 'GACGAACACTGTTCACACCTG', 'CACGAAGCAAGCTATGACCAC', 'TCAGTTCTCCGTACATGGCTG', 'ACCTCGTGCAGTTGTGGTTGT', 'TGGTCACAGCTGTAAGTGCAT', 'TGCTGGTTACCACATAGCCTG', 'GTCTTGAGTCACGAACCAGGA', 'GTTGCCAACGGACTCTAAGGA', 'TGCTCCTGTCTTGTATCCGTG', 'AGAGCCTGCCATGTTCCAATG', 'TGGATACCAACCGATGCGAAC', 'ACACACTTGCGTTGCTCTTCG', 'AGTGCACCTTGATGTGGTCCT', 'AGTGAACCGTCTTCATGGAGC', 'TCGGTTCATCCACAAGCCTTG', 'TATTGGCATCGCGAACATCCA', 'ATAGCACAGCGTCACGGTTCA', 'AGTTGCTGATAGGTGCTTCGA', 'TCTCCGAGTCTTCACGTCTTG', 'TCCTGCACTTGGTATAGGCCA', 'ACAATGCCTGGTGTTACTCCG', 'ATTGCTGCAATGCTGAACCAG', 'GCATCCAATCAGACAGTCCGA', 'GATCCGAACTGACAGCGAACT', 'AGCGTTATACTGGTCGTTGGT', 'CTATGGCACGTGGAGTTGACT', 'ACTAGGCACCTCACAGTCAGA', 'ACATCTCATCCTGAACGCACA', 'TGTAACGACGCTACTACCACG', 'GAAGTCACAACACGCTGGAGT', 'AGTGCGAAGGAAGCCAACTAG', 'AGAGGAGACCACCAGCTCTTA', 'ACTTGGCATGTCCTACATCGT', 'GAGTTGCACTGCTCCGTCAAT', 'ACCGTAGATCCTCGACAAGCT', 'TCAACTGGTGATTCGTGCAAC', 'AGGCATACAGGAAGCTACCGT', 'TAGCACTACGAACGGCATCCT', 'CTGGTAGTCCGTCATGCTTCA', 'ACCAATCTGTCCACGTTCGAC', 'GTCCAGTGTGAACGTGAAGTG', 'ACTGGAGAGAGCGTCTATTGC', 'AGGTTGACATGTTGAGGCACA', 'AGAAGAGTCCAACGTCGCAAG', 'TCGAAGAATACCGCCTGACCA', 'TCGGATGTCAATGCGCGAATC', 'TGCAATGTCACACTTCACGAC', 'CTGGTGTGTCAGGTACGAACA', 'AGAGTTGGAACACTGCAAGGT', 'GACCTTCATCGGCAACTAGGT', 'AGCACGTACAACACCAGGTAT', 'GCCTTGTCTGTAACCGGAACT', 'TGCTTGTTAGAGGCAGTCGAT', 'CTGAACGTGGCATCTGGAGAT', 'GCTTGGTGACGAATCAGTTGC', 'AGTGGATGCCTTGAAGTGCCA', 'CGGAATCCACACATGCTTCTG', 'ATGGTGATGGTGCATGGAGTG', 'GTTCCAGCGCCAATTATTCCG', 'TCTCGGTGAAGACAGTCAACG', 'CGTTAACTGCCGTAATCACCG', 'TGCGTGTGAAGTTCCTGGATA', 'GAAGAGCTCTGCCATCCTCAT', 'ATCGTTGCTGTCCTGCGTACT', 'ACCAGTGCTTGCCTATACGCT', 'GAATAGCGCATGGACTGAGGT', 'CGCAACTCAGAACAGACAAGC', 'GCACTTATCCTTCGAGCTGCT', 'TCAACTGACGAATCACCACGA', 'GCACTTGAGCCAGAGTATTGC', 'TCAGGTGGCTTGATACGACCA', 'AGAAGACGTCCATCAGTCGCT', 'TGAGCCTTGAACAGAGTTGCG', 'TCTTCGCGTTGATCATCCTCG', 
'ATGGAGAAGATGGCTCGTGGA', 'GTTGGAGTCCGAAGCTTGGTA', 'TGCGGTATCGAACGTGTCTGT']
# Report how many candidates there are before any screening is applied.
print(len(primerLibrary))


# Primer screening procedures start from here
# Melting-temperature screen: a primer is rejected when its Tm is below 60 ℃
# or above 70 ℃. Every computed Tm is printed, in library order.
# NOTE(review): the earlier comment on this step said "60-65 ℃", but the bound
# coded here is 70 — confirm which range is intended.
meltingTempExcluded = []
for candidate in primerLibrary:
    candidateTm = primer3.calcTm(candidate, mv_conc=0, dv_conc=2.5, dna_conc=4000)
    print(candidateTm)
    tooCold = candidateTm < 60
    tooHot = candidateTm > 70
    if tooCold or tooHot:
        meltingTempExcluded.append(candidate)

# Rebuild the library without the rejected sequences, then report its size.
primerLibrary = [candidate for candidate in primerLibrary
                 if candidate not in meltingTempExcluded]
print(len(primerLibrary))


# No hairpin structure above 37 ℃
# A primer is rejected when the Tm reported by primer3.calcHairpin is 37 or higher.
hairpinExcluded = [
    candidate for candidate in primerLibrary
    if primer3.calcHairpin(candidate, mv_conc=0, dv_conc=2.5, dna_conc=4000).tm >= 37
]

# Keep only the primers that were not flagged above.
primerLibrary = [candidate for candidate in primerLibrary
                 if candidate not in hairpinExcluded]


# No primer homodimers formed above 37 ℃
# A primer is rejected when the Tm reported by primer3.calcHomodimer is 37 or higher.
homodimerExcluded = [
    candidate for candidate in primerLibrary
    if primer3.calcHomodimer(candidate, mv_conc=0, dv_conc=2.5, dna_conc=4000).tm >= 37
]

# Keep only the primers that were not flagged above.
primerLibrary = [candidate for candidate in primerLibrary
                 if candidate not in homodimerExcluded]


# No primer heterodimers formed above 37 ℃
# Greedy pass: each surviving primer (the "anchor") is paired with every primer
# after it in the list; partners whose heterodimer Tm is 37 or higher are
# dropped before moving on to the next anchor.
heterodimerExcluded = []
anchorIndex = 0
while anchorIndex < len(primerLibrary):
    anchor = primerLibrary[anchorIndex]
    for partner in primerLibrary[anchorIndex + 1:]:
        dimer = primer3.calcHeterodimer(anchor, partner, mv_conc=0, dv_conc=2.5, dna_conc=4000)
        if dimer.tm >= 37:
            heterodimerExcluded.append(partner)
    # Removal happens only after the anchor's full scan, so no partner is skipped.
    primerLibrary = [candidate for candidate in primerLibrary
                     if candidate not in heterodimerExcluded]
    heterodimerExcluded = []
    anchorIndex += 1
 

# Hamming distance between any two primers should >= 6 (Organick et al., 2018, Nature Biotechnology)
#
# Greedy screen: primers are visited in library order and one is kept only if
# it differs from every already-kept primer in at least 6 positions.
#
# The earlier index-based version deleted primerLibrary[j] while still looping
# over j. The primer that slid into slot j was then never compared with
# primer i, so pairs closer than 6 could survive. Building a separate list of
# kept primers avoids mutating the list that is being scanned.
def _hamming_distance(first, second):
    """Count the positions at which two primers differ.

    Positions are compared pairwise up to the length of the shorter primer
    (the earlier code was hard-wired to the first 21 positions).
    """
    return sum(a != b for a, b in zip(first, second))


# Scratch names from the earlier implementation, left with the values it ended on.
Hamming = 0
HammingExcluded = []

hammingKept = []
for candidate in primerLibrary:
    if all(_hamming_distance(candidate, kept) >= 6 for kept in hammingKept):
        hammingKept.append(candidate)
primerLibrary = hammingKept
        

# There should be no more than 10 bp inter-sequence complementarity between any two primers
#
# As coded, a primer is rejected when any 10-nt window of an already-kept
# primer equals a 10-nt window of the candidate's reverse complement.
#
# The earlier index-based version deleted primerLibrary[j] while still looping
# over j. The primer that slid into slot j was then never compared with
# primer i, so complementary pairs could survive. It also recomputed the
# reverse complement inside the innermost loop and hard-coded 12 window starts
# (the count for a 21-nt primer); the window count now follows primer length.
def _windows(seq, size):
    """Return the set of all contiguous substrings of `seq` that are `size` long."""
    return {seq[start:start + size] for start in range(len(seq) - size + 1)}


# Scratch name from the earlier implementation, left with the value it ended on.
similarityExcluded = []

similarityKept = []
keptWindows = []          # 10-nt window sets of the kept primers, same order
for candidate in primerLibrary:
    candidateRcWindows = _windows(DNA_reverse_complement(candidate), 10)
    if all(windows.isdisjoint(candidateRcWindows) for windows in keptWindows):
        similarityKept.append(candidate)
        keptWindows.append(_windows(candidate, 10))
primerLibrary = similarityKept

# GC content of primers 45-55%
# A primer is rejected when its GC fraction is below 0.45 or above 0.55.
GCExcluded = [
    candidate for candidate in primerLibrary
    if not (0.45 <= GCPercent(candidate) <= 0.55)
]
primerLibrary = [candidate for candidate in primerLibrary
                 if candidate not in GCExcluded]

# Final report: size of the screened library, then its contents.
print(len(primerLibrary))
print(primerLibrary)

# The IPython %store magic is kept inside a bare string literal, so it is not executed.
'''
%store primerLibrary
'''

82
66.16950712524408
66.2717479449899
68.41191318342356
65.91966893074209
67.24569828250361
65.03212849322972
65.24040308508194
65.56700208797702
68.56939550173689
65.77695928673978
65.9084568194508
65.42671867375566
65.79159610104244
65.56700208797702
66.8220890926375
66.48158699266332
67.6193787938953
67.62576895873144
66.11453074197158
66.67727065417074
66.25881526391117
68.12529703255518
65.26701099911077
65.16394318757114
66.88571974015298
66.40727205319484
65.82685583179602
65.91109275911577
66.1002446548419
65.24479323090418
65.84087979751558
66.13293947837042
65.5401169582322
65.49198708442879
66.53181355480552
66.11453074197158
65.62271040504561
65.2990911623005
66.94156242519728
66.52515838047259
65.08864774675499
66.88108728818281
67.10917891268866
65.56700208797702
66.31939638489746
65.03212849322972
65.35020211316504
66.01619949323066
66.29687889893444
67.07714963033578
67.35420360468606
65.05873752689439
65.6834635984784
65.68322002277182
65.60406083708023
65.215215567944

'\n%store primerLibrary\n'