In [None]:
#import libraries

import pandas as pd
import os


"""
Reverse primer generator for 11-mer spike-in synthetic library generation.

TEMPLATE: for 7-mer (MNN is the reverse complement of NNK)
5' to 3'
gtattccttggttttgaacccaAccggtctgcgcctgtgcMNNMNNMNNMNNMNNMNNMNNttgggcactctggtggtttgtg

Primer overhangs for the 588 11-mer mutation. In the template above the overhangs
sit on either side of ctgtgc + 7 x MNN + ttgggc, i.e. 11 codon positions.
5' end of reverse primer: GTATTCCTTGGTTTTGAACCCAACCGGTCTGCGC
3' end of reverse primer: ACTCTGGTGGTTTGTGGCCACTTG

"""

In [None]:
#USER PARAMETERS
#Folder that holds the input Excel file; it is joined with INPUT_FILE_NAME to build the path that is read.
WORKSPACE_DIRECTORY = "example_data"

#For 11mer spike-in sequences, use an Excel sheet with a list of 11mer sequences as given in the example file.
INPUT_FILE_NAME = "Sequencelist_11mer.xlsx"
#Header of the input column whose values are passed to reverse_complement (DNA sequences).
SEQUENCE_COLUMN_NAME = "Sequence"
#Header of the input column whose values are passed to alternate_codon_dict (one-letter amino-acid sequences).
AMINOACID_COLUMN_NAME = "Aminoacid"
#Name of the Excel file that is written. It is handed to DataFrame.to_excel as-is,
#so it is NOT joined with WORKSPACE_DIRECTORY.
OUTPUT_FILE_NAME = "Primerlist_11mer.xlsx"


In [None]:
#function to generate reverse complementary sequence

def reverse_complement(dna):
    """Return the reverse complement of a DNA sequence.

    Parameters
    ----------
    dna : str
        Sequence made of the bases A, C, G and T. Lower-case bases are
        accepted and treated as their upper-case equivalents.

    Returns
    -------
    str
        The complemented sequence read in the opposite direction, always in
        upper case. An empty input gives an empty string.

    Raises
    ------
    KeyError
        If the sequence contains anything other than A, C, G or T
        (for example N, U or whitespace).
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    try:
        # Upper-case each base so that lower-case input is not rejected.
        return ''.join(complement[base.upper()] for base in reversed(dna))
    except KeyError as err:
        # Same exception type as a plain dictionary miss, but the message
        # names the offending base and the sequence it came from.
        raise KeyError(
            f"invalid base {err.args[0]!r} in DNA sequence {dna!r}; "
            "expected only A, C, G or T"
        ) from None

In [None]:
#function to generate sequence with defined mammalian codons that is non-NNK format.
#Alternate codon dictionary: codon optimized for mouse but avoids T and G in the last position
#to keep the codons distinct from the NNK primer. Used Genscript codon table for mouse.
#Exception: M (ATG) and W (TGG) each have a single codon, so they necessarily end in G.

def alternate_codon_dict(aminoacid):
    """Translate a one-letter amino-acid sequence into its alternate-codon DNA sequence.

    Parameters
    ----------
    aminoacid : str
        Sequence of one-letter amino-acid codes (the 20 standard residues).
        Lower-case letters are accepted and treated as upper case.

    Returns
    -------
    str
        Concatenation of one fixed codon per residue, in input order
        (3 nucleotides per residue). An empty input gives an empty string.

    Raises
    ------
    KeyError
        If the sequence contains a character that is not one of the 20
        residue codes in the table (for example '*', 'X' or whitespace).
    """
    # Every codon ends in A or C except ATG (M) and TGG (W), which have no alternative.
    codon_table = {'A':'GCC', 'C':'TGC', 'D':'GAC', 'E':'GAA', 'F':'TTC', 'G':'GGC',
                   'H':'CAC', 'I':'ATC', 'K':'AAA', 'L':'CTC', 'M':'ATG', 'N':'AAC', 'P':'CCC',
                   'Q':'CAA', 'R':'AGA', 'S':'AGC', 'T':'ACC', 'V':'GTC', 'W':'TGG', 'Y':'TAC'}
    try:
        # Upper-case each residue so that lower-case input is not rejected.
        return ''.join(codon_table[residue.upper()] for residue in aminoacid)
    except KeyError as err:
        # Same exception type as a plain dictionary miss, but the message
        # names the offending residue and the sequence it came from.
        raise KeyError(
            f"unknown amino acid {err.args[0]!r} in sequence {aminoacid!r}; "
            "expected one-letter codes of the 20 standard residues"
        ) from None

In [None]:
# Constant overhangs that flank the variable region of every reverse primer.
Rev_fiveprime = 'GTATTCCTTGGTTTTGAACCCAACCGGTCTGCGC'
Rev_threeprime = 'ACTCTGGTGGTTTGTGGCCACTTG'

# Location of the input sheet inside the workspace folder.
file1 = os.path.join(WORKSPACE_DIRECTORY, INPUT_FILE_NAME)

# Each keyword below adds one column; later columns are computed from earlier ones,
# so the order of the keywords matters.
df = pd.read_excel(file1).assign(
    # reverse complement of the input DNA sequence
    Reverse_complement=lambda table: table[SEQUENCE_COLUMN_NAME].apply(reverse_complement),
    # amino-acid sequence re-encoded with the alternate codons
    Alternate_codon=lambda table: table[AMINOACID_COLUMN_NAME].apply(alternate_codon_dict),
    # reverse complement of the alternate-codon sequence
    ReverseC_Alt_codon=lambda table: table['Alternate_codon'].apply(reverse_complement),
    # reverse primer for the input sequence
    Rev_primer=lambda table: Rev_fiveprime + table['Reverse_complement'] + Rev_threeprime,
    # reverse primer for the alternate-codon sequence
    Alternate_Rev_primer=lambda table: Rev_fiveprime + table['ReverseC_Alt_codon'] + Rev_threeprime,
)

# Write the full table, including the intermediate columns, to the output workbook.
df.to_excel(OUTPUT_FILE_NAME)


#"Rev_primer" and "Alternate_Rev_primer" columns are the final output primers used for synthetic pool library.
