# In [6]:  (Jupyter notebook cell prompt left over from export)
import pandas as pd
from Bio import SeqIO
from Bio import Seq as SeqO
from Bio import SeqRecord


import cfg

def load_top_codons(n):
    """Return the ``n`` most frequent codons per amino acid, plus the full table.

    The codon-frequency CSV is read from
    ``cfg.DATA_DIR + cfg.CODON_FREQ_INPUT_FILE``. The code relies on the
    columns ``codon``, ``aa`` and ``freq`` being present.

    Args:
        n: Maximum number of codons to keep for each amino acid.

    Returns:
        A ``(codons_by_aa, frequency_table)`` tuple where ``codons_by_aa``
        maps each ``aa`` value to a list of its codons ordered from highest
        to lowest ``freq`` (at most ``n`` entries), and ``frequency_table``
        is the unmodified DataFrame read from the CSV.
    """
    frequency_table = pd.read_csv(cfg.DATA_DIR + cfg.CODON_FREQ_INPUT_FILE)

    # Order rows by amino acid, then by descending frequency, so that taking
    # the first n rows of each amino-acid group yields its most frequent codons.
    ranked = frequency_table.sort_values(by=['aa', 'freq'], ascending=[True, False])
    best_rows = ranked.groupby(['aa']).head(n)[['codon', 'aa']]

    codons_by_aa = {
        aa: list(codons)
        for aa, codons in best_rows.groupby('aa')['codon']
    }

    return codons_by_aa, frequency_table

# Module-level lookup tables, built once when this cell/module runs:
#   TOP_N_CODONS - dict mapping each amino acid to a list of up to 6 codons,
#                  most frequent first (see load_top_codons).
#   ALL_CODONS   - the full codon-frequency DataFrame read from the CSV.
TOP_N_CODONS, ALL_CODONS = load_top_codons(6)

def nnn_fasta_gen(ref_dna, aa_i, library_name, upstream_constant, downstream_constant):
    """Build a single-site codon-substitution oligo library and write it to disk.

    The codon at 0-based codon index ``aa_i`` of the reference sequence is
    replaced, in turn, by every codon listed in the module-level
    ``TOP_N_CODONS`` mapping. Each variant is flanked by the two constant
    regions to form the final oligo.

    Two files are written under ``cfg.OLIGO_DIR``:

    * ``<library_name>_info.csv`` - one row per oligo with the columns
      ``id``, ``position``, ``original_aa``, ``new_aa``, ``new_codon``,
      ``new_dna_seq`` and ``oligo_seq``.
    * ``<library_name>.fasta`` - one record per oligo (id only, empty
      description) holding the full oligo sequence.

    Args:
        ref_dna: File name, relative to ``cfg.DATA_DIR``, of the FASTA file
            holding the reference DNA (read with ``SeqIO.read``, which
            requires exactly one record).
        aa_i: 0-based index of the codon to substitute; the affected
            nucleotides are ``[aa_i * 3, aa_i * 3 + 3)``.
        library_name: Prefix used for the output file names and oligo ids.
        upstream_constant: DNA string prepended to every variant.
        downstream_constant: DNA string appended to every variant.

    Returns:
        None. The results are the two files written to disk.

    Raises:
        ValueError: If the codon addressed by ``aa_i`` does not lie entirely
            inside the reference sequence.
    """
    fasta_path = cfg.OLIGO_DIR + library_name + '.fasta'
    template_fasta = SeqIO.read(cfg.DATA_DIR + ref_dna, 'fasta')
    dna_i = aa_i * 3

    # Without this check, slicing would silently return empty/partial pieces:
    # a negative or past-the-end index would insert or duplicate sequence
    # instead of substituting a codon, producing a corrupt library.
    if dna_i < 0 or dna_i + 3 > len(template_fasta):
        raise ValueError(
            'aa_i={0} is outside the reference sequence ({1} nt)'.format(
                aa_i, len(template_fasta)))

    # Store the original residue as a plain string rather than a Seq object
    # so the info table only contains simple scalar values.
    original_aa = str(template_fasta[dna_i:dna_i + 3].translate().seq)

    # The flanks of the substituted codon are the same for every variant.
    template_prefix = template_fasta[:dna_i]
    template_suffix = template_fasta[dna_i + 3:]

    oligos = []
    for aa, codons in TOP_N_CODONS.items():
        for codon in codons:
            new_dna_seq = template_prefix + codon + template_suffix
            oligo_seq = upstream_constant + new_dna_seq + downstream_constant

            oligos.append({
                # The running count keeps ids unique across the whole library.
                'id': library_name + aa + '_OLIGO' + str(len(oligos)),
                'position': aa_i,
                'original_aa': original_aa,
                'new_aa': aa,
                'new_codon': codon,
                'new_dna_seq': str(new_dna_seq.seq),
                'oligo_seq': str(oligo_seq.seq),
            })

    pd.DataFrame(oligos).to_csv(cfg.OLIGO_DIR + library_name + '_info.csv')

    # Fresh records with an empty description keep the FASTA headers down to
    # just the oligo id.
    records = [
        SeqRecord.SeqRecord(
            SeqO.Seq(oligo['oligo_seq']),
            id=oligo['id'],
            description='')
        for oligo in oligos
    ]
    with open(fasta_path, 'w') as output_handle:
        SeqIO.write(records, output_handle, 'fasta')

# Generate three single-site libraries from the same reference template.
# NOTE(review): library names look like <original residue><protein position>
# (codon index + 101) - confirm against the template.

# Codon index 1 -> library 'G102'.
nnn_fasta_gen(
    ref_dna='trbc_t3_library_template.fasta',
    aa_i=1,
    library_name='G102',
    upstream_constant='GAAGGAACTTTTGCGTGTTAGTATGTTGACTACATGCTCTGGTACAATACATACGTGTTCCGGCTGTTATCCTGCATCGGAACCTCAATCATGCATGAAGGCTCGCCAGGTCTCACCAGTTT',
    downstream_constant='ATTCAGAGACCAACGCCAGGG',
)

# Codon index 11 -> library 'D112'.
nnn_fasta_gen(
    ref_dna='trbc_t3_library_template.fasta',
    aa_i=11,
    library_name='D112',
    upstream_constant='TGAAGGCTCGCCAGGTCTCACCAGTTT',
    downstream_constant='ATTCAGAGACCAACGCCAGGG',
)

# Codon index 15 -> library 'P116'.
nnn_fasta_gen(
    ref_dna='trbc_t3_library_template.fasta',
    aa_i=15,
    library_name='P116',
    upstream_constant='TACATGCTCTGGTACAATACATACGTGTTCCGGCTGTTATCCTGCATCGGAACCTCAATCATGCATGAAGGCTCGCCAGGTCTCACCAGTTT',
    downstream_constant='ATTCAGAGACCAACGCCAGGGTAC',
)
