## dTomato Assay combinatorics
### Load promoter & terminator libraries and the dTomato sequence

In [1]:
import os
# Switch the working directory to the parent folder so that the relative
# `data/...` paths used in the following cells resolve from there.
# NOTE(review): presumably the notebook lives one level below the project
# root and this also makes the `src` package importable -- confirm.
# NOTE: this is not idempotent; re-running the cell moves up one more level.
os.chdir("..")

from teemi.design.combinatorial_design import DesignAssembly
from src.smart_functions import read_fasta_to_dseqrecords
from IPython.display import display

In [3]:
# Input FASTA files, given relative to the current working directory.
promoters_fa = r'data/promoter_terminator_library/promoter_library.fasta'
terminators_fa = r'data/promoter_terminator_library/terminator_library.fasta'

m_paba_fa = r'data/insert_sequences/PABA.fasta'
dTomato_fa = r'data/insert_sequences/dTomato_non_optimized.fasta'

# Each call is unpacked into (records, names); the records are indexed and
# measured with len() below, and each record exposes a `.seq` attribute.
promoters, promoter_names = read_fasta_to_dseqrecords(promoters_fa)
cds_records, cds_names = read_fasta_to_dseqrecords(dTomato_fa)
terminators, terminator_names = read_fasta_to_dseqrecords(terminators_fa)
m_paba, m_paba_names = read_fasta_to_dseqrecords(m_paba_fa)

# Validate before indexing: the assembly needs the first CDS record and the
# first two PABA records, so fail with a descriptive message instead of a bare
# IndexError when an input file is empty or incomplete.
if not cds_records:
    raise ValueError(f"No CDS record found in {dTomato_fa}")
if len(m_paba) < 2:
    raise ValueError(
        f"Expected at least 2 records in {m_paba_fa}, found {len(m_paba)}"
    )

# One list per assembly position: the first PABA record, every promoter,
# the single CDS, every terminator, and the second PABA record.
list_of_seqs = [[m_paba[0]], promoters, [cds_records[0]], terminators, [m_paba[1]]]

print(f"Promoters: {len(promoters)}, CDS: {len(cds_records)}, Terminators: {len(terminators)}")
if promoters:
    print("Example promoter length:", len(promoters[0].seq))
# cds_records is guaranteed non-empty by the validation above.
print("Example CDS length:", len(cds_records[0].seq))

Promoters: 19, CDS: 1, Terminators: 9
Example promoter length: 1000
Example CDS length: 702


In [4]:
# Design parameters forwarded to DesignAssembly as the `target_tm`, `limit`
# and `overlap` keyword arguments.
# NOTE(review): presumably target Tm in degrees C and overlap length in bp --
# confirm against the teemi documentation.
TARGET_TM = 65
LIMIT = 13
OVERLAP = 35

# Build the combinatorial design from the per-position sequence lists; no pads
# are used (both pad arguments are empty lists).
design = DesignAssembly(
    list_of_seqs,
    list_of_pads=[],
    positions_of_pads=[],
    target_tm=TARGET_TM,
    limit=LIMIT,
    overlap=OVERLAP,
)
variants_df = design.show_variants_lib_df()
primers_df = design.primer_list_to_dataframe()
pcrs_df = design.pcr_list_to_dataframe()


out_dir = os.getcwd()
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
os.makedirs(os.path.join(out_dir, "data/constructs"), exist_ok=True)

# NOTE: the "flouresence" spelling in the file names is kept as-is so that
# anything already reading these files keeps working.
variants_csv = os.path.join(out_dir, "data/constructs/flouresence_variants_library.csv")
primers_csv = os.path.join(out_dir, "data/constructs/flouresence_primers_list.csv")
pcrs_csv = os.path.join(out_dir, "data/constructs/flouresence_pcr_plan.csv")

# Export the three tables without the DataFrame index column.
variants_df.to_csv(variants_csv, index=False)
primers_df.to_csv(primers_csv, index=False)
pcrs_df.to_csv(pcrs_csv, index=False)

# Report table sizes and preview the first rows of each table.
print(f"Variants: {len(variants_df)}")
print(f"Primers:  {len(primers_df)}")
print(f"PCRs:     {len(pcrs_df)}")
display(variants_df.head())
display(primers_df.head())
display(pcrs_df.head())

Variants: 171
Primers:  110
PCRs:     217


Unnamed: 0,0,1,2,3,4,Systematic_name,Variant
0,PABA-UP,PKG1_promoter,dTomato,PKG1_terminator,PABA-DW,"(1, 1, 1, 1, 1)",0
1,PABA-UP,PKG1_promoter,dTomato,ADH1_terminator,PABA-DW,"(1, 1, 1, 2, 1)",1
2,PABA-UP,PKG1_promoter,dTomato,TDH3_terminator,PABA-DW,"(1, 1, 1, 3, 1)",2
3,PABA-UP,PKG1_promoter,dTomato,ACT_terminator,PABA-DW,"(1, 1, 1, 4, 1)",3
4,PABA-UP,PKG1_promoter,dTomato,TEF1_terminator,PABA-DW,"(1, 1, 1, 5, 1)",4


Unnamed: 0,id,anneals to,sequence,annealing temperature,length,price(DKK),description,footprint,len_footprint
0,P001,PABA-UP,"(T, T, C, T, T, C, T, G, G, C, A, T, C, T, T, ...",65.35,24,43.2,Anneals to PABA-UP,"(T, T, C, T, T, C, T, G, G, C, A, T, C, T, T, ...",24
1,P002,PABA-UP,"(A, T, C, A, C, G, A, C, C, A, G, A, T, A, A, ...",64.98,39,70.2,"Anneals to PABA-UP, overlaps to 1036bp_PCR_prod","(C, C, T, C, T, C, T, T, A, C, T, C, C, C, G, ...",21
2,P003,PKG1_promoter,"(G, G, A, C, G, G, G, A, G, T, A, A, G, A, G, ...",65.62,42,75.6,"Anneals to PKG1_promoter, overlaps to PABA-UP","(G, T, G, T, T, A, T, C, T, G, G, T, C, G, T, ...",24
3,P004,PKG1_promoter,"(C, T, C, G, C, C, C, T, T, G, C, T, C, A, C, ...",66.16,46,82.8,"Anneals to PKG1_promoter, overlaps to 738bp_PC...","(T, G, T, G, G, A, T, T, G, T, G, A, A, G, A, ...",28
4,P005,dTomato,"(T, C, T, T, C, T, T, C, A, C, A, A, T, C, C, ...",66.25,37,66.6,"Anneals to dTomato, overlaps to PKG1_promoter","(A, T, G, G, T, G, A, G, C, A, A, G, G, G, C, ...",19


Unnamed: 0,pcr_number,template,forward_primer,reverse_primer,f_tm,r_tm
0,PCR1,PABA-UP,P001,P002,65.35,64.98
1,PCR2,PKG1_promoter,P003,P004,65.62,66.16
2,PCR3,dTomato,P005,P006,66.25,66.73
3,PCR4,PKG1_terminator,P007,P008,64.79,65.26
4,PCR5,PABA-DW,P009,P010,64.55,64.57
