In [76]:
import sys
import os
from os.path import join

# `__file__` is not defined inside a notebook kernel, so the notebook's folder is taken
# to be the kernel's current working directory. This is the same directory that
# os.path.dirname(os.path.abspath('__file__')) yields (that expression resolves the
# literal file name '__file__' against the cwd), just stated explicitly.
# NOTE: start the kernel from the notebook's own folder, otherwise the relative
# input/result paths built from SCRIPT_DIR will not resolve.
SCRIPT_DIR = os.getcwd()

# Make the parent folder importable. The membership test keeps sys.path free of
# duplicate entries when this cell is executed more than once in the same kernel.
if os.path.dirname(SCRIPT_DIR) not in sys.path:
    sys.path.append(os.path.dirname(SCRIPT_DIR))
print(f'Here is: {SCRIPT_DIR}')

from proseqteleporter.fragment_assembler.plate_mapper import make_and_validate_plate_mapping_sheet, make_desired_variant_list_from_a_list_of_mutations
from proseqteleporter.utils.load_input_params import load_input_params
from proseqteleporter.post_partition_processor.post_partition_processor import validate_partitioned_fragments_by_insilico_assembly
from proseqteleporter.post_partition_processor.post_partition_processor import import_mutant_dna_fragments_from_module_sheet


# Folder (below SCRIPT_DIR) that holds every input file and results directory listed here.
_ECOLI_INPUT_DIR = join(SCRIPT_DIR, 'input_ecoli2')

# Per-construct configuration, keyed by the name assigned to `sel_input`.
#   abbreviation          : short label for the construct
#   input_file            : parameter file passed to load_input_params
#   results_dir           : directory containing the exported 'order_modules.xlsx'
#   incl_wt_MUTATIONS_1dx : either the string 'all' or a list of
#                           {'position': int, 'aa': [...]} dicts; only the 'position'
#                           values are read when the variant list is built
INPUT_DICT = {
    'ecoli_AzamiGreen': {
        'abbreviation': 'eco_AzG',
        'input_file': join(_ECOLI_INPUT_DIR, 'azami_green_blue_input_corr2_5f.txt'),
        'results_dir': join(_ECOLI_INPUT_DIR, 'azami_green_blue_2024-06-16output', 'results'),
        'incl_wt_MUTATIONS_1dx': 'all',
    },
    'ecoli_AzamiRed10': {
        'abbreviation': 'eco_AzR',
        'input_file': join(_ECOLI_INPUT_DIR, 'azami_red10_blue_input_corr2_5f_lim_cut.txt'),
        'results_dir': join(
            _ECOLI_INPUT_DIR,
            'azami_red10_blue_input_corr2_5f_lim_cut_2024-06-19output',
            'results',
        ),
        'incl_wt_MUTATIONS_1dx': 'all',
    },
    'ecoli_TagRFP': {
        'abbreviation': 'eco_TagR',
        'input_file': join(_ECOLI_INPUT_DIR, 'tag_rfp_blue_input_corr2_5f.txt'),
        'results_dir': join(_ECOLI_INPUT_DIR, 'tag_rfp_blue_2024-06-16output', 'results'),
        'incl_wt_MUTATIONS_1dx': [
            {'position': 67, 'aa': ['L', 'H']},
            {'position': 71, 'aa': ['K']},
            {'position': 84, 'aa': ['W', 'L']},
            {'position': 147, 'aa': ['F', 'I']},
            {'position': 162, 'aa': ['A', 'I', 'N']},
            {'position': 178, 'aa': ['A', 'I']},
            {'position': 201, 'aa': ['F', 'Y']},
        ],
    },
}

Here is: C:\Users\GOFKV\PycharmProjects\proseqteleporter\experiments\fluorescence_protein


## Set Input

In [77]:
# Key of the INPUT_DICT entry to process in the cells below.
# Other available keys: 'ecoli_AzamiGreen', 'ecoli_AzamiRed10'.
sel_input = 'ecoli_TagRFP'

In [78]:
# Resolve the two files this run works with: the parameter file of the selected
# construct and the module order sheet stored in its results directory.
input_file_path = INPUT_DICT[sel_input]['input_file']
module_sheet_path = join(INPUT_DICT[sel_input]['results_dir'], 'order_modules.xlsx')

# Parse the parameter file; supress_output=False leaves the loader's "Load ..." log on.
# (The keyword spelling `supress_output` is the loader's own and must be kept.)
input_params = load_input_params(input_file_path=input_file_path, supress_output=False)

# Pull out the entries used most often; all three lookups happen before any name is bound.
s, mutations_1idx, linked_mutations_1idx = (
    input_params[key] for key in ('s', 'mutations_1idx', 'linked_mutations_1idx')
)


Load ALLOWED_CUT_POSITIONS=[]
Load DNA_5_PRIME=AATTTGGTCTCTCC
Load DNA_3_PRIME=TAATAGAGACCTTTAA
Load FIX_DNA_SEQUENCE=ATGGTTAGCAAAGGTGAAGAACTGATCAAAGAAAACATGCATATGAAACTGTACATGGAAGGCACCGTGAATAACCATCACTTTAAATGTACCAGCGAAGGTGAAGGTAAACCGTATGAAGGCACCCAGACCATGCGTATTAAAGTTGTTGAAGGTGGTCCGCTGCCGTTTGCATTTGATATTCTGGCAACCAGCTTTATGTATGGTAGCCGTACCTTTATCAATCATACCCAGGGTATTCCGGATTTCTTTAAACAGAGCTTTCCGGAAGGTTTTACCTGGGAACGTGTTACCACCTATGAAGATGGTGGTGTTCTGACCGCAACACAGGATACCAGTCTGCAGGATGGTTGTCTGATTTATAATGTGAAAATTCGCGGTGTGAACTTTCCGAGCAATGGTCCGGTTATGCAGAAAAAGACCTTAGGTTGGGAAGCAAATACCGAAATGCTGTATCCGGCAGATGGTGGCCTGGAAGGTCGTAGCGATATGGCACTGAAACTGGTTGGTGGTGGTCATCTGATTTGTAACTTTAAAACCACCTACCGTAGCAAGAAACCTGCCAAAAATCTGAAAATGCCTGGCGTGTATTATGTGGATCATCGTCTGGAACGCATTAAAGAAGCAGATAAAGAAACCTATGTGGAACAGCATGAAGTTGCAGTTGCACGTTATTGTGATCTGCCGAGCAAACTGGGTCATAAACTGAAT
Load FUSION_SITES_USED_BY_BACKBONE=('CTTG', 'TAGC', 'CCAT', 'CGCT', 'TAAT')
Load HOST=e_coli
Load FIDELITY_DATA_PATH=C:\Users\GOFKV\PycharmProjects\proseqteleporter\prose

In [79]:
# Read the mutant DNA fragments back from the exported module order sheet.
imported_mutant_dna_fragments = import_mutant_dna_fragments_from_module_sheet(module_sheet_path=module_sheet_path)

# Check the imported fragments by in-silico assembly against the wild-type sequence.
# The call is kept as the cell's last expression so its return value is displayed.
# NOTE(review): sample_number presumably sets how many random combinations are
# assembled; the sampling is not seeded here, so the sampled combinations may differ
# between runs — confirm against the library.
validate_partitioned_fragments_by_insilico_assembly(
    # what is being validated
    mutant_dna_fragments=imported_mutant_dna_fragments,
    wt_seq=input_params['s'],
    # assembly context taken from the input parameter file
    five_prime_dna=input_params['five_prime_dna'],
    three_prime_dna=input_params['three_prime_dna'],
    enzyme='BsaI',
    coding_start=None,
    # size of the validation run
    sample_number=50,
)

Validating mutant dna fragments by in-silico assembly...
Random combi in-silico assembly: N24D_M67H_P131T_R71K_F84L_N147I_M150T_G172S_C176A_F178I_H201Y_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_M67L_P131T_N147F_M150T_S162N_G172S_C176A_F178I_H201F_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_P131T_R71K_M150T_S162N_G172S_C176A_F178I_H201F_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_P131T_R71K_F84L_M150T_G172S_C176A_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_M67L_P131T_F84L_N147I_M150T_G172S_C176A_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_M67H_P131T_F84W_N147F_M150T_G172S_C176A_H201Y_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_M67L_P131T_F84L_M150T_G172S_C176A_F178I_H201Y_D210N_K211N, 
  Validation Passed: True
Random combi in-silico assembly: N24D_M67L_P131T_M150T_G172S_C176A_F178A_D

True

In [80]:
# Troubleshooting: if reading the Excel sheet fails with
# "Can't find workbook in OLE2 compound document", remove the Excel sensitivity label
# from the workbook and try again.
# Ref: https://stackoverflow.com/questions/45725645/pandas-unable-to-open-this-excel-file

# Positions (1-indexed) at which the wild-type residue is also allowed: every mutated
# position when the selected entry says 'all', otherwise only the positions it lists.
positions_include_wt_aa_1idx = [
    entry['position']
    for entry in (
        mutations_1idx
        if INPUT_DICT[sel_input]['incl_wt_MUTATIONS_1dx'] == 'all'
        else INPUT_DICT[sel_input]['incl_wt_MUTATIONS_1dx']
    )
]

# Build the list of desired variants from the mutation list and the positions above.
probable_variants = make_desired_variant_list_from_a_list_of_mutations(
    s=input_params['s'],
    mutations_1idx=mutations_1idx,
    positions_include_wt_aa=positions_include_wt_aa_1idx,
)

print(f'Number of teleportable variants: {len(probable_variants)}')


Number of teleportable variants: 1944


In [81]:

# Build the source -> destination plate map for every desired variant, validate it,
# and export it next to the module order sheet it was derived from.
make_and_validate_plate_mapping_sheet(
    # inputs
    fragment_sheet_path=module_sheet_path,
    desired_variant_muts_list=probable_variants,
    desired_variant_names=None,
    # plate layout / numbering
    plate_format=96,
    start_plasmid_id=None,
    # the sheet is written into the folder that holds the module sheet
    output_dir=os.path.dirname(module_sheet_path),
)



The plate mapping sheet is validated!

Plate mapping sheet is exported to:
 C:\Users\GOFKV\PycharmProjects\proseqteleporter\experiments\fluorescence_protein\input_ecoli2\tag_rfp_blue_2024-06-16output\results\2024-06-24_source_destination_plate_map.xlsx

Preview of mapping sheet: 
   Source Plate   Target Plate Source Well Target Well                 Volume  \
0       plate1  target_plate1          A2          A1  PLEASE FILL IN VOLUME   
1       plate1  target_plate1          A8          A1  PLEASE FILL IN VOLUME   
2       plate1  target_plate1          B3          A1  PLEASE FILL IN VOLUME   

                      Module Module Plasmid Number  \
0             1-68_N24D_M67L              default1   
1     69-132_P131T_R71K_F84W              default1   
2  133-165_N147F_M150T_S162A              default1   

                                 Target Variant Name  
0  D210N_N24D_C176A_F84W_K211N_R71K_P131T_M67L_M1...  
1  D210N_N24D_C176A_F84W_K211N_R71K_P131T_M67L_M1...  
2  D210N_N24D_