## INSTRUCTIONS
--------------

1. Copy & paste the file `input-template.txt` in the folder `molbio-cookie-cutter/` and rename it as `input.txt`.
2. Open `input.txt`, replace the default amino acid sequence with your own sequence, replace the default mutations with your own mutations, and adjust the other parameters to your needs.
3. Save `input.txt`.
4. Open `cookie_cutter.ipynb` and click the double-triangle icon to restart the kernel and run all cells.
5. Scroll down to the very bottom of this notebook to view the run status and wait for the run to complete. Depending on your sequence and your parameters, it might take a while. When the run has completed, you will see "RUN COMPLETED!" at the very bottom of the notebook.
6. When the Run is complete, go to `molbio-cookie-cutter/output/results/` to see your results.


## IMPORT REQUIRED RESOURCES
--------------

In [1]:
"""Imports"""
import json
import re
import sys
import os

# NOTE: '__file__' is a quoted string literal here, not the `__file__` variable.
# abspath() therefore resolves it against the current working directory, and dirname()
# strips the literal name again, so SCRIPT_DIR ends up being the working directory
# the kernel was started in.
SCRIPT_DIR = os.path.dirname(os.path.abspath('__file__'))
# Add the parent of SCRIPT_DIR to the module search path; this must happen before the
# project imports below (presumably so those packages can be resolved -- confirm
# against the repository layout).
sys.path.append(os.path.dirname(SCRIPT_DIR))
print(f'Here is: {SCRIPT_DIR}')

from experiments.partition_search_opt.partition_search_config import *

from proseqteleporter.random_sample_generator.random_sample_generator import random_sample_generator
from proseqteleporter.partitioner.partitioner import compute_best_partitions, get_lowest_cost_from_best_partitions
from proseqteleporter.utils.utils import one_indexing_to_zero_indexing, include_linked_mutations_into_mutations, print_available_resources, annotate_sequence_mutations

Here is: C:\Users\GOFKV\PycharmProjects\proseqteleporter\experiments\partition_search_opt
--------------------------------------------------------------------------------------------------
Load input parameters.
--------------------------------------------------------------------------------------------------
Load ALLOWED_CUT_POSITIONS=[]
Load DNA_5_PRIME=TTTGGTCTCTAATG
Load DNA_3_PRIME=TAATTGAGACCTTT
Load FIX_DNA_SEQUENCE=
Load HOST=c_griseus
Load FIDELITY_DATA_PATH=C:\Users\GOFKV\PycharmProjects\proseqteleporter\data\neb_fidelity_data\FileS01_T4_01h_25C.xlsx
Load SEQUENCE=WAFTGDYSKVWMAWPISCMCYSNENKLESRHKHPMKVRSTQMKMSFQQKE
Load MUTATIONS=[{'position': 1, 'aa': ['S', 'I']}, {'position': 8, 'aa': ['G', 'E', 'R', 'K', 'T', 'Q']}, {'position': 10, 'aa': ['Q', 'T', 'F', 'P', 'K', 'D', 'C']}, {'position': 14, 'aa': ['E', 'I', 'V', 'H']}, {'position': 26, 'aa': ['M', 'I']}, {'position': 27, 'aa': ['Y', 'M']}, {'position': 28, 'aa': ['T', 'H', 'W', 'I', 'L']}, {'position': 32, 'aa': ['Q', 'E'

## AVAILABLE HOSTS & ASSEMBLY CONDITIONS
--------------

In [2]:
# Print the available hosts and assembly conditions (presumably the valid choices for
# the HOST and fidelity-data input parameters -- confirm against the helper).
print_available_resources()

--------------------------------------------------------------------------------------------------
|                                    AVAILABLE HOSTS                                             |
--------------------------------------------------------------------------------------------------
c_griseus
e_coli
p_pastoris
s_cerevisiae
--------------------------------------------------------------------------------------------------
|                          AVAILABLE ASSEMBLY CONDITIONS                                         |
--------------------------------------------------------------------------------------------------
T4_01h_25C
T7_18h_25C
T7_18h_37C
--------------------------------------------------------------------------------------------------


## RUN & SHOW RESULTS

In [3]:
def prepare_output_dirs(output_dir):
    """Create the output directory tree and empty the log directory.

    Makes sure ``output_dir``, ``output_dir/logs`` and ``output_dir/results`` exist,
    then removes leftover entries from the log directory so that every run starts
    with clean logs.

    Args:
        output_dir: Root directory that receives the outputs of a run.

    Returns:
        tuple: ``(log_dir, result_dir)``, the paths of the log and result folders.
    """
    log_dir = os.path.join(output_dir, 'logs')
    result_dir = os.path.join(output_dir, 'results')
    for directory in (output_dir, log_dir, result_dir):
        # exist_ok avoids the check-then-create race and creates missing parents.
        os.makedirs(directory, exist_ok=True)

    # Clean the log directory before starting the run.
    for entry in os.listdir(log_dir):
        # Jupyter's checkpoint folder is not a log; leave it alone.
        if re.search('ipynb_checkpoints', entry):
            continue
        # Use os.path.join explicitly instead of a bare `join`, which only resolves
        # when some other module happens to export it through a star import.
        entry_path = os.path.join(log_dir, entry)
        # os.remove() cannot delete directories; skip them instead of aborting the run.
        if os.path.isdir(entry_path):
            continue
        os.remove(entry_path)
    return log_dir, result_dir

In [4]:
"""Define function - Main()"""
def main(mutations_1idx, linked_mutations_1idx, s, generate_random_input, number_of_loops, supress_output,
         output_dir, pre_distribute_mutations_lst, cost_scan_lst, one_dist_lst, cond_lst):
    """Run the partition search for every loop and every search condition and export the results.

    For each loop the input (sequence, mutations, linked mutations) is either taken from the
    arguments or replaced by a random sample. The four ``*_lst`` arguments are parallel lists:
    position ``i`` of each list together describes one search condition. For every condition
    ``compute_best_partitions`` is called and three JSON files are written to
    ``<output_dir>/results``: ``lowest_cost_partition``, ``best_partitions_by_cut_number`` and
    ``params``, each suffixed with ``_<cond>_<loop>.json``.

    Args:
        mutations_1idx: 1-indexed mutations; ignored when ``generate_random_input`` is true.
        linked_mutations_1idx: 1-indexed linked mutations; ignored when ``generate_random_input``
            is true.
        s: Amino acid sequence; ignored when ``generate_random_input`` is true.
        generate_random_input: When true, every loop draws a fresh random sequence of length
            ``loop * 25 + 50`` together with random mutations.
        number_of_loops: Number of runs; loops are numbered from 1.
        supress_output: Forwarded to ``compute_best_partitions`` and
            ``get_lowest_cost_from_best_partitions``.
        output_dir: Root output directory; ``logs`` and ``results`` are created inside it.
        pre_distribute_mutations_lst: Per-condition ``pre_distribute_mutations`` flags.
        cost_scan_lst: Per-condition ``cost_scan`` flags.
        one_dist_lst: Per-condition ``one_dist`` flags.
        cond_lst: Per-condition labels used in the result file names.

    Raises:
        ValueError: If the four condition lists do not have the same length.
    """
    # zip() would silently drop the trailing conditions of the longer lists; fail loudly instead.
    condition_lists = (cost_scan_lst, pre_distribute_mutations_lst, one_dist_lst, cond_lst)
    if len({len(lst) for lst in condition_lists}) > 1:
        raise ValueError(
            'cost_scan_lst, pre_distribute_mutations_lst, one_dist_lst and cond_lst must have the same length, '
            f'got lengths {[len(lst) for lst in condition_lists]}'
        )

    log_dir, result_dir = prepare_output_dirs(output_dir)

    separator = '--------------------------------------------------------------------------------------------------'
    # Loop-invariant: the codon usage tables live next to the fidelity data folder.
    codon_usage_tbl_dir = os.path.join(os.path.dirname(os.path.dirname(FIDELITY_DATA_PATH)), 'codon_usage')

    for loop in range(1, number_of_loops + 1):
        print(f'\033[1m{separator}\033[0m')
        print(f'                                        \033[1m RUN {loop}\033[0m                               ')
        print(f'\033[1m{separator}\033[0m')

        if generate_random_input:
            # replace sequence and mutations with output of random_sample_generator()
            print(separator)
            print('Replacing default sequence and mutations with randomly generated sequence and mutations.')
            print(separator)

            # The upper bound on mutated positions is derived from the length that is about to be
            # generated, not from the stale sequence of the caller / the previous loop.
            aa_length = loop * 25 + 50
            run_s, run_mutations_1idx, run_linked_mutations_1idx = random_sample_generator(
                min_aa_length=aa_length, max_aa_length=aa_length,
                min_number_of_positions=6, max_number_of_positions=aa_length // 5,
                min_variations_per_position=1, max_variations_per_position=10,
                max_positions_per_linked_mutation_set=0, max_number_of_mutation_linked_mutation_sets=0
            )
        else:
            # Start every loop from the caller's input so the merged mutations of one loop are
            # never fed back into the next one.
            run_s, run_mutations_1idx, run_linked_mutations_1idx = s, mutations_1idx, linked_mutations_1idx

        run_mutations_1idx = include_linked_mutations_into_mutations(run_mutations_1idx, run_linked_mutations_1idx)

        mutations_0idx, linked_mutations_0idx = one_indexing_to_zero_indexing(
            mutations_1idx=run_mutations_1idx,
            linked_mutations_1idx=run_linked_mutations_1idx
        )

        s_annotated = annotate_sequence_mutations(run_s, mutations_0idx, linked_mutations_0idx)
        # Each line resets the bold attribute so it does not bleed into later output.
        print(f'\033[1mSequence (red: independent mutation positions; other colors: linked mutation positions) = '
              f'\n{"".join(s_annotated.values())}\033[0m')
        print(f'\033[1mSequence Length = {len(run_s)}\033[0m')
        print(f'\033[1mMutations (1-indexed) = {run_mutations_1idx}\033[0m')
        print(f'\033[1mLinked mutations (1-indexed) = {run_linked_mutations_1idx}\033[0m')

        for cost_scan, pre_distribute_mutations, one_dist, cond in zip(*condition_lists):
            params = dict(
                s=run_s,
                mutations_0idx=mutations_0idx,
                linked_mutations_0idx=linked_mutations_0idx,
                cut_number_range=CUT_NUMBER_RANGE,
                fidelity_data_path=FIDELITY_DATA_PATH,
                min_aa_length=MIN_FRAGMENT_LENGTH,
                provider_max_dna_length=1500,
                fusion_sites_used_by_backbone=FUSION_SITES_USED_BY_BACKBONE,
                max_cost=MAX_COST,
                max_unevenness=MAX_LENGTH_UNEVENNESS,
                min_ligation_fidelity=MIN_LIGATION_FIDELITY,
                satisfaction_fidelity=SATISFACTION_LIGATION_FIDELITY,
                log_dir=log_dir,
                supress_output=supress_output,
                search_method="BFS",
                codon_usage_tbl_dir=codon_usage_tbl_dir,
                host=HOST,
                sort_by_cost=True,
                codon_table=CODON_TABLE,
                enzyme='BsaI',
                allowed_cut_positions_1idx=ALLOWED_CUT_POSITIONS,
                cost_scan=cost_scan,
                pre_distribute_mutations=pre_distribute_mutations,
                one_dist=one_dist
            )
            best_partitions_by_cut_number = compute_best_partitions(**params)
            lowest_cost_partition = get_lowest_cost_from_best_partitions(
                best_partitions_by_cut_number=best_partitions_by_cut_number, supress_output=supress_output
            )

            exports = (
                ('lowest_cost_partition', lowest_cost_partition),
                ('best_partitions_by_cut_number', best_partitions_by_cut_number),
                ('params', params),
            )
            for name_, item in exports:
                output_path = os.path.join(result_dir, f'{name_}_{cond}_{loop}.json')
                with open(output_path, 'w') as fp:
                    json.dump(item, fp)
                print(separator)
                print(f'\033[1m Result "{name_}" is exported at:\n {output_path}\033[0m')

            # NOTE(review): this banner is printed after every condition of every loop, not only
            # once at the very end; the elapsed time refers to the search that just finished.
            print(f'\n\033[1m=================================================================================================='
                  f'\n                                     RUN COMPLETED!                                                      '
                  f'\n             Total elapsed time: {best_partitions_by_cut_number["total_elapsed_time"]} seconds'
                  f'\n==================================================================================================\033[0m')


In [5]:
"""Run"""
if __name__ == "__main__":
    # Three-line bold banner announcing the start of the run.
    print('\n'.join([
        '\033[1m==================================================================================================\033[0m',
        '                                    \033[1m RUN STARTED! \033[0m ',
        '\033[1m==================================================================================================\033[0m',
    ]))

    main(
        # Where the logs/ and results/ folders of this experiment are created.
        output_dir=join(dirname(abspath('__file__')), 'random_inputs_20240522'),
        # Input loaded from the configuration; passed along with generate_random_input=True.
        s=SEQUENCE,
        mutations_1idx=MUTATIONS,
        linked_mutations_1idx=LINKED_MUTATIONS,
        generate_random_input=True,
        number_of_loops=5,
        supress_output=True,
        # Parallel lists: position i of each list describes one search condition.
        cond_lst=['cost_scan', 'dist_mut', 'dist_mut_1', 'dist_mut_1_cost_scan', 'exhaustive'],
        cost_scan_lst=[True, False, False, True, False],
        pre_distribute_mutations_lst=[False, True, True, True, False],
        one_dist_lst=[False, False, True, True, False],
    )

                                    [1m RUN STARTED! [0m 
[1m--------------------------------------------------------------------------------------------------[0m
                                        [1m RUN 1[0m                               
[1m--------------------------------------------------------------------------------------------------[0m
--------------------------------------------------------------------------------------------------
Replacing default sequence and mutations with randomly generated sequence and mutations.
--------------------------------------------------------------------------------------------------
Random amino acid sequence of length 75: YPWCIVGGSPIVGARAWIQAVHCEPQMWEFQFPIVLRRIEYDVTKWNHNILNQNSSYSSCCACDGLRFIRDGISA

Random 1-indexed mutations at 9 positions: 
 [{'position': 6, 'aa': ['A', 'I', 'W', 'S', 'D', 'K', 'N', 'P']}, {'position': 10, 'aa': ['G', 'A', 'Q', 'S', 'M', 'T']}, {'position': 18, 'aa': ['M', 'L', 'V']}, {'position': 28, 'aa': ['Y'


KeyboardInterrupt

