### random tests

In [None]:
from rdkit import Chem

In [None]:
from biosynth_pipeline import biosynth_pipeline

In [None]:
def select_extenders(extenders):
    """Write the chosen PKS extender units to Retrotide's ``extenders.smi``.

    The file is a tab-separated table with the columns ``smiles``, ``id``,
    ``type`` and ``shortName``. Rows are written in the order of the built-in
    catalogue below, not in the order of ``extenders``.

    Args:
        extenders: The string ``"all"`` to write every catalogued extender,
            a single ``shortName`` string, or an iterable of ``shortName``
            strings (e.g. ``["mal", "mmal"]``).

    Raises:
        ValueError: If a requested ``shortName`` is not in the catalogue.
            Validation happens before the output file is opened, so an
            invalid request never truncates an existing ``extenders.smi``.
    """
    molecules_data = [
        {"smiles": "O=C(O)CC(=O)[S]", "id": "Malonyl-CoA", "type": "CoA", "shortName": "mal"},
        {"smiles": "C[C@@H](C(=O)O)C(=O)[S]", "id": "Methylmalonyl-CoA", "type": "CoA", "shortName": "mmal"},
        {"smiles": "C=CC[C@@H](C(=O)O)C(=O)[S]", "id": "Allylmalonyl-CoA", "type": "CoA", "shortName": "allylmal"},
        {"smiles": "CO[C@@H](C(=O)O)C(=O)[S]", "id": "Methoxymalonyl-CoA", "type": "CoA", "shortName": "mxmal"},
        {"smiles": "CC[C@@H](C(=O)O)C(=O)[S]", "id": "Ethylmalonyl-CoA", "type": "CoA", "shortName": "emal"},
        {"smiles": "CCCC[C@@H](C(=O)O)C(=O)[S]", "id": "Butyrylmalonyl-CoA", "type": "CoA", "shortName": "butmal"},
        {"smiles": "OC(C(C([S])=O)O)=O", "id": "Hmal", "type": "CoA", "shortName": "hmal"},
        {"smiles": "CC(C)[C@@H](C([S])=O)C(O)=O", "id": "Isobutyrylmalonyl-CoA", "type": "CoA", "shortName": "isobutmal"},
        {"smiles": "CC(C)[C@H](C([S])=O)C(O)=O", "id": "D-isobutyrylmalonyl-CoA", "type": "CoA", "shortName": "d-isobutmal"},
        {"smiles": "ClC1=C(Cl)NC=C1CCCCC(C(O)=O)C([S])=O", "id": "DCP", "type": "CoA", "shortName": "DCP"},
        {"smiles": "CCCCCC[C@@H](C(=O)O)C(=O)[S]", "id": "Hexylmalonyl-CoA", "type": "CoA", "shortName": "hexmal"},
    ]

    # Every shortName that may legally be requested
    valid_short_names = {molecule["shortName"] for molecule in molecules_data}

    if isinstance(extenders, str):
        # "all" selects the whole catalogue; any other bare string is treated
        # as one shortName instead of being iterated character by character.
        requested = list(valid_short_names) if extenders == "all" else [extenders]
    else:
        # Materialize once so one-shot iterables can be validated and then reused
        requested = list(extenders)

    # Validate BEFORE touching the file so a typo cannot wipe the existing one
    for extender in requested:
        if extender not in valid_short_names:
            raise ValueError(f"Invalid shortName provided: '{extender}'")
    requested = set(requested)

    rows = []
    for molecule in molecules_data:
        if molecule["shortName"] not in requested:
            continue
        smiles = molecule["smiles"]
        # Skip (with a warning) any entry RDKit cannot parse
        if Chem.MolFromSmiles(smiles) is None:
            print(f"Warning: Invalid SMILES string for {molecule['id']}")
            continue
        rows.append(f"{smiles}\t{molecule['id']}\t{molecule['type']}\t{molecule['shortName']}\n")

    with open("../biosynth_pipeline/retrotide/data/extenders.smi", "w", encoding="utf-8") as file:
        file.write("smiles\tid\ttype\tshortName\n")
        file.writelines(rows)

    print("Extenders SMI file created successfully.")

In [None]:
def select_starters(starters):
    """Write the chosen PKS starter units to Retrotide's ``starters.smi``.

    The file is a tab-separated table with the columns ``smiles``, ``id``,
    ``type`` and ``shortName``. Rows are written in the order of the built-in
    catalogue below, not in the order of ``starters``.

    Args:
        starters: The string ``"all"`` to write every catalogued starter,
            a single ``shortName`` string, or an iterable of ``shortName``
            strings (e.g. ``["mal", "mmal"]``).

    Raises:
        ValueError: If a requested ``shortName`` is not in the catalogue.
            Validation happens before the output file is opened, so an
            invalid request never truncates an existing ``starters.smi``.
    """
    molecules_data = [
        {"smiles": "CC(=O)[S]", "id": "Acetyl-CoA", "type": "CoA", "shortName": "Acetyl-CoA"},
        {"smiles": "CCC(=O)[S]", "id": "Propionyl-CoA", "type": "CoA", "shortName": "prop"},
        {"smiles": "CC(=O)[S]", "id": "Malonyl-CoA", "type": "CoA", "shortName": "mal"},
        {"smiles": "CCC(=O)[S]", "id": "Methylmalonyl-CoA", "type": "CoA", "shortName": "mmal"},
        {"smiles": "C=CCC(=O)[S]", "id": "Allylmalonyl-CoA", "type": "CoA", "shortName": "allylmal"},
        {"smiles": "COCC(=O)[S]", "id": "Methoxymalonyl-CoA", "type": "CoA", "shortName": "mxmal"},
        {"smiles": "CCCC(=O)[S]", "id": "Butyrylmalonyl-CoA", "type": "CoA", "shortName": "butmal"},
        {"smiles": "CC(C)C(=O)[S]", "id": "Isobutyrylmalonyl-CoA", "type": "CoA", "shortName": "isobutmal"},
        {"smiles": "CCC(C)C(=O)[S]", "id": "2-methylbutyrylmalonyl-CoA", "type": "CoA", "shortName": "2metbutmal"},
        {"smiles": "[S]C(C1=CNC(Cl)=C1Cl)=O", "id": "DCP", "type": "CoA", "shortName": "DCP"},
        {"smiles": "CC(=O)[S]", "id": "cemal", "type": "CoA", "shortName": "cemal"},
        {"smiles": "C1CCCCC1C(=O)[S]", "id": "CHC-CoA", "type": "CoA", "shortName": "CHC-CoA"},
        {"smiles": "C1CC[C@@H](C(=O)O)[C@@H]1C(=O)[S]", "id": "trans-1,2-CPDA", "type": "CoA", "shortName": "trans-1,2-CPDA"},
        {"smiles": "C1(=O)C(=CCC1)C(=O)[S]", "id": "cyclopentene", "type": "CoA", "shortName": "cyclopentene"},
        {"smiles": "P[S]C(C1=CC=CN1)=O", "id": "pyr", "type": "CoA", "shortName": "pyr"},
        {"smiles": "O=C([S])/C=C/C1=CC=CC=C1", "id": "cin", "type": "CoA", "shortName": "cin"},
        {"smiles": "[S]C(C1=CC(O)=CC(N)=C1)=O", "id": "ABHA", "type": "CoA", "shortName": "ABHA"},
        {"smiles": "CC(CC([S])=O)C", "id": "isoval", "type": "CoA", "shortName": "isoval"},
        {"smiles": "NC1=CC=C(C([S])=O)C=C1", "id": "PABA", "type": "CoA", "shortName": "PABA"},
        {"smiles": "NC(NCC([S])=O)=[NH2+]", "id": "guan", "type": "CoA", "shortName": "guan"},
        {"smiles": "CC1=NC(C([S])=O)=CS1", "id": "mthz", "type": "CoA", "shortName": "mthz"},
        {"smiles": "O[C@H]1[C@H](O)CCC(C([S])=O)C1", "id": "DHCH", "type": "CoA", "shortName": "DHCH"},
        {"smiles": "O[C@H]1[C@H](O)CC=C(C([S])=O)C1", "id": "DHCHene", "type": "CoA", "shortName": "DHCHene"},
        {"smiles": "O=C([S])CC1=CC=CC=C1", "id": "plac", "type": "CoA", "shortName": "plac"},
        {"smiles": "[S]C(C1=CC=CC=C1)=O", "id": "benz", "type": "CoA", "shortName": "benz"},
        {"smiles": "[S]C(C1=CC=C([N+]([O-])=O)C=C1)=O", "id": "PNBA", "type": "CoA", "shortName": "PNBA"},
        {"smiles": "[S]C([C@@H](CC)C(N)=O)=O", "id": "ema", "type": "CoA", "shortName": "ema"},
        {"smiles": "[S]C([C@@H](C)CNC([C@@H](N)C)=O)=O", "id": "3measp", "type": "CoA", "shortName": "3measp"},
    ]

    # Every shortName that may legally be requested
    valid_short_names = {molecule["shortName"] for molecule in molecules_data}

    if isinstance(starters, str):
        # "all" selects the whole catalogue; any other bare string is treated
        # as one shortName instead of being iterated character by character.
        requested = list(valid_short_names) if starters == "all" else [starters]
    else:
        # Materialize once so one-shot iterables can be validated and then reused
        requested = list(starters)

    # Validate BEFORE touching the file so a typo cannot wipe the existing one
    for starter in requested:
        if starter not in valid_short_names:
            raise ValueError(f"Invalid shortName provided: '{starter}'")
    requested = set(requested)

    rows = []
    for molecule in molecules_data:
        if molecule["shortName"] not in requested:
            continue
        smiles = molecule["smiles"]
        # Skip (with a warning) any entry RDKit cannot parse
        if Chem.MolFromSmiles(smiles) is None:
            print(f"Warning: Invalid SMILES string for {molecule['id']}")
            continue
        rows.append(f"{smiles}\t{molecule['id']}\t{molecule['type']}\t{molecule['shortName']}\n")

    with open("../biosynth_pipeline/retrotide/data/starters.smi", "w", encoding="utf-8") as file:
        file.write("smiles\tid\ttype\tshortName\n")
        file.writelines(rows)

    print("Starters SMI file created successfully.")

In [None]:
# Rewrite extenders.smi so that it lists only these three extender units
select_extenders(["mal","mmal","allylmal"])

In [None]:
# Rewrite starters.smi so that it lists only these three starter units
select_starters(["mal","mmal","allylmal"])

In [None]:
from biosynth_pipeline import retrotide
from biosynth_pipeline.retrotide import structureDB, designPKS

In [None]:
from rdkit import Chem

# Raw string: this SMILES uses backslashes (e.g. `/C=C\`) to encode double-bond
# stereochemistry. In a normal string literal `\[` and `\C` are invalid escape
# sequences, which newer Python versions warn about; the raw literal has the
# same value without relying on that behaviour.
target = r'C[C@H]1[C@H](C[C@H](O)/C=C\[C@@H]([C@H](O)[C@H](/C=C(C[C@@H]([C@@H](O)[C@@H]([C@@H](OC(N)=O)[C@H](/C=C\C=C)C)C)C)/C)C)C)OC([C@H](C)[C@H]1O)=O'

# Ask Retrotide for PKS designs for the target molecule
designs = designPKS(Chem.MolFromSmiles(target))
#repr(designs[-1][0][0].modules)

In [None]:
# Product structure of the top-ranked design (index [-1][0][0])
designs[-1][0][0].computeProduct(structureDB)

In [None]:
# Modules that make up the top-ranked design (index [-1][0][0])
designs[-1][0][0].modules

# Biosynth Pipeline Demo 1

#### Biosynthesis of propionic acid starting with PKSs and 2 steps of non-PKS modifications

Authors: Tyler Backman and Yash Chainani

This is the first notebook to demonstrate using the `biosynth_pipeline` software, which combines Retrotide and Pickaxe into a single biosynthesis pipeline. In this example, Retrotide first performs a forward synthesis of the target product using chimeric type I Polyketide Synthases (PKSs). If PKSs/Retrotide cannot reach this target, then Pickaxe will take over and find non-PKS pathways comprising regular, monofunctional enzymes to ideally transform the PKS product into the final downstream target product.

### Import key dependencies

If you don't have chemaxon installed or have chemaxon installed but do not possess a valid license, you will likely get the following error message: *"No valid license for cxcalc installed, operating in read-only mode. A local cache may be loaded, but no compounds can be created."* Don't worry, you'll still be able to generate pathways and calculate the reaction thermodynamics for a number of reactions. In fact, because getting a chemaxon license on the cluster is difficult, you can also comment out the line `from biosynth_pipeline import thermo` and perform the thermodynamic calculations of pathways locally instead.

In [1]:
from biosynth_pipeline import biosynth_pipeline

Initialize the reaction feasibility classifier to plug into `Biosynth_pipeline`. This is a machine learning model that helps to calculate the likelihood of enzymatic reactions predicted by Pickaxe in terms of a feasibility/ probability score between 0 and 1. This score is a function of both the thermodynamic feasibility of a reaction as well as its chemical probability. For large expansions, the number of pathways suggested by Pickaxe can increase exponentially in number, rendering it far too cumbersome to manually sieve through pathways and pick one that would be feasible to implement experimentally. Thus, this `feasibility_classifier` can be initialized to rank the feasibility of single enzymatic reactions and consequently, pathways, eventually helping to pick the most feasible pathway that should be implemented experimentally.

In [None]:
### Configure and build the feasibility classifier that is handed to biosynth_pipeline later

# Model files (the '.pkl' or '.dat' file can be picked for the model path too)
feasibility_model_path = '../models/updated_model_Apr28'
calibration_model_path = '../models/updated_model_Apr28_calibration'
cofactors_path = '../data/coreactants_and_rules/all_cofactors_updated.csv'

# Remaining classifier settings
fp_type, nBits = 'ecfp4', 2048
max_species = 4
cofactor_positioning = 'by_descending_MW'

PX = biosynth_pipeline.feasibility_classifier(
    feasibility_model_path=feasibility_model_path,
    calibration_model_path=calibration_model_path,
    cofactors_path=cofactors_path,
    fp_type=fp_type,
    nBits=nBits,
    max_species=max_species,
    cofactor_positioning=cofactor_positioning,
)

### Setting up Biosynth Pipeline for combined propionic acid biosynthesis

Instantiate an object of the `biosynth_pipeline` class. Following is the set of arguments used to build each object

* `pathway_sequence`: Order of PKS and non-PKS transformations. If you wish to perform PKS transformations first (more common), then set this to: `['pks','non_pks']`. If you wish to perform non-PKS transformations first, however, set this to: `['non_pks','pks']`.

* `target_smiles`: Simplified Molecular Input Line Entry System (SMILES) string for the downstream target molecule. For instance, 'CCO' corresponds to ethanol while 'CCCO' corresponds to propanol. You can use the following site to first draw your target molecular structure and then obtain its SMILES string: http://www.cheminfo.org/flavor/malaria/Utilities/SMILES_generator___checker/index.html

* `non_pks_rules`: Reaction rules/ operators to use for non-PKS enzymatic expansion. You can choose to set this to `biological_generalized` or you can set it to `biological_intermediate`. The generalized rules are a set of 1224 reaction operators that digitally encode enzyme-substrate promiscuity by templating the functional group changes directly occurring at the reactive site between a substrate and product. Meanwhile, the intermediate rules are a set of 3604 reaction operators that digitally encode enzyme-substrate promiscuity by not just templating functional group changes directly on a reactive site but also the chemical environments neighboring a reaction site. If you wish to perform a hybrid synthesis instead, you can also set this argument to `chemical` (default: `biological_intermediate`).

* `non_pks_cores`: Number of computing cores to run Pickaxe (default: `1`).

* `non_pks_sim_score_filter`: Decide if a similarity threshold filter should be used to prune Pickaxe expansions. If set to `True`, you will need to provide a list of Tanimoto similarity thresholds in the next argument. Intermediate metabolites that are similar to the product (i.e. their Tanimoto similarity to the target exceeds this threshold) will progress onto future reactions while dissimilar metabolites will be discarded (default: `False`).

* `non_pks_sim_score_cutoffs`: List of Tanimoto similarity thresholds for each generation, e.g.: `[0.2,0.4]` (default: `None`). 

* `non_pks_sim_sample`: If set to `True`, similarity sampling will be used during Pickaxe expansion to prune the reaction network. This filter first calculates the distribution of Tanimoto similarities amongst intermediate metabolites and then weights this distribution towards metabolites most similar to the target. Once this weighted distribution has been created, metabolites that will progress onto the next reaction step are sampled with a skew towards metabolites most similar to the target (default: `False`).

* `non_pks_sim_sample_size`: Number of Pickaxe intermediates to be sampled if similarity sampling is used. Typically 1000 is a suitable sampling size (default: `None`).

* `feasibility_classifier`: Reaction feasibility model object, i.e. `PX` defined above

* `remove_stereo`: If set to `False`, stereochemistry will be considered in Retrotide. This is an important metric for evaluating similarity with Retrotide (default: `False`). 

* `cofactors_filepath`, `known_metabolites_filepath`: Filepaths for the set of cofactors to use for Pickaxe and the set of metabolites present across Brenda, Kegg, and MetaCyc

In [None]:
# --- Key choices for this run ---
target_smiles = 'CCC(=O)O'  # propionic acid
pathway_sequence = ['pks', 'non_pks']  # Retrotide (PKS) first, then Pickaxe (non-PKS)

# --- Pickaxe (non-PKS) expansion settings ---
non_pks_rules = 'biological_intermediate'  # intermediate enzymatic rules; 'chemical' can be chosen too
non_pks_steps, non_pks_cores = 2, 5
# Similarity filtering and similarity sampling are both switched off
non_pks_sim_score_filter, non_pks_sim_score_cutoffs = False, None
non_pks_sim_sample, non_pks_sim_sample_size = False, None

# --- Settings that would typically stay fixed ---
remove_stereo = False  # keep stereochemistry, since Retrotide relies on it
cofactors_filepath = '../data/coreactants_and_rules/all_cofactors.tsv'
known_metabolites_filepath = '../data/all_known_metabolites.txt'

With these arguments defined, let's go ahead and instantiate an object of the `biosynth_pipeline` class in a bit of a verbose fashion:

In [None]:
# Build the pipeline object, spelling out every argument explicitly
biosynth_pipeline_object_verbose = biosynth_pipeline.biosynth_pipeline(
    pathway_sequence=pathway_sequence,
    target_smiles=target_smiles,
    feasibility_classifier=PX,
    remove_stereo=remove_stereo,
    known_metabolites=known_metabolites_filepath,
    non_pks_cofactors=cofactors_filepath,
    non_pks_rules=non_pks_rules,
    non_pks_steps=non_pks_steps,
    non_pks_cores=non_pks_cores,
    non_pks_sim_score_filter=non_pks_sim_score_filter,
    non_pks_sim_score_cutoffs=non_pks_sim_score_cutoffs,
    non_pks_sim_sample=non_pks_sim_sample,
    non_pks_sim_sample_size=non_pks_sim_sample_size,
)

Many of these are also optional arguments so an object of the `biosynth_pipeline` class can also be instantiated as shown below. The most important parameters are really just pathway sequence (`pathway_sequence`), the SMILES string of the eventual target product (`target_smiles`) and the number of non-PKS enzymatic steps between the PKS product and eventual target product

In [None]:
# Build the pipeline object from the essential arguments only;
# everything not passed here keeps its default value
target_smiles = 'CCC(=O)O'  # propionic acid
pathway_sequence = ['pks', 'non_pks']  # Retrotide first, then Pickaxe
non_pks_steps = 2

biosynth_pipeline_object = biosynth_pipeline.biosynth_pipeline(
    pathway_sequence=pathway_sequence,
    target_smiles=target_smiles,
    feasibility_classifier=PX,
    non_pks_steps=non_pks_steps,
)

Run a PKS synthesis first. The release mechanism via which a bound product will detach from the PKS modules must be specified via the `pks_release_mechanism` parameter.

In [None]:
# Run the PKS stage, releasing the bound product from the PKS via thiolysis
biosynth_pipeline_object.run_pks_synthesis(pks_release_mechanism='thiolysis')

This result tells us that the PKS design comprises two modules: a loading module, which accepts Methylmalonyl-CoA as the starter unit, and a module 1, which accepts Malonyl-CoA as an extender unit.

#### Examining PKS designs

If you are familiar with the documentation behind `Retrotide`, much of `Retrotide`'s functionality has already been built into `biosynth_pipeline`. For instance, in order to access the details of the top-ranked PKS design, you can simply run `repr(biosynth_pipeline_object.pks_designs[-1][0][0].modules)`, just like how in `Retrotide`, you can run `repr(designs[-1][0][0].modules)`.

In [None]:
# Module details of the top-ranked PKS design (index [-1][0][0])
repr(biosynth_pipeline_object.pks_designs[-1][0][0].modules)

Again, just like `Retrotide`, you can also compute the structure of the final PKS product using `.computeProduct(structureDB)`

In [None]:
# Structure of the top-ranked design's PKS product.
# NOTE: `structureDB` is imported from biosynth_pipeline.retrotide in an earlier cell.
biosynth_pipeline_object.pks_designs[-1][0][0].computeProduct(structureDB)

Notice how this PKS product has a sulfur radical attached to it. This is because in this form, the product is still tethered to the PKS domain. A common mechanism for termination is thiolysis, which you have already set in passing the argument `pks_release_mechanism = 'thiolysis'` to `biosynth_pipeline_object.run_pks_synthesis(pks_release_mechanism='thiolysis')`. There also exist other mechanisms of chain termination, such as `lactonization` and `macrocyclization`. These additional chain termination mechanism reactions have not been built into this release of `biosynth_pipeline` yet but will be in future releases.

In [None]:
# Build an RDKit molecule from the SMILES of the top PKS design's final product
Chem.MolFromSmiles(biosynth_pipeline_object.pks_top_final_product)

We have accessed the top PKS design so far. Let us try to access the next best PKS design and see if we can compute the final PKS product from it using a thiolysis termination.

The next best PKS design can be accessed by printing the attribute, `repr(biosynth_pipeline_object.pks_designs[-1][1][0].modules)`. Here, the `[-1][1][0]` index refers to the next best PKS design while the index `[-1][2][0]` would refer to the third best PKS design.

In [None]:
# Module details of the next-best PKS design (index [-1][1][0])
repr(biosynth_pipeline_object.pks_designs[-1][1][0].modules)

and just like with the top PKS product above, running the method `biosynth_pipeline_object.pks_designs[-1][1][0].computeProduct(structureDB)` will show the final PKS structure but while it is still bound to the PKS chain.

In [None]:
# Structure of the next-best design's product while it is still bound to the PKS
biosynth_pipeline_object.pks_designs[-1][1][0].computeProduct(structureDB)

In order to view the final unbound product, we can run the method `run_pks_termination` to release the product from the PKS.

In [None]:
# NOTE(review): this repeats the previous cell (PKS-bound product of the next-best
# design); the surrounding text mentions `run_pks_termination` — confirm whether
# that call was intended here instead.
biosynth_pipeline_object.pks_designs[-1][1][0].computeProduct(structureDB)

#### Non-PKS enzymatic synthesis

In [None]:
# RDKit molecule for the final product of the top-ranked PKS design
Chem.MolFromSmiles(biosynth_pipeline_object.pks_top_final_product)

#### Non-PKS enzymatic synthesis

Once the PKS synthesis is complete, a non-PKS synthesis can be run via Pickaxe using the `.run_non_pks_synthesis_post_pks` method. This function takes the `max_designs` to try as an input parameter. If the target product cannot be reached by non-PKS enzymes within the specified number of non-PKS steps, the next best N PKS designs will be used instead as starting points for non-PKS synthesis.

In [None]:
# Search for non-PKS (Pickaxe) pathways from the PKS product to the target,
# trying at most 5 PKS designs as starting points
non_pks_pathways = biosynth_pipeline_object.run_non_pks_synthesis_post_pks(max_designs=5)

Print out pathways generated by Pickaxe using non-PKS enzymes between the top PKS product and propionic acid. This is a dictionary where each key is the pathway number and the corresponding value is another dictionary comprising details about the pathway

In [None]:
# Display the pathways dictionary (one entry per pathway number)
non_pks_pathways

These pathways can now be ranked using the reaction feasibility machine learning model. Notice how under each pathway, a list of feasibility scores has been added. The list of values under the key `feasibilities` represents the feasibility of each reaction in a pathway while the value under the key `net feasibility` represents the aggregate feasibility of the pathway, which is calculated by taking the product of each reaction's feasibility.

In [None]:
# Score each pathway with the feasibility classifier (adds `feasibilities` and `net feasibility`)
non_pks_pathways = biosynth_pipeline_object.rank_non_pks_pathways(non_pks_pathways)

In [None]:
# Display the pathways again, now including their feasibility scores
non_pks_pathways

We can run a simple sorting algorithm to now rank these pathways using their `net feasibility` from most to least feasible

In [None]:
# Order the pathways from most to least feasible; each 'net feasibility'
# value is cast to float so the comparison is numeric
sorted_data = sorted(
    non_pks_pathways.items(),
    key=lambda name_and_details: float(name_and_details[1]['net feasibility']),
    reverse=True,
)

# Rebuild a dictionary from the sorted (name, details) pairs and show it
sorted_dict = dict(sorted_data)
print(sorted_dict)

### Examining pathways further

We can see from the dictionary above that the pathways have been rearranged from highest `net feasibility` to lowest `net feasibility`. Let's pick the top pathway, `Pathway 12` and examine it further:

In [None]:
# NOTE: 'Pathway 12' is the top-ranked key from the run described in the text;
# a different run may rank another pathway first
sorted_dict['Pathway 12']

We can see that this pathway starts with the compound `CC(=O)CC(=O)O`, goes through the intermediate `CC(=O)C(C)C(=O)O`, and then reaches the final propionic acid target, `CCC(=O)O`. The two reactions in this pathway are:  

(1) CC(=O)CC(=O)O + C[S+](CCC(N)C(=O)O)CC1OC(n2cnc3c(N)ncnc32)C(O)C1O >> CC(=O)C(C)C(=O)O + Nc1ncnc2c1ncn2C1OC(CSCCC(N)C(=O)O)C(O)C1O,

and:

(2) CC(=O)C(C)C(=O)O + O >> CCC(=O)O + CC(=O)O.

The first reaction has a feasibility score of 0.8636364 and the second has a feasibility score of 0.98273575. This means that the pathway has a net feasibility score of 0.8636364 x 0.98273575 = 0.8487263652813

We can also print out the enzymes associated with each reaction rule that was applied in this pathway:

In [None]:
# `pd` is needed below; import it here so this cell runs on its own
import pandas as pd

# Load the tab-separated table of intermediate reaction rules
intermediate_rules_df = pd.read_csv('../data/coreactants_and_rules/JN3604IMT_rules.tsv',delimiter='\t')

In [None]:
# Name of the rule to look up for reaction 1 (matched against the 'Name' column below)
rxn_1_rule = 'rule0043_12'

In [None]:
# Pick the rows whose 'Name' equals the rule, then collect their 'Comments' entries
rxn_1_mask = intermediate_rules_df['Name'] == rxn_1_rule
rxn_1_enzymes = list(intermediate_rules_df.loc[rxn_1_mask, 'Comments'])

In [None]:
# Display the 'Comments' entries collected for reaction 1's rule
rxn_1_enzymes

In [None]:
# Name of the rule to look up for reaction 2 (matched against the 'Name' column below)
rxn_2_rule = 'rule0085_2'

In [None]:
# Pick the rows whose 'Name' equals the rule, then collect their 'Comments' entries
rxn_2_mask = intermediate_rules_df['Name'] == rxn_2_rule
rxn_2_enzymes = list(intermediate_rules_df.loc[rxn_2_mask, 'Comments'])

In [None]:
# Display the 'Comments' entries collected for reaction 2's rule
rxn_2_enzymes

The enzymes above are all listed in terms of their UNIPROT IDs. We can search uniprot.org to see if the documented, native reactions are similar to the ones predicted by Pickaxe

### Calculating reaction thermodynamics

Additionally, we can calculate the thermodynamics of reactions in the pathway with the following function. Reaction thermodynamics are calculated with eQuilibrator. This in turn requires a valid ChemAxon license. If you don't have a ChemAxon license, don't worry. Recall that the feasibility classifier already takes thermodynamics into account with its predictions

In [None]:
# Importing `thermo` may report a missing ChemAxon license (see the note in the
# "Import key dependencies" section of this notebook)
from biosynth_pipeline import thermo
# Add reaction thermodynamics to every pathway; `ub`/`lb` are presumably upper and
# lower bounds used in the calculation — TODO confirm against calc_pathway_thermo
non_pks_pathways = biosynth_pipeline_object.calc_pathway_thermo(non_pks_pathways, ub = 1e-2, lb = 1e-4)

If we print out the dictionary `non_pks_pathways` now, we will see that an extra field, `dG`, i.e. the reaction Gibbs free energy, has been added as a value under each pathway.

In [None]:
# Display the pathways, which now also carry the `dG` field
non_pks_pathways

Sorting this again from highest to lowest feasibility scores gives:

In [None]:
# Order the pathways from most to least feasible; each 'net feasibility'
# value is cast to float so the comparison is numeric
sorted_data = sorted(
    non_pks_pathways.items(),
    key=lambda name_and_details: float(name_and_details[1]['net feasibility']),
    reverse=True,
)

# Rebuild a dictionary from the sorted (name, details) pairs and show it
sorted_dict = dict(sorted_data)
print(sorted_dict)

### Save pathways locally

In [None]:
# `json` is needed below; import it here so this cell runs on its own
import json

# Save the pathways dictionary as JSON in the current working directory
with open('biosynth_pipeline_demo.json', 'w') as json_file:
    json.dump(non_pks_pathways, json_file)

### Accessing attributes

The `biosynth_pipeline_object` that we instantiated has attributes that we can access as follows:

In [None]:
# SMILES string of the final, downstream target molecule
biosynth_pipeline_object.target_smiles

In [None]:
# reaction operators (rules) used for non-PKS reaction network expansion;
# the attribute name suggests this is a file path rather than the rule list itself
biosynth_pipeline_object.rule_filepath

In [None]:
# set of all known metabolites across Brenda, Kegg, and MetaCyc
biosynth_pipeline_object.known_metabolites

In [None]:
# display the final product from the best PKS design
biosynth_pipeline_object.pks_top_final_product

In [None]:
# NOTE(review): other cells index the top-ranked design as pks_designs[-1][0][0];
# this shows pks_designs[0][0] instead — confirm which entry is intended
repr(biosynth_pipeline_object.pks_designs[0][0])

In [None]:
# filepath of cofactors used for non-PKS reaction network expansion
biosynth_pipeline_object.non_pks_cofactors

In [None]:
# number of non-PKS reaction steps used
biosynth_pipeline_object.non_pks_steps

In [None]:
# number of computing cores for non-PKS reaction steps
biosynth_pipeline_object.non_pks_cores

In [None]:
# display if similarity thresholds were used for non-PKS reaction network expansion
biosynth_pipeline_object.non_pks_sim_score_filter

In [None]:
# display what the similarity thresholds are (if not None)
biosynth_pipeline_object.non_pks_sim_score_cutoffs

In [None]:
# display if similarity sampling was used for non-PKS reaction network expansion
biosynth_pipeline_object.non_pks_sim_sample

In [None]:
# display the similarity sampling size (if not None)
biosynth_pipeline_object.non_pks_sim_sample_size

In [None]:
# print out non-PKS pathways
biosynth_pipeline_object.non_pks_pathways