## Imports

In [None]:
## test
######
####

In [None]:
import pandas as pd
import numpy as np

# Load the pre-filtered AA2024 spreadsheet (path is relative to the notebook's
# working directory) into a DataFrame.
df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')

# Leave the summary statistics as the cell's last expression so Jupyter shows
# them with its rich table display instead of a plain-text print.
df_AA2024.describe()

In [None]:
# Preview the first rows; a bare expression gives Jupyter's rich table display.
df_AA2024.head()

In [None]:
from baybe.targets import NumericalTarget
from baybe.objective import Objective

# Single optimisation target: the "Efficiency" column, to be maximised.
target = NumericalTarget(name="Efficiency", mode="MAX")

# Wrap the lone target in a single-target objective for the campaign.
objective = Objective(targets=[target], mode="SINGLE")

In [None]:
from baybe.parameters import (
    NumericalContinuousParameter,
)

# Two continuous inputs, each described by a (lower, upper) bounds pair.
time_parameter = NumericalContinuousParameter(name="Time[h]", bounds=(0.5, 672))
ph_parameter = NumericalContinuousParameter(name="pH", bounds=(1, 14))

# Collected in the order they were declared; consumed when the search space is built.
parameters = [time_parameter, ph_parameter]

In [None]:
from baybe.searchspace import SearchSpace

# Build the search space from every entry in the `parameters` list.
searchspace = SearchSpace.from_product(parameters)

In [None]:
from baybe import Campaign

# Tie the search space and the objective together; no strategy is passed here,
# so the library's default is used (presumably — confirm in the BayBE docs).
campaign = Campaign(searchspace, objective)

In [None]:
# Ask the campaign for the next batch of three experiments to run.
df = campaign.recommend(batch_size=3)

# Bare last expression -> rich display of the recommendations in Jupyter.
df

In [None]:
############################
############################
############################

In [1]:
import numpy as np
import pandas as pd

from baybe.targets import NumericalTarget
from baybe.objective import Objective

from baybe.parameters import NumericalDiscreteParameter, NumericalContinuousParameter
from baybe.searchspace import SearchSpace

from baybe.recommenders import RandomRecommender, SequentialGreedyRecommender
from baybe.surrogates import GaussianProcessSurrogate

from baybe.strategies import TwoPhaseStrategy
from baybe import Campaign



### Setting the objectives

The DESIRABILITY mode enables the combination of multiple targets via scalarization into a single value.

See also MATCH mode as an alternative to MAX/MIN. For more details on transformation functions:
https://emdgroup.github.io/baybe/userguide/targets.html

### Set targets/objectives, efficiency?

In [2]:
"""
overpotential = NumericalTarget(
    name="overpotential", 
    mode="MAX", 
    bounds=(-400, 0),
    transformation="LINEAR"  # optional, will be applied if bounds are not None, LINEAR only one available for MAX/MIN
    ) 

overpotential_slope = NumericalTarget(
    name="overpotential_slope", 
    mode="MAX", 
    bounds=(-0.05, 0.05),
    transformation="LINEAR"  # optional, will be applied if bounds are not None, LINEAR only one available for MAX/MIN
    )

objective = Objective(
    mode="DESIRABILITY",
    targets=[overpotential, overpotential_slope],
    weights=[1.0, 1.0],  # optional, by default all weights are equal
    combine_func="GEOM_MEAN",  # optional, geometric mean is the default
)
"""

'\noverpotential = NumericalTarget(\n    name="overpotential", \n    mode="MAX", \n    bounds=(-400, 0),\n    transformation="LINEAR"  # optional, will be applied if bounds are not None, LINEAR only one available for MAX/MIN\n    ) \n\noverpotential_slope = NumericalTarget(\n    name="overpotential_slope", \n    mode="MAX", \n    bounds=(-0.05, 0.05),\n    transformation="LINEAR"  # optional, will be applied if bounds are not None, LINEAR only one available for MAX/MIN\n    )\n\nobjective = Objective(\n    mode="DESIRABILITY",\n    targets=[overpotential, overpotential_slope],\n    weights=[1.0, 1.0],  # optional, by default all weights are equal\n    combine_func="GEOM_MEAN",  # optional, geometric mean is the default\n)\n'

### Search Space

In [3]:
# Hybrid search space definition: three discrete grids plus one continuous range.
#
# The fractional grids are built with np.linspace + round rather than
# np.arange with a float step: arange accumulates floating-point error (values
# such as 0.30000000000000004) and its length can change with rounding of the
# stop value, whereas linspace fixes both endpoints and the number of points.
parameters = [
    NumericalDiscreteParameter(
        name="Time (h)",
        # 6, 7, ..., 24 — assuming time below 6 hours is discarded
        values=np.arange(6, 25, 1),
    ),
    NumericalDiscreteParameter(
        name="pH",
        # -1.0, -0.9, ..., 15.0  (161 points, step 0.1)
        values=np.linspace(-1, 15, 161).round(1),
    ),
    NumericalContinuousParameter(  # Set this as continuous, the values seem quite small?
        name="Inhibitor Concentration (M)",
        bounds=(0, 0.02),
    ),
    NumericalDiscreteParameter(
        name="Salt Concentration (M)",
        # 0.00, 0.01, ..., 2.00  (201 points, step 0.01)
        values=np.linspace(0, 2, 201).round(2),
    ),
]

**Substance parameter**

Instead of values, this parameter accepts data in form of a dictionary. The items correspond to pairs of labels and SMILES. SMILES are string-based representations of molecular structures. Based on these, BayBE can assign each label a set of molecular descriptors as encoding.

For instance, a parameter corresponding to a choice of solvents can be initialized with:

In [4]:
from baybe.parameters import SubstanceParameter

# Illustration only: the parameter is displayed, not stored or added to `parameters`.
# Mapping of solvent label -> SMILES string.
solvent_smiles = {
    "Water": "O",
    "1-Octanol": "CCCCCCCCO",
    "Toluene": "CC1=CC=CC=C1",
}

SubstanceParameter(
    name="Solvent",
    data=solvent_smiles,
    decorrelate=0.7,  # optional
    encoding="MORDRED",  # optional
)

SubstanceParameter(name='Solvent', data={'Water': 'O', '1-Octanol': 'CCCCCCCCO', 'Toluene': 'CC1=CC=CC=C1'}, decorrelate=0.7, encoding=<SubstanceEncoding.MORDRED: 'MORDRED'>)


The encoding option defines what kind of descriptors are calculated:

MORDRED: 2D descriptors from the Mordred package. Since the original package is now unmaintained, baybe requires the community replacement mordredcommunity

RDKIT: 2D descriptors from the RDKit package

MORGAN_FP: Morgan fingerprints calculated with RDKit (1024 bits, radius 4)

These calculations will typically result in 500 to 1500 numbers per molecule. **To avoid detrimental effects on the surrogate model fit, we reduce the number of descriptors via decorrelation before using them.** For instance, the decorrelate option in the example above specifies that only descriptors with a correlation lower than 0.7 to any other descriptor will be kept. This usually reduces the number of descriptors to 10-50, depending on the specific items in data.

**WARNING:**
The descriptors calculated for a SubstanceParameter were developed to describe small molecules and are not suitable for other substances. If you deal with large molecules like polymers or arbitrary substance mixtures, we recommend to provide your own descriptors via the CustomParameter.

The encoding concept introduced above is generalized by the CustomParameter. Here, the user is expected to provide their own descriptors for the encoding.

Take, for instance, a parameter that corresponds to the choice of a polymer. Polymers are not well represented by the small molecule descriptors utilized in the SubstanceParameter. Still, one could provide experimental measurements or common metrics used to classify polymers:

In [5]:
import pandas as pd
from baybe.parameters import CustomDiscreteParameter

# Custom descriptor table: one row per polymer label (labels live in the index),
# one column per descriptor. Create it inline here or import it from elsewhere.
polymer_labels = ["Polymer A", "Polymer B", "Polymer C"]
descriptors = pd.DataFrame(
    data={
        "Glass_Transition_TempC": [20, -71, -39],
        "Weight_kDalton": [120, 32, 241],
    },
    index=polymer_labels,
)

# Illustration only — add the resulting parameter to the `parameters` list afterwards.
# decorrelate is optional; True uses the default correlation threshold (= 0.7?).
CustomDiscreteParameter(data=descriptors, name="Polymer", decorrelate=True)

CustomDiscreteParameter(name='Polymer', data=           Glass_Transition_TempC  Weight_kDalton
Polymer A                      20             120
Polymer B                     -71              32
Polymer C                     -39             241, decorrelate=True, encoding=<CustomEncoding.CUSTOM: 'CUSTOM'>)

In [6]:
# Build the search space from every entry in the `parameters` list; the result
# is passed to the Campaign constructor later in the notebook.
searchspace = SearchSpace.from_product(parameters)

### Recommenders

The **SequentialGreedyRecommender** is a powerful recommender that leverages BoTorch optimization functions to perform sequential Greedy optimization. It can be applied for discrete, continuous and hybrid search spaces. It is an implementation of the BoTorch optimization functions for discrete, continuous and mixed spaces. **It is important to note that this recommender performs a brute-force search when applied in hybrid search spaces, as it optimizes the continuous part of the space while exhaustively searching choices in the discrete subspace.** You can customize this behavior to only sample a certain percentage of the discrete subspace via the sampling_percentage attribute and to choose different sampling strategies via the hybrid_sampler attribute.

e.g.
strategy = TwoPhaseStrategy(recommender=SequentialGreedyRecommender(hybrid_sampler="Farthest", sampling_percentage=0.3))

For implementing fully customized surrogate models e.g. from sklearn or PyTorch, see:
https://emdgroup.github.io/baybe/examples/Custom_Surrogates/Custom_Surrogates.html


In [7]:
# Reference lists only — nothing below reads them. They record the surrogate
# models and acquisition functions noted as available.
available_surr_models = [
    "GaussianProcessSurrogate",
    "BayesianLinearSurrogate",
    "MeanPredictionSurrogate",
    "NGBoostSurrogate",
    "RandomForestSurrogate",
]

# qPI  = q-Probability Of Improvement
# qEI  = q-Expected Improvement
# qUCB = q-upper confidence bound with beta of 1.0
available_acq_functions = ["qPI", "qEI", "qUCB"]

# Explicit choices (these are the defaults anyway).
SURROGATE_MODEL = GaussianProcessSurrogate()
# q-Expected Improvement; only q-functions are available for batch_size > 1
ACQ_FUNCTION = "qEI"

# Fully configured recommender: the discrete part of a hybrid space is
# sub-sampled rather than searched exhaustively.
seq_greedy_recommender = SequentialGreedyRecommender(
    allow_recommending_already_measured=False,
    allow_repeated_recommendations=False,
    sampling_percentage=0.3,  # should be relatively low
    hybrid_sampler="Farthest",  # find more details in the documentation
    acquisition_function_cls=ACQ_FUNCTION,
    surrogate_model=SURROGATE_MODEL,
)

# Alternative recommender that keeps every other setting at its default.
hybrid_recommender = SequentialGreedyRecommender(
    allow_recommending_already_measured=False,
    allow_repeated_recommendations=False,
)

### Campaign Strategy

In [10]:
# Two-phase strategy:
#   * initial_recommender — used while no training data is available. Other
#     initial recommenders don't seem to work for this hybrid search space /
#     set of parameters; it doesn't matter since training data already exists.
#   * recommender — the Bayesian model-based optimizer (`hybrid_recommender`
#     is the alternative that could be plugged in here instead).
#   * switch_after — switch to the model-based recommender after 1 batch or
#     iteration (so the initial training data).
strategy = TwoPhaseStrategy(
    switch_after=1,
    recommender=seq_greedy_recommender,
    initial_recommender=RandomRecommender(),
)

# Requires `searchspace` and `objective` to already exist in the kernel.
campaign = Campaign(searchspace, objective, strategy)



NameError: name 'objective' is not defined

### Import and read the modified Excel file as a dataframe? It now contains only specific columns as training data, possibly as in these examples:

https://emdgroup.github.io/baybe/examples/Backtesting/full_initial_data.html


https://emdgroup.github.io/baybe/examples/Backtesting/full_lookup.html

### For transfer learning see: 

https://emdgroup.github.io/baybe/userguide/transfer_learning

&

https://emdgroup.github.io/baybe/examples/Transfer_Learning/basic_transfer_learning.html