In [None]:
## Example for full simulation loop using a table-based lookup mechanism

# This example shows a simulation for a direct arylation where all combinations have been measured.
# This allows us to access information about previously conducted experiments from .xlsx-files.

# This example assumes some basic familiarity with using BayBE.
# We thus refer to [`campaign`](./../Basics/campaign.md) for a basic example.

### Necessary imports for this example

import os
import warnings
import sys

import pandas as pd
import seaborn as sns

from pathlib import Path
from baybe.utils.plotting import create_example_plots

# Name of the results file (inside the results directory) that is loaded and
# plotted by this notebook.
FILE_TO_LOAD = "results_90_bq2_ndi10_mc15.csv"

# All paths are resolved relative to the current working directory, which is
# expected to be the project's "scripts" directory.
script_path = Path.cwd()
# Warn *and abort* if the working directory is not named "scripts": the sibling
# "data" and "results" directories are located relative to it.
if script_path.name != "scripts":
    warnings.warn(
        "Please run this script from the 'scripts' directory to ensure that the data is loaded correctly."
    )
    raise SystemExit

results_dir = script_path.parent / "results"
data_dir = script_path.parent / "data"
if not data_dir.exists():
    raise FileNotFoundError(
        f"Data directory {data_dir} not found. Please make sure to run this script from the 'scripts' directory."
    )
# `exist_ok=True` already makes this a no-op when the directory is present, so
# no separate existence check is needed. If "results" exists but is a regular
# file, this now fails loudly here instead of later when results are accessed.
results_dir.mkdir(exist_ok=True)
print(f"Current directory is {script_path}.")
print(f"Data is loaded from {data_dir}.")
print(f"Results will be saved in {results_dir}.")

# NOTE(review): this silences *all* warnings for the rest of the session, which
# can hide genuine problems (e.g. deprecations). Consider narrowing the filter
# to the specific categories/modules that are known to be noisy.
warnings.filterwarnings('ignore')


In [None]:

# Load the previously saved simulation results from the results directory and
# rename the scenario column so that hue and legend refer to the share of
# initial data used.
# As the plot shows, the amount of initial data used has a significant impact
# on the performance.
path_to_results = results_dir / FILE_TO_LOAD
results = pd.read_csv(path_to_results).rename(
    columns={"Scenario": "% of data used"}
)

# File suffix: everything after "results_" in the file name, without extension.
file_suffix = FILE_TO_LOAD.split("results_")[1].split(".")[0]
# NOTE(review): `path` is not used anywhere in this cell; it is kept only in
# case other cells rely on it -- remove it if that is not the case.
path = Path(sys.path[0])

# Use one distinct color per scenario.
color_palette = sns.color_palette("husl", results["% of data used"].nunique())
ax = sns.lineplot(
    data=results,
    marker="o",
    markersize=10,
    x="Num_Experiments",
    y="yield_CumBest",
    hue="% of data used",
    palette=color_palette,
)
# The `create_example_plots` helper is intentionally not used here; the figure
# is styled and saved manually below.

# The simulation settings are encoded in the file name.
# Example: In file name "results_90_bq2_ndi10_mc15.csv";
# temperature = 90
# batch quantity = 2
# number of DOE iterations = 10
# number of Monte Carlo iterations = 15

TEMPERATURES = [90, 105, 120]

# Split the suffix once; token order is
# <temperature>_bq<batch quantity>_ndi<DOE iterations>_mc<Monte Carlo iterations>.
name_tokens = file_suffix.split("_")
temperature = int(name_tokens[0])
batch_quantity = int(name_tokens[1].split("bq")[1])
num_doe_iterations = int(name_tokens[2].split("ndi")[1])
num_mc_iterations = int(name_tokens[3].split("mc")[1])
# Temperatures other than the one encoded in the file name (used in the title).
other_temperatures = [temp for temp in TEMPERATURES if temp != temperature]
# Largest experiment count in the results; converted to a plain Python int so
# it can be used directly as a `range` bound for the x-ticks.
num_experiments = int(results["Num_Experiments"].max())

# Fetch the figure once instead of calling `ax.get_figure()` repeatedly.
fig = ax.get_figure()
# change size of the figure
fig.set_size_inches(6, 4)
# place x-ticks at integer experiment counts, one tick per batch
ax.set_xticks(range(0, num_experiments + 1, batch_quantity))
# put the legend in the lower right corner with an explicit title
ax.legend(title="% of existing data used", loc="lower right")
# set the title of the plot
ax.set_title(f"transfer learning from {other_temperatures} to {temperature} °C")
# set the x-axis label
ax.set_xlabel("# experiments")
# set the y-axis label
ax.set_ylabel("cumulative best yield / %")
# set resolution of the plot to 600 dpi
fig.set_dpi(600)
# reduce white space around the plot
fig.tight_layout()
# save the plot to the results directory in SVG format
fig.savefig(
    results_dir / f"transfer_learning_plot_{file_suffix}.svg", format="svg"
)
