In [1]:
## Import every library the notebook needs; stop immediately if one is missing
try:
    # File and path handling (standard library)
    import logging
    import os
    import pathlib

    # External libraries
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    # Summit-related imports
    import summit
    from summit.benchmarks import ExperimentalEmulator
    # NOTE(review): the wildcard import is what brings Domain, ContinuousVariable
    # and CategoricalVariable (used by the domain-building cells) into scope;
    # consider listing the needed names explicitly.
    from summit.domain import *
    from summit.utils.dataset import DataSet
    from summit.strategies import SOBO, MultitoSingleObjective, LHS

except ModuleNotFoundError as e:
    print(f"Error: {e}. Please install the required libraries before running the program.")
    # Re-raise: carrying on after a failed import only postpones the failure to
    # a confusing NameError in a later cell.
    raise


In [2]:
# ---------------------------------------------------------------------------
# Project configuration (edit these values to point at another project)
# ---------------------------------------------------------------------------

# Root folder that contains all sub-folders used by this notebook
base_path = pathlib.Path("D:/!PythonCode/ChemistryOptimization/DataSets/MidazTest")

# The project name is the common prefix of the file names below
PROJECT_NAME = "MidazTest"
BOUNDS_NAME = f"{PROJECT_NAME}_Bounds.csv"
DATA_NAME = f"{PROJECT_NAME}_Data.csv"
LOG_NAME = f"{PROJECT_NAME}_Log.csv"
# Alternative input files (currently disabled):
#BOUNDS_NAME = "Nakul_Midazolam_BoundariesV2.csv"
#DATA_NAME = "StartExp.csv"

# Names of the sub-folders that live under base_path
DATA_DIR, MODEL_DIR, IT_DIR, LOG_DIR = "Data", "Models", "IterData", "Logs"

# Function to create directory if it doesn't exist
def create_directory(base_path, directory):
    """Create the folder ``base_path / directory`` if it doesn't exist.

    Parameters:
    - base_path (pathlib.Path): Folder under which the directory is created.
    - directory (str): Name (or relative path) of the directory to create.

    Returns:
    - None

    Raises:
    - FileExistsError: If the target path exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent in a single step, instead of a
    # separate is_dir() check followed by mkdir() (which can race with another
    # process creating the folder in between). Missing parents are created too.
    (base_path / directory).mkdir(parents=True, exist_ok=True)
        
# Make sure every working folder exists under base_path
for directory in (DATA_DIR, MODEL_DIR, IT_DIR, LOG_DIR):
    create_directory(base_path, directory)

# Full paths of the working folders, in the same order as above
data_path, model_path, it_path, log_path = (
    base_path / folder for folder in (DATA_DIR, MODEL_DIR, IT_DIR, LOG_DIR)
)

# Send INFO-and-above log records to the project's log file
log_file_path = log_path / LOG_NAME
logging.basicConfig(filename=log_file_path, level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Load initial boundaries data.
# On failure the problem is reported and the exception is re-raised: continuing
# would leave init_bounds_df undefined (or stale from an earlier run of this
# cell), and every following cell depends on it.
try:
    init_bounds_df = pd.read_csv(data_path / BOUNDS_NAME)
except FileNotFoundError:
    print(f"Error: File '{BOUNDS_NAME}' not found. Please check the file path.")
    raise
except pd.errors.EmptyDataError:
    print(f"Error: File '{BOUNDS_NAME}' is empty or in an invalid format.")
    raise


In [3]:
init_bounds_df  # Temporary: display the boundaries DataFrame loaded from the bounds CSV

Unnamed: 0,Condition,Type,Categories,BoundaryMin,BoundaryMax,Description,Maximize
0,Temperature,Continuous,,40.0,80.0,Reaction temperature in degrees Celsius (ºC),
1,Catalyst_Amount,Continuous,,0.01,1.0,Catalyst amounts in molar equivalents (Equiv.),
2,Starting_Reagent,Continuous,,1.1,2.0,2-Methylimidozole amounts in molar equivalents...,
3,Solvent,Continuous,,0.1,0.35,Solvent amount in milliliters (mL),
4,Time,Continuous,,2.0,24.0,Duration of reaction in hours (hr),
5,Base,Continuous,,1.0,5.0,Base amount in molar equivalents (Equiv.),
6,Main_Product,Objective,,0.0,1.0,LCAP of Main Product,True
7,Main_Impurity,Objective,,0.0,1.0,LCAP of Main Impurity,False


In [4]:
"""
Create a Summit domain based on the provided boundaries DataFrame.

Parameters:
- init_bounds_df (pd.DataFrame): DataFrame containing information about variable boundaries.

Returns:
- domain (Domain): The created Summit domain.
- obj_df (pd.DataFrame): DataFrame for objective variables.
- in_count (int): Count of input variables.
- out_count (int): Count of output variables.
"""
if not isinstance(init_bounds_df, pd.DataFrame) or init_bounds_df.empty:
    raise ValueError("Invalid input: init_bounds_df must be a non-empty DataFrame.")

domain = Domain()
obj_df = pd.DataFrame()
obj_df = DataSet.from_df(obj_df)

in_count = 0
out_count = 0

for idx, row in init_bounds_df.iterrows():
    name = row[0]
    description = row[5]
    data_type = row[1]

    if data_type == 'Categorical':
        levels = row[2].split(',')

        domain += CategoricalVariable(
            name = name,
            description = description,
            levels = levels
        )
        in_count += 1

    elif data_type == 'Continuous':
        bounds = [row[3], row[4]]

        domain += ContinuousVariable(
            name = name,
            description = description,
            bounds = bounds
        )
        in_count += 1

    elif data_type == 'Objective':
        bounds = [row[3], row[4]]
        maximize = row[6]

        domain += ContinuousVariable(
            name = name,
            description = description,
            bounds = bounds,
            is_objective = True,
            maximize = maximize
        )
        out_count += 1

        obj_df[(name, "DATA")] = ""



In [5]:
domain  # Display the Summit domain built in the previous cell

0,1,2,3
Name,Type,Description,Values
Temperature,"continuous, input",Reaction temperature in degrees Celsius (ºC),"[40.0,80.0]"
Catalyst_Amount,"continuous, input",Catalyst amounts in molar equivalents (Equiv.),"[0.01,1.0]"
Starting_Reagent,"continuous, input",2-Methylimidozole amounts in molar equivalents (Equiv.),"[1.1,2.0]"
Solvent,"continuous, input",Solvent amount in milliliters (mL),"[0.1,0.35]"
Time,"continuous, input",Duration of reaction in hours (hr),"[2.0,24.0]"
Base,"continuous, input",Base amount in molar equivalents (Equiv.),"[1.0,5.0]"
Main_Product,"continuous, maximize objective",LCAP of Main Product,"[0.0,1.0]"
Main_Impurity,"continuous, minimize objective",LCAP of Main Impurity,"[0.0,1.0]"


In [6]:
# Rebuild the Summit domain from the "<PROJECT_NAME>_BoundsE.csv" boundaries file.
#
# NOTE(review): this cell overwrites BOUNDS_NAME and init_bounds_df from the
# configuration cell and repeats the domain-building logic of the earlier
# cell; consider sharing one function between the two cells.
BOUNDS_NAME = f"{PROJECT_NAME}_BoundsE.csv"
init_bounds_df = pd.read_csv(data_path / BOUNDS_NAME)

# Columns are read by position, not by header name:
# 0 = variable name, 1 = type ('Categorical', 'Continuous' or 'Objective'),
# 2 = comma-separated category levels, 3 = lower bound, 4 = upper bound,
# 5 = description, 6 = maximize flag (read for objectives only).
#
# Results of this cell:
# - domain (Domain): The created Summit domain.
# - obj_df (DataSet): Summit DataSet with one empty (name, "DATA") column per objective.
# - in_count (int): Count of input variables.
# - out_count (int): Count of output variables.
if not isinstance(init_bounds_df, pd.DataFrame) or init_bounds_df.empty:
    raise ValueError("Invalid input: init_bounds_df must be a non-empty DataFrame.")

domain = Domain()
obj_df = DataSet.from_df(pd.DataFrame())

in_count = 0
out_count = 0

for idx, row in init_bounds_df.iterrows():
    # .iloc is required here: plain row[0] on a string-labelled Series is
    # deprecated positional indexing in pandas 2.x.
    name = row.iloc[0]
    data_type = row.iloc[1]
    description = row.iloc[5]

    if data_type == 'Categorical':
        levels = row.iloc[2].split(',')

        domain += CategoricalVariable(
            name = name,
            description = description,
            levels = levels
        )
        in_count += 1

    elif data_type == 'Continuous':
        bounds = [row.iloc[3], row.iloc[4]]

        domain += ContinuousVariable(
            name = name,
            description = description,
            bounds = bounds
        )
        in_count += 1

    elif data_type == 'Objective':
        bounds = [row.iloc[3], row.iloc[4]]
        maximize = row.iloc[6]

        domain += ContinuousVariable(
            name = name,
            description = description,
            bounds = bounds,
            is_objective = True,
            maximize = maximize
        )
        out_count += 1

        # Reserve an (initially empty) data column for this objective
        obj_df[(name, "DATA")] = ""

    else:
        # Rows with an unknown type are still skipped, but no longer silently
        logging.warning(
            "Skipping row %s (%r): unrecognised variable type %r", idx, name, data_type
        )

In [7]:
domain  # Display the Summit domain rebuilt in the previous cell

0,1,2,3
Name,Type,Description,Values
Temperature,"continuous, input",Reaction temperature in degrees Celsius (ºC),"[40.0,80.0]"
Catalyst_Amount,"continuous, input",Catalyst amounts in molar equivalents (Equiv.),"[0.01,1.99]"
Starting_Reagent,"continuous, input",2-Methylimidozole amounts in molar equivalents (Equiv.),"[1.1,2.0]"
Solvent,"continuous, input",Solvent amount in milliliters (mL),"[0.1,0.6]"
Time,"continuous, input",Duration of reaction in hours (hr),"[2.0,24.0]"
Base,"continuous, input",Base amount in molar equivalents (Equiv.),"[1.0,5.0]"
Main_Product,"continuous, maximize objective",LCAP of Main Product,"[0.0,1.0]"
Main_Impurity,"continuous, minimize objective",LCAP of Main Impurity,"[0.0,1.0]"
