In [1]:
##Importing all needed libraries
try:
    # Summit-related imports
    import summit
    from summit.benchmarks import ExperimentalEmulator
    from summit.domain import *
    from summit.utils.dataset import DataSet
    from summit.strategies import SOBO, MultitoSingleObjective, LHS

    # External libraries
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    # File and path handling
    import pathlib
    import os
    import logging
    import re

except ModuleNotFoundError as e:
    print(f"Error: {e}. Please install the required libraries before running the program.")


In [2]:
# ---------------------------------------------------------------------------
# Configurable parameters
# ---------------------------------------------------------------------------
# Root folder holding every input and output file of this project.
base_path = pathlib.Path("D:/!PythonCode/ChemistryOptimization/DataSets/MidazTest")

# The project name is the shared prefix of the bounds, data and log file names.
PROJECT_NAME = "MidazTest"
LOG_NAME = f"{PROJECT_NAME}_Log.csv"
DATA_NAME = f"{PROJECT_NAME}_Data.csv"
BOUNDS_NAME = f"{PROJECT_NAME}_Bounds.csv"
# Alternative input files, kept for reference:
#   BOUNDS_NAME = "Nakul_Midazolam_BoundariesV2.csv"
#   DATA_NAME = "StartExp.csv"

# Names of the sub-folders that live directly under base_path.
LOG_DIR = "Logs"
IT_DIR = "IterData"
MODEL_DIR = "Models"
DATA_DIR = "Data"

# Function to create directory if it doesn't exist
def create_directory(base_path, directory):
    """
    Create ``base_path / directory`` (and any missing parents) if it doesn't exist.

    Parameters:
    - base_path (pathlib.Path): Folder under which the directory is created.
    - directory (str): Name (or relative path) of the directory to create.

    Raises:
    - FileExistsError: If the target path already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate is_dir() check,
    # so there is no window between "check" and "create". mkdir still raises
    # FileExistsError when the path exists as a regular file.
    (base_path / directory).mkdir(parents=True, exist_ok=True)
        
# Create directories
for directory in [DATA_DIR, MODEL_DIR, IT_DIR, LOG_DIR]:
    create_directory(base_path, directory)

# Set data paths
data_path = base_path / DATA_DIR
model_path = base_path / MODEL_DIR
it_path = base_path / IT_DIR
log_path = base_path / LOG_DIR

# Configure logging
# Note: logging.basicConfig does nothing when the root logger already has
# handlers, so re-running this cell in a live kernel keeps the first log file.
log_file_path = log_path / LOG_NAME
logging.basicConfig(
    filename = log_file_path,
    level = logging.INFO,
    format = "%(asctime)s - %(levelname)s - %(message)s",
)

# Load initial boundaries data
# The bounds table is required by the cells that follow, so after printing the
# friendly message the exception is re-raised: execution stops here instead of
# failing later with an unrelated NameError on `init_bounds_df`.
try:
    init_bounds_df = pd.read_csv(data_path / BOUNDS_NAME)
except FileNotFoundError:
    print(f"Error: File '{BOUNDS_NAME}' not found. Please check the file path.")
    raise
except pd.errors.EmptyDataError:
    print(f"Error: File '{BOUNDS_NAME}' is empty or in an invalid format.")
    raise


In [3]:
# Temporary: display the dataframe loaded from the Boundaries.csv file.
init_bounds_df

Unnamed: 0,Condition,Type,Categories,BoundaryMin,BoundaryMax,Description,Maximize
0,Temperature,Continuous,,40.0,80.0,Reaction temperature in degrees Celsius (ºC),
1,Catalyst_Amount,Continuous,,0.01,1.0,Catalyst amounts in molar equivalents (Equiv.),
2,Starting_Reagent,Continuous,,1.1,2.0,2-Methylimidozole amounts in molar equivalents...,
3,Solvent,Continuous,,0.1,0.35,Solvent amount in milliliters (mL),
4,Time,Continuous,,2.0,24.0,Duration of reaction in hours (hr),
5,Base,Continuous,,1.0,5.0,Base amount in molar equivalents (Equiv.),
6,Main_Product,Objective,,0.0,1.0,LCAP of Main Product,True
7,Main_Impurity,Objective,,0.0,1.0,LCAP of Main Impurity,False


In [4]:
def it_count(start_exp_num, fin_exp_num, suggest_amount, project_name):
    """
    Calculate the current iteration and the file names that belong to it.

    The iteration is ``(fin_exp_num - start_exp_num) / suggest_amount``. This is
    a true division, so the iteration is a float and the generated file names
    contain it in float form (for example ``..._It2.0.json``).

    Parameters:
    - start_exp_num (int): Starting experiment number.
    - fin_exp_num (int): Finish experiment number.
    - suggest_amount (int): Number of experiments suggested per iteration.
      Must be greater than zero.
    - project_name (str): Name of the project, used as file-name prefix.

    Returns:
    - tuple: ``(current_it, model_name, prev_model_name, it_name, prev_it_name)``
      * current_it (float): Current iteration number.
      * model_name (str): Model file name for the current iteration.
      * prev_model_name (str): Model file name for the previous iteration.
      * it_name (str): Experiment file name for the next iteration.
      * prev_it_name (str): Experiment file name for the current iteration.

    Raises:
    - ValueError: If ``fin_exp_num`` is smaller than ``start_exp_num`` or if
      ``suggest_amount`` is not positive.
    """

    if fin_exp_num < start_exp_num:
        raise ValueError("Finish experiment number should be greater than or equal to the start experiment number.")

    # Guard the division below: zero would raise a bare ZeroDivisionError and a
    # negative value would silently produce negative iteration numbers.
    if suggest_amount <= 0:
        raise ValueError("Number of experiments to suggest should be greater than zero.")

    current_it = (fin_exp_num - start_exp_num) / suggest_amount
    next_it = current_it + 1
    prev_it = current_it - 1

    model_name = f"{project_name}_Model_It{current_it}.json"
    prev_model_name = f"{project_name}_Model_It{prev_it}.json"
    it_name = f"{project_name}_Exp_It{next_it}.csv"
    prev_it_name = f"{project_name}_Exp_It{current_it}.csv"

    return current_it, model_name, prev_model_name, it_name, prev_it_name

def find_last_iteration_with_log(log_file_path, log_message, max_lookback=3):
    """
    Find the most recent completed iteration whose log section contains a message.

    A log section is the text between ``Iteration <n>: Program started`` and the
    matching ``Iteration <n>: Program completed`` line. Only the newest
    ``max_lookback`` completed sections are inspected, newest first.

    Parameters:
    - log_file_path (str or pathlib.Path): Path to the log file.
    - log_message (str): Log message to search for (case-insensitive).
    - max_lookback (int or None): How many of the most recent completed
      iterations to inspect. ``None`` inspects all of them. Defaults to 3.

    Returns:
    - Tuple[bool, int or None]: (True, iteration) if the log message is found,
      (False, None) otherwise (including when the log file does not exist).
    """
    # The iteration number may be written with decimals (e.g. "4.0"); the
    # back-reference \1 ties each "started" line to its own "completed" line.
    pattern = re.compile(r"Iteration ([\d.]+): Program started(.*?)Iteration \1: Program completed", re.DOTALL | re.IGNORECASE)

    try:
        with open(log_file_path, 'r') as log_file:
            log_content = log_file.read()
    except FileNotFoundError:
        logging.error(f"Error: Log file '{log_file_path}' not found. Please check the file path.")
        return False, None

    # findall yields (iteration, section_text) pairs in file order; reverse them
    # so the newest iteration is checked first, then keep only the look-back window.
    newest_first = pattern.findall(log_content)[::-1]
    wanted = log_message.lower()

    for iteration, logs in newest_first[:max_lookback]:
        if wanted in logs.lower():  # Case-insensitive comparison
            return True, int(float(iteration))

    return False, None

# Example usage: look up the most recent iteration whose log section mentions
# the bounds-expansion message.
log_message = "Expansion of bounds dataframe"
check, last_iteration = find_last_iteration_with_log(log_file_path, log_message)

print(
    "Log message not found or log file not accessible."
    if last_iteration is None
    else f"Last iteration with '{log_message}': {last_iteration}"
)


Last iteration with 'Expansion of bounds dataframe': 4


In [5]:
# Not referenced anywhere in the visible part of this notebook.
# NOTE(review): presumably the number of exploration experiments - confirm.
EXPLORE_QUAN = 3

# Read as a module-level value by perform_summit_optimization, which compares
# it against 0.
# NOTE(review): presumably the number of iterations since the last
# "Expansion of bounds dataframe" log entry - confirm.
check_distance = 2

def perform_summit_optimization(data_df, suggest_amount, expression, domain, project_name, model_path, it_path, start_exp_num, fin_exp_num, current_it, model_name, prev_model_name, it_name, prev_it_name):
    """
    Suggest the next experiments with Summit's SOBO strategy and save the results.

    Two modes are supported:

    * Fresh model - used when ``current_it == 0`` or when the module-level
      ``check_distance`` equals 0. A new ``MultitoSingleObjective`` transform
      and ``SOBO`` strategy are built and given the whole of ``data_df``.
    * Incremental - otherwise. The strategy saved as ``prev_model_name`` is
      loaded and given only the results of the previous iteration (the last
      ``suggest_amount`` rows of ``data_df``), which are also written to
      ``prev_it_name``.

    In both modes the suggested experiments are written to
    ``it_path / it_name``, the strategy is saved to ``model_path / model_name``
    and the suggestions are printed.

    Parameters:
    - data_df: Experiment data passed to the strategy as ``prev_res``.
      NOTE(review): presumably a Summit DataSet - confirm against the caller.
    - suggest_amount (int): Number of experiments to suggest.
    - expression (str): Expression handed to MultitoSingleObjective.
    - domain: Summit domain handed to MultitoSingleObjective and SOBO.
    - project_name (str): Name of the project (currently unused here).
    - model_path (Path or str): Folder where models are saved/loaded.
    - it_path (Path or str): Folder where iteration data is saved.
    - start_exp_num (int): Starting experiment number (currently unused here).
    - fin_exp_num (int): Finish experiment number (currently unused here).
    - current_it (int or float): Current iteration number; 0 means first run.
    - model_name (str): File name under which the strategy is saved.
    - prev_model_name (str): File name of the strategy saved in the previous
      iteration (only read in incremental mode).
    - it_name (str): File name for the newly suggested experiments.
    - prev_it_name (str): File name for the previous iteration's results
      (only written in incremental mode).

    Returns:
    - None
    """

    # The original code had a separate `if check_distance = 0:` block (a syntax
    # error) that duplicated the `current_it == 0` branch verbatim and was then
    # followed by a second, independent if/else. Both conditions are merged so
    # that exactly one suggestion/save cycle runs per call.
    start_fresh = check_distance == 0 or current_it == 0

    if start_fresh:
        transform = MultitoSingleObjective(
            domain = domain,
            expression = expression,
            maximize = True
        )
        strategy = SOBO(
            domain = domain,
            transform = transform
        )
        # A fresh model has seen nothing yet, so it is given every experiment.
        prev_res = data_df
        done_message = "Your first new experimental condition has been added to run. The model has been saved in the directory."
    else:
        # Each iteration consists of `suggest_amount` experiments, so the
        # previous iteration's results are the last `suggest_amount` rows
        # (the original took only the final row, dropping the others whenever
        # more than one experiment is suggested per iteration).
        prev_res = data_df.iloc[-suggest_amount:].copy()
        prev_res.to_csv(os.path.join(it_path, prev_it_name))
        strategy = SOBO.load(os.path.join(model_path, prev_model_name))
        done_message = "A new experimental condition has been added to run. The model has been saved in the directory."

    new_it = strategy.suggest_experiments(
        num_experiments = suggest_amount,
        prev_res = prev_res
    )

    # Persist the suggestions and the updated model for the next iteration.
    new_it.to_csv(os.path.join(it_path, it_name))
    strategy.save(os.path.join(model_path, model_name))

    print(done_message)
    print(new_it)

