In [16]:
# Inverse design Benchmarking


# import required libraries
import pandas as pd
import glob
import numpy as np
import csv
import openai
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import re
import csv
import time # to get a unique timestamp
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
from dotenv import load_dotenv
import os
import dotenv
import ipywidgets as widgets
from IPython.display import display
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from sklearn.gaussian_process import GaussianProcessRegressor
load_dotenv()
api_key = os.getenv('API_KEY')
openai.api_key = api_key
model_name = "gpt-3.5-turbo"

## Utility functions

In [17]:
def find_matching_result(df, suggestion):
    if suggestion:
        # Create the suggestion string in the same format as the formulation strings
        suggestion_str = f'The formulation is Powderkg = {suggestion["powderkg"]}, wc = {suggestion["wc"]}, materials = {suggestion["materials"]}, curing = {suggestion["curing"]}'

        # Look for a match in the DataFrame
        match = df[df["Formulation"].str.lower() == suggestion_str.lower()]

        # If a match was found, return the lab result
        if not match.empty:
            return match.iloc[0]["Strength"]

    # If no match was found, return None
    return None

def parse_solution(response):
    # Initialize a dictionary to hold the solution
    solution = {}

    # Find matches for each key
    keys = ['powderkg', 'wc', 'materials', 'curing']
    for key in keys:
        # Use regex to find the key followed by = and the value
        match = re.search(fr'{key} = (.*?)(,|$)', response, re.IGNORECASE)
        if match:
            # If a match was found, add it to the solution dictionary
            value = match.group(1).strip()
            
            # Remove trailing period from the 'curing' value
            if key == 'curing':
                value = value.rstrip('.')
                
            solution[key] = value
        else:
            return None  # If any key wasn't found, return None

    # Return the solution dictionary if all keys were found
    return solution

    
def format_response_to_model(lab_result):
    """
    Given a lab result, format a response message to the model.
    """
    return f"We've achieved a compressive strength of {lab_result['fc_28d_Lab_validation']} MPa. Let's try to do better!"

def parse_materials(materials_str):
    match = re.search(r'(\d+)/(\d+) FA/GGBFS', materials_str)
    if match:
        return int(match.group(1)) / (int(match.group(1)) + int(match.group(2)))
    else:
        return None
    
def parse_curing(materials_str):
    if "Ambient curing" in materials_str:
        return "ambient"
    elif "Heat curing" in materials_str:
        return "oven"
    else:
        return None

def load_data(csv_path):
    df = pd.read_csv(csv_path)
    df['FA_GGBFS_ratio'] = df['Materials'].apply(parse_materials)
    df['curing'] = df['Materials'].apply(parse_curing)  # Add this line
    return df

df = load_data('Data/DiscoveryData_Sample.csv')

# Initialize empty DataFrame
formulation_df = pd.DataFrame(columns=["Formulation", "Strength"])

# Loop through each row in the original data
for idx, row in df.iterrows():
    
    # Get necessary attributes from row
    powder = row["Powderkg"]
    wc = row["WC"]
    materials = row["Materials"]

    # Extract Fly Ash/GGBFS ratio
    fa_ggbfs = materials.split(",")[0].split("-")[1]
    
    # Extract curing method
    curing_method = materials.split(",")[-1].strip()

    # Remove unwanted string from curing method
    curing_method = curing_method.replace(" (Rao et al. 2018)", "")
    curing_method = curing_method.replace(" (Rao et al.)", "")
    
    # Compressive strength
    strength = row["fc_28dGroundTruth"]
    
    # Create formulation string in the same format as the model's output
    formulation = f'The formulation is Powderkg = {powder}, wc = {wc}, materials = {fa_ggbfs}, curing = {curing_method}'
    
    # Append the formulation and its respective strength to the new DataFrame
    new_row = pd.DataFrame({"Formulation": [formulation], "Strength": [strength]})
    formulation_df = pd.concat([formulation_df, new_row], ignore_index=True)

print(formulation_df.iloc[50,0])

def handle_openai_error(exception):
    if isinstance(exception, openai.error.RateLimitError):
        print(f"Rate limit error. Will retry after {exception.wait_seconds} seconds.")
        time.sleep(exception.wait_seconds)
    elif isinstance(exception, openai.error.InvalidRequestError):
        print(f"Invalid request: {str(exception)}")
    elif isinstance(exception, openai.error.AuthenticationError):
        print(f"Authentication error: {str(exception)}")
    elif isinstance(exception, openai.error.ServiceUnavailableError):
        print(f"Service unavailable error. Retrying after a delay...")
        time.sleep(5)  # Sleep for 5 seconds before retrying
    elif isinstance(exception, openai.error.APIError):
        print(f"API error: {str(exception)}. Retrying after a delay...")
        time.sleep(5)  # Sleep for 5 seconds before retrying
    elif isinstance(exception, openai.error.Timeout):
        print(f"Timeout error: {str(exception)}. Retrying after a longer delay...")
        time.sleep(10)  # Sleep for 10 seconds before retrying
    else:
        raise exception
# -> here we also set the API parameters, such as temperature, etc.

def call_openai_api(messages,temp, max_retries=5, delay=5):
    for i in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model=model_name,
                temperature=temp,
                messages=messages,
                max_tokens=250,
                n=1
            )
            return response
        except openai.error.OpenAIError as e:
            handle_openai_error(e)
            if i < max_retries - 1:  # i is zero indexed
                time.sleep(delay)  # wait before trying again
                continue
            else:
                raise
                
# Load the text from the file
with open('prompts_ID_generic.txt', 'r') as f:
    lines = f.read().splitlines()
    
# Store the contents in separate variables
instructions_text = lines[0]
system_role_text = lines[1]
context_text = lines[2]
iterate_text = lines[3]


# Create the widgets with the loaded text
layout = widgets.Layout(width='auto', height='200px')  # adjust the height and width as needed
instructions_prompt = widgets.Textarea(value=instructions_text, description='Instructions:', layout=layout)
system_role_prompt = widgets.Textarea(value=system_role_text, description='System Role:', layout=layout)
context_prompt = widgets.Textarea(value=context_text, description='Design Rules:', layout=layout)
iterate_prompt = widgets.Textarea(value=iterate_text, description='Question at each iteration:', layout=layout)


def update_file(button):
    with open('prompts_ID_generic.txt', 'w') as f:
        f.write(instructions_prompt.value + '\n')
        f.write(system_role_prompt.value + '\n')
        f.write(context_prompt.value + '\n')
        f.write(iterate_prompt.value + '\n')

    print("💽Saved!") 
# Create a "Save Prompts" button
save_button = widgets.Button(description="Save Prompts",
                             layout=widgets.Layout(width='100%', height='30px'))

# Link the button to the update_file function
save_button.on_click(update_file)

# Change the color of the button to green
save_button.style.button_color = 'lightgreen'

# Run to display the text box widgets and the save button
display(instructions_prompt, context_prompt, system_role_prompt, iterate_prompt)
display(save_button)        


The formulation is Powderkg = 380, wc = 0.55, materials = 0.5/0.5, curing = Heat curing


Textarea(value='////You are provided with general design knowledge for geopolymer concrete and lab validations…

Textarea(value='////General design knowledge //The FA/GGBFS ratio change from 0.7/0.3 to 0.5/0.5 increases com…

Textarea(value="////You are a powerful concrete formulation prediction model tasked with finding the best conc…

Textarea(value='//// Awesome! However, we have to go far and beyond! Give me an even better suggestion! Make s…

Button(description='Save Prompts', layout=Layout(height='30px', width='100%'), style=ButtonStyle(button_color=…

## Variables


In [3]:
desired_strength = 61.94

In [21]:
desired_strength = 61.94
#temperatures = [0, 0.2, 0.4, 0.6, 1]
temperatures = [1]
budget = 10
NrOfExper = 20
# Iterate over the different temperature settings
for temp in temperatures:

    
    # Repeat the whole experiment n times
    for experiment in range(NrOfExper):
        print(f"\n---\nStarting experiment {experiment+1}...\n---")
        training_data = []
        current_strength = 0.0
        iterations = 0

        # System message including both the role prompt and context
        system_message = instructions_prompt.value + '\n' + context_prompt.value + '\n' + system_role_prompt.value 

        while iterations < budget:
            iterations += 1
            print(f"\n---\nStarting iteration {iterations} at temp {temp}...")

            # Start with the system message
            messages = [
                {"role": "system", "content": system_message}
            ]

            # Add the training data to the messages
            if training_data:
                messages.append({"role": "assistant", "content": "Previously, we have tested these formulations with the following result:\n" + "\n".join(training_data)})
            
            # Add the iteration prompt
            messages.append({"role": "user", "content": iterate_prompt.value})

            # Printing the conversation history
            
#--->uncomment below if you want to read the conversation history

            #print("--- Conversation History ---")
            #for msg in messages:
                #print(f"{msg['role']}: {msg['content']}")
            
            response = call_openai_api(messages,temp)
            lab_result = None  # initialize lab_result

            # Check the role and extract the content if role is 'assistant'
            valid_solution = False
            while not valid_solution:
                response = call_openai_api(messages,temp)
                if response['choices'][0]['message']['role'] == "assistant":
                    print(response['choices'][0]['message']['content'])
                    suggested_solution = parse_solution(response['choices'][0]['message']['content'])
                    if suggested_solution is not None:
                        print(f"Parse result: {suggested_solution}")
                        lab_result = find_matching_result(formulation_df, suggested_solution)
                        if lab_result:
                            current_strength = lab_result
                            # Add the solution and its lab result to the training data
                            training_data.append(f"{response['choices'][0]['message']['content']} resulted in a strength of {current_strength} MPa.")
                            print(suggested_solution)
                            valid_solution = True
                        else:
                            print(f"Iteration {iterations}: No matching lab result found for suggestion {suggested_solution}")
                            # Add reminder to the user prompt
                            messages[-1]["content"] = iterate_prompt.value + "\nPlease remember to stick to the exact parameter grid! No extrapolation or interpolation of the parameters is allowed!  Use this exact format: 'The formulation is Powderkg = {your estimate}, wc = {your estimate}, materials = {your estimate}, curing = {your estimate}'."
                    else:
                        print(f"Iteration {iterations}: Assistant's response did not contain a valid solution. Trying again.")
                        # Add reminder to the user prompt
                        messages[-1]["content"] = iterate_prompt.value + "\nPlease remember to stick to the exact parameter grid and format: 'The formulation is Powderkg = {your estimate}, wc = {your estimate}, materials = {your estimate}, curing = {your estimate}'."
                else:
                    print(f"Iteration {iterations}: Response not from 'assistant'. Trying again.")

                    if current_strength >= desired_strength:
                        print(f"\nDesired compressive strength of {desired_strength} MPa achieved after {iterations} iterations. The solution is {suggested_solution}.")
                        break

        timestamp = str(int(time.time()))

        # create the file name
        filename = f"Results/ID/{model_name}generic_prompt_experiment_{experiment+1}_temp_{temp}_target_{desired_strength}_MPa_Budget_{budget}_zero_shot_{timestamp}.csv"

        # open the file in write mode
        with open(filename, 'w', newline='') as file:
            writer = csv.writer(file)

            # write the headers
            writer.writerow(["Formulation", "Compressive Strength"])

            # iterate over the training data
            for data in training_data:
                # parse the data to extract formulation and compressive strength
                formulation, strength_str = data.split(" resulted in a strength of ")
                strength = float(strength_str.split(" ")[0])  # convert string to float
                writer.writerow([formulation, strength])

        print(f"Data for experiment {experiment+1} and temp {temp} successfully saved to {filename}.")



---
Starting experiment 1...
---

---
Starting iteration 1 at temp 1...
The formulation is Powderkg = 450, wc = 0.45, materials = Fly-Ash/GGBFS at a ratio of 0.5/0.5, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.45', 'materials': 'Fly-Ash/GGBFS at a ratio of 0.5/0.5', 'curing': 'Heat curing'}
Iteration 1: No matching lab result found for suggestion {'powderkg': '450', 'wc': '0.45', 'materials': 'Fly-Ash/GGBFS at a ratio of 0.5/0.5', 'curing': 'Heat curing'}
The formulation is Powderkg = 420, wc = 0.55, materials = 0.6/0.4, curing = Heat curing.
Parse result: {'powderkg': '420', 'wc': '0.55', 'materials': '0.6/0.4', 'curing': 'Heat curing'}
{'powderkg': '420', 'wc': '0.55', 'materials': '0.6/0.4', 'curing': 'Heat curing'}

---
Starting iteration 2 at temp 1...
The formulation is Powderkg = 450, wc = 0.5, materials = 0.5/0.5, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.5', 'materials': '0.5/0.5', 'curing': 'Heat curing'}
{'powderkg': '450', 'wc':

The formulation is Powderkg = 400, wc = 0.5, materials = 0.6/0.4, curing = Heat curing
Parse result: {'powderkg': '400', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Heat curing'}
{'powderkg': '400', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Heat curing'}

---
Starting iteration 2 at temp 1...
The formulation is Powderkg = 410, wc = 0.55, materials = 0.7/0.3, curing = Heat curing
Parse result: {'powderkg': '410', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '410', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 3 at temp 1...
The formulation is Powderkg = 420, wc = 0.45, materials = 0.5/0.5, curing = Heat curing.
Parse result: {'powderkg': '420', 'wc': '0.45', 'materials': '0.5/0.5', 'curing': 'Heat curing'}
{'powderkg': '420', 'wc': '0.45', 'materials': '0.5/0.5', 'curing': 'Heat curing'}

---
Starting iteration 4 at temp 1...
The formulation is Powderkg = 450, wc = 0.5, materials = 0.7/0.3, curing = Heat c

The formulation is Powderkg = 420, wc = 0.6, materials = 0.5/0.5, curing = Ambient curing.
Parse result: {'powderkg': '420', 'wc': '0.6', 'materials': '0.5/0.5', 'curing': 'Ambient curing'}
{'powderkg': '420', 'wc': '0.6', 'materials': '0.5/0.5', 'curing': 'Ambient curing'}

---
Starting iteration 5 at temp 1...
The formulation is Powderkg = 450, wc = 0.5, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.5', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '450', 'wc': '0.5', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 6 at temp 1...
The formulation is Powderkg = 390, wc = 0.6, materials = 0.6/0.4, curing = Heat curing.
Parse result: {'powderkg': '390', 'wc': '0.6', 'materials': '0.6/0.4', 'curing': 'Heat curing'}
{'powderkg': '390', 'wc': '0.6', 'materials': '0.6/0.4', 'curing': 'Heat curing'}

---
Starting iteration 7 at temp 1...
The formulation is Powderkg = 410, wc = 0.45, materials = 0.5/0.5, curing = 

Certainly! Based on the previous suggestions and feedback, I have considered a unique formulation that has not been validated before.

The formulation is Powderkg = 410, wc = 0.55, materials = 0.5/0.5, curing = Heat curing. This formulation is expected to result in a compressive strength of 55.12 MPa.
Parse result: {'powderkg': '410', 'wc': '0.55', 'materials': '0.5/0.5', 'curing': 'Heat curing. This formulation is expected to result in a compressive strength of 55.12 MPa'}
Iteration 8: No matching lab result found for suggestion {'powderkg': '410', 'wc': '0.55', 'materials': '0.5/0.5', 'curing': 'Heat curing. This formulation is expected to result in a compressive strength of 55.12 MPa'}
Apologies for not providing a unique suggestion earlier. Here is a new formulation:

The formulation is Powderkg = 450, wc = 0.5, materials = 0.6/0.4, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Heat curing'}
{'powderkg': '450', 'wc': '0.5', 

The formulation is Powderkg = 400, wc = 0.55, materials = 0.5/0.5, curing = Heat curing.
Parse result: {'powderkg': '400', 'wc': '0.55', 'materials': '0.5/0.5', 'curing': 'Heat curing'}
{'powderkg': '400', 'wc': '0.55', 'materials': '0.5/0.5', 'curing': 'Heat curing'}

---
Starting iteration 8 at temp 1...
The formulation is Powderkg = 450, wc = 0.55, materials = 0.7/0.3, curing = Ambient curing.
Parse result: {'powderkg': '450', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Ambient curing'}
{'powderkg': '450', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Ambient curing'}

---
Starting iteration 9 at temp 1...
The formulation is Powderkg = 380, wc = 0.45, materials = 0.6/0.4, curing = Heat curing.
Parse result: {'powderkg': '380', 'wc': '0.45', 'materials': '0.6/0.4', 'curing': 'Heat curing'}
{'powderkg': '380', 'wc': '0.45', 'materials': '0.6/0.4', 'curing': 'Heat curing'}

---
Starting iteration 10 at temp 1...
The formulation is Powderkg = 410, wc = 0.6, materials = 0.5/0.5, 

The formulation is Powderkg = 440, wc = 0.45, materials = 0.5/0.5, curing = Heat curing.
Parse result: {'powderkg': '440', 'wc': '0.45', 'materials': '0.5/0.5', 'curing': 'Heat curing'}
{'powderkg': '440', 'wc': '0.45', 'materials': '0.5/0.5', 'curing': 'Heat curing'}

---
Starting iteration 2 at temp 1...
The formulation is Powderkg = 450, wc = 0.45, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '450', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 3 at temp 1...
Service unavailable error. Retrying after a delay...
"The formulation is Powderkg = 420, wc = 0.5, materials = 0.6/0.4, curing = Heat curing."
Parse result: {'powderkg': '420', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Heat curing."'}
Iteration 3: No matching lab result found for suggestion {'powderkg': '420', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Heat curing."'}
Th

The formulation is Powderkg = 410, wc = 0.45, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '410', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '410', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 4 at temp 1...
The formulation is Powderkg = 420, wc = 0.5, materials = 0.6/0.4, curing = Ambient curing.
Parse result: {'powderkg': '420', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Ambient curing'}
{'powderkg': '420', 'wc': '0.5', 'materials': '0.6/0.4', 'curing': 'Ambient curing'}

---
Starting iteration 5 at temp 1...
The formulation is Powderkg = 430, wc = 0.55, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '430', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '430', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 6 at temp 1...
The formulation is Powderkg = 420, wc = 0.45, materials = 0.7/0.3, cur

Based on the previous results and the information provided, I suggest the following formulation:

The formulation is Powderkg = 380, wc = 0.55, materials = 0.5/0.5, curing = Heat curing.
Parse result: {'powderkg': '380', 'wc': '0.55', 'materials': '0.5/0.5', 'curing': 'Heat curing'}
{'powderkg': '380', 'wc': '0.55', 'materials': '0.5/0.5', 'curing': 'Heat curing'}

---
Starting iteration 8 at temp 1...
Sure, based on the previous results and the information provided, taking into account that higher powder content improves compressive strength, I suggest the following unique formulation:

The formulation is Powderkg = 450, wc = 0.45, materials = 0.7/0.3, curing = Heat curing will result in a higher compressive strength.
Parse result: {'powderkg': '450', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing will result in a higher compressive strength'}
Iteration 8: No matching lab result found for suggestion {'powderkg': '450', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Hea

The formulation is Powderkg = 440, wc = 0.55, materials = 0.6/0.4, curing = Heat curing.
Parse result: {'powderkg': '440', 'wc': '0.55', 'materials': '0.6/0.4', 'curing': 'Heat curing'}
{'powderkg': '440', 'wc': '0.55', 'materials': '0.6/0.4', 'curing': 'Heat curing'}

---
Starting iteration 5 at temp 1...
The formulation is Powderkg = 420, wc = 0.45, materials = 0.5/0.5, curing = Ambient curing
Parse result: {'powderkg': '420', 'wc': '0.45', 'materials': '0.5/0.5', 'curing': 'Ambient curing'}
{'powderkg': '420', 'wc': '0.45', 'materials': '0.5/0.5', 'curing': 'Ambient curing'}

---
Starting iteration 6 at temp 1...
My next suggestion for an even better formulation is:

The formulation is Powderkg = 450, wc = 0.5, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.5', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '450', 'wc': '0.5', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 7 at temp 1...
The formulation 

The formulation is Powderkg = 450, wc = 0.45, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '450', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '450', 'wc': '0.45', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 4 at temp 1...
The formulation is Powderkg = 400, wc = 0.6, materials = 0.6/0.4, curing = Ambient curing.
Parse result: {'powderkg': '400', 'wc': '0.6', 'materials': '0.6/0.4', 'curing': 'Ambient curing'}
{'powderkg': '400', 'wc': '0.6', 'materials': '0.6/0.4', 'curing': 'Ambient curing'}

---
Starting iteration 5 at temp 1...
The formulation is Powderkg = 410, wc = 0.55, materials = 0.7/0.3, curing = Heat curing.
Parse result: {'powderkg': '410', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Heat curing'}
{'powderkg': '410', 'wc': '0.55', 'materials': '0.7/0.3', 'curing': 'Heat curing'}

---
Starting iteration 6 at temp 1...
The formulation is Powderkg = 380, wc = 0.5, materials = 0.5/0.5, curi

In [None]:
### BO Baseline

In [31]:
# Define an acquisition function
def acquisition(X, model, scaler, epsilon=0.01):
    prediction, std_dev = model.predict(scaler.transform(X), return_std=True)
    return -(prediction + epsilon * std_dev)

# load the data
file_path = os.path.join('Data', 'numeric_data.csv')
data = pd.read_csv(file_path)

# Extract features and target
features = ["Powderkg", "Liquidkg", "WC", "Fly_Ash_ratio", "GGBFS_ratio", "temperature"]
target = "fc_28dGroundTruth"

# Initialize kernel and GP model
#kernel = ConstantKernel(1.0, (1e-3, 1e3)) * Matern(length_scale=10, nu=1.5)
kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gpr = GaussianProcessRegressor(kernel=kernel)

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Add timestamp

for experiment in range(30):
    print(f"Experiment: {experiment+1}")

    # Randomly sample initial training set
    initial_sample_size = 4
    train_data = data.sample(n=initial_sample_size)
    while train_data[target].max() >= desired_strength:
        train_data = data.sample(n=initial_sample_size)
        
    # All other data is potential test data
    test_data = data.drop(train_data.index)

    # Define scalers outside the loop
    X_scaler = StandardScaler()
    y_scaler = MinMaxScaler()

    for _ in range(10):
        # Fit and transform the data
        X_train = X_scaler.fit_transform(train_data[features])
        y_train = y_scaler.fit_transform(train_data[target].values.reshape(-1, 1))

        # Train GP model
        gpr.fit(X_train, y_train)

        # Use the acquisition function to select the next point from the test set
        X_test = X_scaler.transform(test_data[features])
        acq_values = -acquisition(X_test, gpr, X_scaler)  # negative because we are maximizing

        # Select the point that maximizes the acquisition function
        max_acq_index = np.argmax(acq_values)
        X_next = X_test[max_acq_index]

        # Get the original index from the test_data DataFrame
        original_index = test_data.index[max_acq_index]

        # Append X_next to training data
        train_data = train_data.append(test_data.loc[original_index])

        # Remove X_next from test_data
        test_data = test_data.drop(original_index)

        # If we have found a material with the desired strength, we stop sampling
        if train_data.loc[original_index, target] >= desired_strength:
            break
     
    filename = f"results/BO/experiment_{experiment+1}_initialsample_{initial_sample_size}_kernel_{str(gpr.kernel).replace(' ', '_')}_timestamp_{timestamp}.csv"
    train_data.to_csv(filename, index=False)


Experiment: 1


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 2
Experiment: 3


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 4
Experiment: 5


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 6


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 7


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 8
Experiment: 9


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 10


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 11


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 12


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 13


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 14
Experiment: 15


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 16
Experiment: 17


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 18
Experiment: 19


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 20


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 21
Experiment: 22


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 23
Experiment: 24


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 25
Experiment: 26


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 27
Experiment: 28


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 29
Experiment: 30


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


'  filename = f"results/BO/experiment_{experiment+1}_initialsample_{initial_sample_size}_kernel_{str(gpr.kernel).replace(\' \', \'_\')}_timestamp_{timestamp}.csv"\n#    train_data.to_csv(filename, index=False)'

In [24]:
from sklearn.preprocessing import StandardScaler
from lolopy.learners import RandomForestRegressor
import numpy as np
import pandas as pd
desired_strength = 61.94

def acquisition(X, model, scaler):
    '''Acquisition function that finds the maximum of predictions plus uncertainties.'''
    # Get predictions and uncertainties
    preds, stds = model.predict(X, return_std=True)
    # Return predictions plus uncertainties
    return preds + stds

def run_experiment(data, features, target, desired_strength, experiment_num=30, initial_sample_size=4, iteration_num=10):
    # Initialize Random Forest model
    dtr = RandomForestRegressor()

    # Define scaler outside the loop
    X_scaler = StandardScaler()

    for experiment in range(experiment_num):
        print(f"Experiment: {experiment+1}")
        # Randomly sample initial training set
        train_data = data.sample(n=initial_sample_size)

        # All other data is potential test data
        test_data = data.drop(train_data.index)

        for _ in range(iteration_num):
            # Fit and transform the data
            X_train = X_scaler.fit_transform(train_data[features])
            y_train = train_data[target].values
            # Check if number of samples less than 8, if yes, tile them
            if X_train.shape[0] < 8:
                repeat_times = 8 // X_train.shape[0] + 1
                X_train = np.tile(X_train, (repeat_times, 1))
                y_train = np.tile(y_train, repeat_times)
            # Train RF model
            dtr.fit(X_train, y_train)

            # Use the acquisition function to select the next point from the test set
            X_test = X_scaler.transform(test_data[features])
            acq_values = acquisition(X_test, dtr, X_scaler)

            # Select the point that maximizes the acquisition function
            max_acq_index = np.argmax(acq_values)
            X_next = X_test[max_acq_index]

            # Get the original index from the test_data DataFrame
            original_index = test_data.index[max_acq_index]

            # Append X_next to training data
            train_data = train_data.append(test_data.loc[original_index])

            # Remove X_next from test_data
            test_data = test_data.drop(original_index)

            # If we have found a material with the desired strength, we stop sampling
            if train_data.loc[original_index, target] >= desired_strength:
                break
        filename = f"results/RF_ID/experiment_{experiment+1}_RF_initialsample_{initial_sample_size}_timestamp_{timestamp}.csv"
        train_data.to_csv(filename, index=False)
        
        
# load the data
file_path = os.path.join('Data', 'numeric_data.csv')
data = pd.read_csv(file_path)

# Extract features and target
features = ["Powderkg", "Liquidkg", "WC", "Fly_Ash_ratio", "GGBFS_ratio", "temperature"]
target = "fc_28dGroundTruth"

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Add timestamp
               
run_experiment(data, features, target, desired_strength, experiment_num=30, initial_sample_size=4, iteration_num=10)
                

Experiment: 1


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 2


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 3


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 4
Experiment: 5


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 6


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 7
Experiment: 8


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 9


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 10
Experiment: 11
Experiment: 12


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 13


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 14
Experiment: 15
Experiment: 16


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 17


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 18
Experiment: 19


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 20
Experiment: 21


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 22
Experiment: 23


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 24
Experiment: 25


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 26


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 27


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 28
Experiment: 29


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 30


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


In [38]:
#Random Draw

import numpy as np
import pandas as pd
desired_strength = 61.94


def run_experiment(data, features, target, desired_strength, experiment_num=30, initial_sample_size=0, iteration_num=10):
    # Initialize Random Forest model
    dtr = RandomForestRegressor()

    # Define scaler outside the loop
    X_scaler = StandardScaler()

    for experiment in range(experiment_num):
        print(f"Experiment: {experiment+1}")
        # Randomly sample initial training set
        train_data = data.sample(n=initial_sample_size)

        # All other data is potential test data
        test_data = data.drop(train_data.index)

        for _ in range(iteration_num):
            # Fit and transform the data
            # Randomly select a point from the test set
            original_index = np.random.choice(test_data.index)

            # Append X_next to training data
            train_data = train_data.append(test_data.loc[original_index])

            # Remove X_next from test_data
            test_data = test_data.drop(original_index)

            # If we have found a material with the desired strength, we stop sampling
            if train_data.loc[original_index, target] >= desired_strength:
                break
        filename = f"results/RP_ID/experiment_{experiment+1}_RP_timestamp_{timestamp}.csv"
        train_data.to_csv(filename, index=False)
        
        
# load the data
file_path = os.path.join('Data', 'numeric_data.csv')
data = pd.read_csv(file_path)

# Extract features and target
features = ["Powderkg", "Liquidkg", "WC", "Fly_Ash_ratio", "GGBFS_ratio", "temperature"]
target = "fc_28dGroundTruth"

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Add timestamp
               
run_experiment(data, features, target, desired_strength, experiment_num=30, initial_sample_size=0, iteration_num=10)
                

Experiment: 1
Experiment: 2


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 3
Experiment: 4
Experiment: 5


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 6
Experiment: 7
Experiment: 8


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 9
Experiment: 10
Experiment: 11


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 12
Experiment: 13


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 14
Experiment: 15


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 16
Experiment: 17
Experiment: 18
Experiment: 19


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 20
Experiment: 21
Experiment: 22


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 23
Experiment: 24
Experiment: 25


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_da

Experiment: 26
Experiment: 27


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 28
Experiment: 29


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])


Experiment: 30


  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
  train_data = train_data.append(test_data.loc[original_index])
