In [1]:
import os
from getpass import getpass

# Keep the secret out of the notebook: reuse OPENAI_API_KEY when the
# environment already provides it, otherwise prompt for it without echoing
# the typed value. The key is never stored in the saved notebook.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OPENAI_API_KEY: ')


In [2]:
import sys
import warnings

# Extend the module search path with the parent directory before the
# project-local imports below are resolved.
sys.path.append('../')

# Silence RuntimeWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)

from prompt_nikuradse_p3 import SYS_MSG, IGNITE, ITER

# The nikuradse experiment differs from the other experiments in two ways:
#   1. the feedback loop, and
#   2. the SciPy optimization, which is repeated 10 times; the best fit
#      according to MAE is selected.
from experiment import nikuradse

In [3]:
import pandas as pd


def _read_dataset(csv_name):
    """Read ``datasets/<csv_name>`` and expose it in the shapes used below.

    Returns a 4-tuple: the DataFrame, its ``to_dict(orient='list')`` form,
    the list of column names and the list of column value lists (both in
    file column order).
    """
    frame = pd.read_csv("datasets/" + csv_name)
    columns = frame.to_dict(orient='list')
    return frame, columns, list(columns.keys()), list(columns.values())


# Selected data points that are passed to the LLM.
# Columns are picked by position: 0 -> x1, 1 -> x2, 2 -> y.
df, data, all_keys, all_values = _read_dataset('nikuradseM_36pts.csv')
x1, x2, y = all_values[0], all_values[1], all_values[2]
dep_var = str(y)
indep_var = [str(column) for column in (x1, x2)]

# Full data set used for the SciPy optimization (same column layout).
df_O, data_O, all_keys_O, all_values_O = _read_dataset('nikuradse_modified.csv')
x1_O, x2_O, y_O = all_values_O[0], all_values_O[1], all_values_O[2]
dep_var_O = str(y_O)
indep_var_O = [str(column) for column in (x1_O, x2_O)]

# Natural-language description of the data set that is handed to the experiment.
context = "The data is from an experiment that recorded turbulent friction in rough pipes with pipe roughness (x1) and Reynolds number (x2) as independent variables and turbulent friction (y) as the dependent variable."


In [4]:
import json

# --- Experiment configuration -------------------------------------------
# N, temp and model are forwarded unchanged to the nikuradse constructor;
# the three iteration settings are forwarded to experiment.run().
N = 3
temp = 0.7  # presumably the LLM sampling temperature -- TODO confirm in experiment.py
model = "gpt-4-0613"
total_iterations = 5
trim_every_iterations = 3
num_equations_to_keep = 5
num_runs = 1  # number of independent repetitions of the whole experiment

sys_msg = SYS_MSG
ignite_msg = IGNITE
iter_msg = ITER

# One entry per run is appended to each of these lists.
results_fromAllruns = []
iteration_infos_fromAllruns = []

# Create the output directory once, before any (paid) LLM call is made, so a
# bad path fails fast. exist_ok avoids the check-then-create race.
directory_path = "tests/Nikuradse_P3S1"
os.makedirs(directory_path, exist_ok=True)

# The prompts are identical for every run, so they are recorded a single time.
# utf-8 is set explicitly because prompts and LLM output are free-form text.
with open(f"{directory_path}/PromptsUsed.txt", 'w', encoding="utf-8") as f:
    f.write(f"System Message: {sys_msg}\n")
    f.write(f"Starting Prompt: {ignite_msg}\n")
    f.write(f"Iteration Prompt: {iter_msg}\n")


for i in range(num_runs):
    print(f"Running experiment {i+1}...")
    experiment = nikuradse(dep_var_O, indep_var_O, dep_var, indep_var,
                           N, temp, context,
                           sys_msg, ignite_msg, iter_msg,
                           model)

    (CombResults, all_expressions, iteration_info, usage_list,
     total_chain_run_time, LLMrawExpressions) = experiment.run(
        total_iterations, trim_every_iterations, num_equations_to_keep)

    results_fromAllruns.append({
        'CombResults': CombResults,
        'all_expressions': all_expressions,
        'usage_list': usage_list,
        'total_chain_run_time': total_chain_run_time
    })

    # Store the iteration info
    iteration_infos_fromAllruns.append(iteration_info)

    print(f"Experiment {i+1} finished. Cost: {experiment.cost()}")

    # Per-run report: final results, cost, run time, raw LLM expressions and
    # the per-iteration information.
    with open(f"{directory_path}/run{i+1}.txt", 'w', encoding="utf-8") as f:
        f.write("Final Results:\n")
        # CombResults is a JSON string; each decoded entry is dumped separately.
        for result in json.loads(CombResults):
            json.dump(result, f, indent=2)
            f.write("\n")
        f.write(f"Cost for this run: ${experiment.cost()}\n")
        f.write(f"Total chain run time: {total_chain_run_time} seconds.\n")
        f.write(f"LLM-generated RawExpressions: {LLMrawExpressions} \n")
        f.write("\n\nIteration Information:\n")
        for info in iteration_info:
            json.dump(info, f, indent=2)
            f.write("\n")

Running experiment 1...
Run 1: MAE = 0.25793867
Run 2: MAE = 0.25793867
Run 3: MAE = 0.25793867
Run 4: MAE = 0.25793867
Run 5: MAE = 0.25793867
Run 6: MAE = 0.25793867
Run 7: MAE = 0.25793867
Run 8: MAE = 0.25793867
Run 9: MAE = 0.25793867
Run 10: MAE = 0.25793867
Iteration:1
SciPy feedback used for this iteration:
[
     {
          "equation": "c[0]/x1",
          "complexity": 3,
          "mae": 0.25793867,
          "mse": 0.07170147,
          "fitted_params": [
               14.537254111331155
          ]
     }
]
LLM thoughts:
Scratch Pad: 

Given the existing equation c[0]/x1 and its performance metrics, we can devise new equations by incorporating the second variable x2 and also by adding more complexity to the existing equation. 

As per the analysis of the dataset, it can be seen that the dependent variable 'y' is inversely proportional to 'x1'. This is evident from the equation c[0]/x1. Also, 'y' seems to have a non-linear relationship with 'x1' and 'x2'. 

We can incorpo