In [11]:
import os
from getpass import getpass

# Never store the API key in the notebook: it would be saved with the file and
# leak through version control or sharing. An existing environment variable
# wins; otherwise ask for the key interactively with hidden input.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')


In [7]:
import sys
# Add the parent directory to the module search path; this must happen before
# the project-local imports below (presumably that is where prompt_ADSC and
# experiment live -- confirm against the repository layout).
sys.path.append('../')

import warnings
# Hide every RuntimeWarning from here on (presumably numerical noise raised
# while candidate equations are being fitted -- confirm).
warnings.filterwarnings("ignore", category=RuntimeWarning) 
from prompt_ADSC import SYS_MSG, IGNITE, ITER

# The Langmuir experiment handles feedback-loop sorting differently (in the
# same way as the Nikuradse variant), but runs the SciPy optimization only
# once (unlike Nikuradse).
from experiment import ds_langmuir

In [9]:
import pandas as pd

# Read the measurements and flatten them into {column name: list of values}.
df = pd.read_csv('../datasets/isobutaneT277Fixed.csv')
data = df.to_dict(orient='list')
all_keys, all_values = list(data), list(data.values())

# Columns are addressed by position: the first one is the independent
# variable (x1) and the second one is the dependent variable (y).
x1, y = all_values[0], all_values[1]

# The experiment receives the data as text: the string form of each list.
dep_var, indep_var = str(y), [str(x1)]

# Plain-language description of the dataset handed to the experiment.
context = (
    "The data is about isobutane adsorbing onto silicalite at constant temperature, "
    "where the independent variable (x1) is pressure, "
    "and the dependent variable (y) is loading"
)




In [10]:
import json

# ---------------------------------------------------------------------------
# Experiment configuration (tune here).
# NOTE(review): the exact meaning of these values is defined by ds_langmuir and
# its run() method, which are not visible in this notebook; the remarks below
# are read off the names and the call sites only.
# ---------------------------------------------------------------------------
num_runs = 1                 # how many times the whole experiment is repeated
N = 3                        # third positional argument of ds_langmuir -- TODO confirm meaning
temp = 0.7                   # presumably the LLM sampling temperature -- confirm
model = "gpt-4-0613"
total_iterations = 5         # first argument of experiment.run()
trim_every_iterations = 3    # second argument of experiment.run()
num_equations_to_keep = 5    # third argument of experiment.run()

sys_msg = SYS_MSG
ignite_msg = IGNITE
iter_msg = ITER

# Every artifact of this notebook is written below this directory.
directory_path = "tests/ds_langmuir"

# Per-run accumulators: one entry is appended for each completed run.
results_fromAllruns = []
iteration_infos_fromAllruns = []

# exist_ok=True replaces the check-then-create pair and is safe on re-runs.
os.makedirs(directory_path, exist_ok=True)

# The prompts are identical for every run, so they are recorded once up front.
# Explicit UTF-8 keeps the write independent of the platform default encoding.
# The label spelling is kept verbatim so earlier output files stay comparable.
with open(f"{directory_path}/PromptsUsed.txt", 'w', encoding='utf-8') as f:
    f.write(f"Sytstem Message: {SYS_MSG}\n")
    f.write(f"Starting Prompt: {IGNITE}\n")
    f.write(f"Iteration Prompt: {ITER}\n")

for i in range(num_runs):
    print(f"Running experiment {i+1}...")
    experiment = ds_langmuir(dep_var, indep_var,
                             N, context, temp,
                             sys_msg, ignite_msg, iter_msg,
                             model)

    (CombResults, all_expressions, iteration_info, usage_list,
     total_chain_run_time, LLMrawExpressions) = experiment.run(
        total_iterations, trim_every_iterations, num_equations_to_keep)

    results_fromAllruns.append({
        'CombResults': CombResults,
        'all_expressions': all_expressions,
        'usage_list': usage_list,
        'total_chain_run_time': total_chain_run_time
    })

    # Store the iteration info
    iteration_infos_fromAllruns.append(iteration_info)

    # Query the cost once so the console and the run log report the same value.
    run_cost = experiment.cost()
    print(f"Experiment {i+1} finished. Cost: {run_cost}")

    # One report per run: final equations, cost, timing, raw LLM expressions
    # and the per-iteration records.
    with open(f"{directory_path}/run{i+1}.txt", 'w', encoding='utf-8') as f:
        f.write("Final Results:\n")
        # CombResults is a JSON document; each decoded entry is pretty-printed.
        for result in json.loads(CombResults):
            json.dump(result, f, indent=2)
            f.write("\n")
        f.write(f"Cost for this run: ${run_cost}\n")
        f.write(f"Total chain run time: {total_chain_run_time} seconds.\n")
        f.write(f"LLM-generated RawExpressions: {LLMrawExpressions} \n")
        f.write("\n\nIteration Information:\n")
        for info in iteration_info:
            json.dump(info, f, indent=2)
            f.write("\n")

Running experiment 1...
['[\n   {\n      "equation": "c[0]*log(x1)+c[1]",\n      "complexity": 6,\n      "mse": 0.00596731\n   },\n   {\n      "equation": "c[0]*log(x1)+c[1]*sqrt(x1)",\n      "complexity": 9,\n      "mse": 0.22563588\n   },\n   {\n      "equation": "c[0]*log(x1)+c[1]/x1",\n      "complexity": 8,\n      "mse": 0.24090395\n   }\n]']
Iteration:1
SciPy feedback used for this iteration:
[
   {
      "equation": "c[0]*log(x1)+c[1]",
      "complexity": 6,
      "mse": 0.00596731
   },
   {
      "equation": "c[0]*log(x1)+c[1]*sqrt(x1)",
      "complexity": 9,
      "mse": 0.22563588
   },
   {
      "equation": "c[0]*log(x1)+c[1]/x1",
      "complexity": 8,
      "mse": 0.24090395
   }
]
LLM thoughts:
Scratch Pad:

Let's try to reduce the complexity and loss by using operators that might be relevant for this particular set of data. We need to be careful to avoid SR-similar expressions to the ones already suggested.

1. The first equation uses a logarithmic function. We could