In [7]:
import re
import pandas as pd

In [70]:
df = pd.read_csv("../data/AIFeynman/FeynmanEquations.csv")

# Drop unwanted columns
df.drop(
    columns=[
        "v4_name",
        "v4_low",
        "v4_high",
        "v5_name",
        "v5_low",
        "v5_high",
        "v6_name",
        "v6_low",
        "v6_high",
        "v7_name",
        "v7_low",
        "v7_high",
        "v8_name",
        "v8_low",
        "v8_high",
        "v9_name",
        "v9_low",
        "v9_high",
        "v10_name",
        "v10_low",
        "v10_high",
    ],
    inplace=True,
)

In [71]:
for i, row in df.iterrows():
    num_vars = 0
    for var in [i for i in range(1, 11)]:
        if pd.isna(row[f'v{var}_name']) is False:
            num_vars += 1
    df.at[i, "# variables"] = num_vars

df = df[df["# variables"] <= 3]

In [72]:
df.reset_index(inplace=True, drop=True)
df

Unnamed: 0,Filename,Number,Output,Formula,# variables,v1_name,v1_low,v1_high,v2_name,v2_low,...,v7_high,v8_name,v8_low,v8_high,v9_name,v9_low,v9_high,v10_name,v10_low,v10_high
0,I.6.2a,1.0,f,exp(-theta**2/2)/sqrt(2*pi),1.0,theta,1.0,3.0,,,...,,,,,,,,,,
1,I.6.2,2.0,f,exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma),2.0,sigma,1.0,3.0,theta,1.0,...,,,,,,,,,,
2,I.6.2b,3.0,f,exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*...,3.0,sigma,1.0,3.0,theta,1.0,...,,,,,,,,,,
3,I.10.7,6.0,m,m_0/sqrt(1-v**2/c**2),3.0,m_0,1.0,5.0,v,1.0,...,,,,,,,,,,
4,I.12.1,8.0,F,mu*Nn,2.0,mu,1.0,5.0,Nn,1.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,,,,,0.0,,,,,,...,,,,,,,,,,
78,,,,,0.0,,,,,,...,,,,,,,,,,
79,,,,,0.0,,,,,,...,,,,,,,,,,
80,,,,,0.0,,,,,,...,,,,,,,,,,


In [74]:
df.iloc[13]

Filename                      I.26.2
Number                          26.0
Output                        theta1
Formula        arcsin(n*sin(theta2))
# variables                      2.0
v1_name                            n
v1_low                           0.0
v1_high                          1.0
v2_name                       theta2
v2_low                           1.0
v2_high                          5.0
v3_name                          NaN
v3_low                           NaN
v3_high                          NaN
v4_name                          NaN
v4_low                           NaN
v4_high                          NaN
v5_name                          NaN
v5_low                           NaN
v5_high                          NaN
v6_name                          NaN
v6_low                           NaN
v6_high                          NaN
v7_name                          NaN
v7_low                           NaN
v7_high                          NaN
v8_name                          NaN
v

In [68]:
var_map = {
    "v1_name": "x_1", 
    "v2_name": "x_2", 
    "v3_name": "x_3",
    # "v4_name": "x_4",
    # "v5_name": "x_5",
    # "v6_name": "x_6",
    # "v7_name": "x_7",
    # "v8_name": "x_8",
    # "v9_name": "x_9",
    # "v10_name": "x_10",
}

converted_data = []

for i, row in df.iterrows():

    formula = row["Formula"]
    support = {}

    vars = [f'v{i}_' for i in range(1, 4)]

    for var in vars:

        orig_var_name = row[f"{var}name"]
        

        if type(orig_var_name) is str:
            
            formula = re.sub(orig_var_name, var_map[f"{var}name"], formula)
            support[var_map[f"{var}name"]] = {
                "max": row[f"{var}high"],
                "min": row[f"{var}low"],
            }

    new_row = {"eq": formula, "support": str(support), "num_points": 500}

    converted_data.append(new_row)

In [69]:
converted_df = pd.DataFrame(data=converted_data)
converted_df.to_csv("../data/AIFeynman/ai_feynman_complete.csv")