In [24]:
import os
from sympy import symbols, sympify
from sympy.utilities.lambdify import lambdify
from scipy.optimize import curve_fit

In [22]:
# cd llm-physics-discovery/

In [6]:
from src.simulate import projectile

In [27]:
# Simulate the *full* noisy trajectory that the candidate formula is fitted against.
# The simulator lives in src.simulate; the returned triple is unpacked into the
# sample times `t`, the noisy observations `y` and a metadata object `meta`
# (presumably the ground-truth settings -- confirm against src.simulate).
t, y, meta = projectile(
    v0=20,           # launch speed
    theta_deg=45,    # launch angle, in degrees judging by the parameter name
    noise=0.05,      # noise level handed to the simulator
    num_points=200,  # number of samples requested
    seed=0,          # fixed seed so the dataset is the same on every run
)

In [15]:
# Locate the file holding the LLM-proposed formula.
# The project root can be overridden through the LLM_PHYSICS_ROOT environment
# variable so the notebook is not tied to a single machine; when the variable is
# unset the original cluster location is used, so existing runs are unaffected.
project_root = os.environ.get(
    "LLM_PHYSICS_ROOT", "/mmfs1/home/oreoluwa.alade/llm-physics-discovery"
)
formula_path = os.path.join(project_root, "responses", "projectile_formula.txt")

# Read the entire response file (e.g. a single equation of the form "y = ...").
with open(formula_path) as f:
    formula_text = f.read()

print("formula_text: ", formula_text)

formula_text:  y = h0 + v0 * t - (1/2) * g * t**2 + A * sin(B * t)



In [17]:
def extract_rhs(expr_str):
    """Return the right-hand side of an equation string, stripped of whitespace.

    The string is cut at the first "=" and everything after it is returned.
    When there is no "=" at all, the whole (stripped) string is returned.
    """
    _lhs, separator, rhs = expr_str.partition("=")
    # An empty separator means no "=" was found, so the input is already a bare expression.
    return rhs.strip() if separator else expr_str.strip()

# Symbol for the independent variable; it compares equal to the `t` that sympify
# creates from the text, which is what lets later cells separate it from the
# fit parameters.
t_sym = symbols("t")

# Keep only the expression to the right of "=" and parse it into a SymPy expression.
# NOTE(review): sympify maps some bare names (e.g. E, I, pi) to built-in SymPy
# objects rather than free symbols -- worth checking if a formula uses them.
rhs_str = extract_rhs(formula_text)
expr = sympify(rhs_str)

print("Parsed Sympy expr:", expr)

Parsed Sympy expr: A*sin(B*t) - g*t**2/2 + h0 + t*v0


In [18]:
# Collect every free symbol except the independent variable t; these are the
# parameters the fit will estimate.
# `free_symbols` is a set, so its iteration order is arbitrary and may differ
# between kernel restarts. Sorting by name pins the parameter order, which keeps
# the argument order of the lambdified model and the order of p0 / popt
# reproducible from run to run.
free_syms = sorted(expr.free_symbols - {t_sym}, key=lambda s: s.name)
print("Free parameters to fit: ", free_syms)

Free parameters to fit:  [B, A, v0, h0, g]


In [23]:
# Compile the symbolic expression into a NumPy-backed callable. Its positional
# arguments are t first, followed by the fit parameters in `free_syms` order.
model_args = (t_sym, *free_syms)
f_np = lambdify(model_args, expr, modules="numpy")


def model_wrapper(t_vals, *theta):
    """Evaluate the fitted model in the f(x, *params) form that curve_fit calls.

    t_vals: values of the independent variable t.
    theta:  parameter values, in the same order as `free_syms`.
    """
    return f_np(t_vals, *theta)


# Crude starting point for the optimiser: every parameter begins at 1.0.
p0 = [1.0 for _ in free_syms]
print("Initial guesses:", p0)

Initial guesses: [1.0, 1.0, 1.0, 1.0, 1.0]


In [28]:
# Fit the candidate formula to the noisy data, starting from p0.
# maxfev raises the cap on function evaluations so the optimiser has room to converge.
# pcov is the estimated covariance of the fitted parameters (not used further here).
popt, pcov = curve_fit(model_wrapper, t, y, p0=p0, maxfev=10000)

# Pair each estimate with its parameter name. Casting to the built-in float keeps
# the printed dict readable on NumPy >= 2, where scalars would otherwise be shown
# as np.float64(...); on older NumPy the output is unchanged.
fitted_params = {sym.name: float(value) for sym, value in zip(free_syms, popt)}
print("Optimal parameters:", fitted_params)

Optimal parameters: {'B': 0.8527785575324044, 'A': -0.7483796880172721, 'v0': 15.019957770226801, 'h0': -0.044383318073662005, 'g': 10.2994102219568}


### Comparing GPT’s Fitted Formula with Ground Truth

At this stage, we have two important things:

1.  **Ground truth parameters** – the ones I originally used to simulate the projectile motion:
    - $v_0 = 20.0 \, \text{m/s}$ (launch speed)
    - $\theta = 45^\circ$ (launch angle)
    - $g = 9.81 \, \text{m/s}^2$
    - $h_0 = 0.0 \, \text{m}$

    These represent the "real" physics that generated the noisy dataset.

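2.  **GPT + curve fitting parameters** – the ones obtained by taking GPT’s guessed symbolic formula and fitting it to the noisy data:
    - Example: $v_0 \approx 15.0$, $g \approx 10.3$, plus extra terms $A, B$ introduced by GPT.
    - Note: in GPT’s formula $v_0$ multiplies $t$ directly, so it plays the role of the *vertical* launch velocity. Assuming the simulator follows the standard $y = v_0 \sin\theta \, t - \tfrac{1}{2} g t^2$ with a launch angle of $45^\circ$, the fair comparison is with $v_0 \sin 45^\circ \approx 14.1 \, \text{m/s}$, not with the launch speed of $20 \, \text{m/s}$.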

The comparison highlights an important idea:
- GPT does not always produce the **exact** physical law.
- However, by fitting GPT’s guess to real data, we can test how **close** its symbolic hypothesis is to the truth.
- This is the crux of the project: **using LLMs to propose symbolic laws, then validating them against data.**