# Load and Inspect pareto_trials.pkl
We will load the `pareto_trials.pkl` file and inspect its structure to understand how formulas and coefficients are stored.

In [1]:
import pickle
import pandas as pd
import numpy as np
import sympy as sp

In [2]:
# Load the pickle file
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open this file if it comes from a trusted source.
with open('../pareto_trials.pkl', 'rb') as f:
    data = pickle.load(f)

# Preview the first rows of the loaded object (it must provide .head();
# presumably a pandas DataFrame -- TODO confirm)
display(data.head())

Unnamed: 0,layers,units,total_neurons,mse,complexity,expr0,expr1,expr2
0,1,[7],7,2e-06,40.0,-0.01*x_1/x_0**1.0 + 0.09*x_1**2.0/x_0**1.0 - ...,-0.01*x_1/x_0**1.0 + 0.05*x_1**2.0/x_0**1.0 - ...,-0.04*x_1**2.0/x_0**1.0 - 0.75*x_1 - 0.01*x_1*...
1,2,"[8, 3]",11,1.6e-05,33.0,0.02*x_1**2.0/x_0**1.0 - 0.02*x_0 - 0.64*x_1 -...,0.01*x_1**2.0/x_0**1.0 - 0.01*x_0 - 1.39*x_1 -...,-0.01*x_1**2.0/x_0**1.0 + 0.01*x_0 - 0.75*x_1 ...
2,1,[2],2,1.9e-05,20.0,-0.01*x_1/x_0**1.0 - 0.03*x_0 - 0.51*x_1 + 0.02,-0.01*x_1/x_0**1.0 - 0.01*x_0 - 1.33*x_1 + 0.01,0.01*x_0 - 0.82*x_1 - 0.01
3,2,"[3, 3]",6,2.5e-05,17.0,-0.01*x_1/x_0**1.0 + 0.01*x_0**2.0 - 0.58*x_1 ...,-0.01*x_1/x_0**1.0 - 1.36*x_1 + 0.01,-0.78*x_1
4,1,[3],3,6.9e-05,8.0,-0.63*x_1 - 0.01,-1.39*x_1 - 0.01,0.01 - 0.76*x_1


In [3]:
def expr_round(expr: "str | sp.Expr", threshold: float = 0.01) -> sp.Expr:
    """
    Zero out every numeric coefficient whose absolute value is below ``threshold``.

    Parameters
    ----------
    expr : str or sympy expression
        Anything ``sp.sympify`` accepts. Strings are parsed; an existing
        sympy expression is processed as it is.
    threshold : float, default 0.01
        Numbers ``n`` with ``abs(n) < threshold`` are replaced by 0.

    Returns
    -------
    sympy expression
        The pruned expression. This is a sympy object, not a string;
        use ``str(result)`` when text is needed.

    Notes
    -----
    Exponents are left untouched on purpose. Zeroing a small exponent would
    turn ``x**0.005`` into ``x**0 == 1``, which silently converts a term into
    a constant instead of dropping a negligible coefficient.
    """
    def _prune(node):
        # Numeric leaf: replace it by zero when negligible, keep it otherwise.
        if isinstance(node, sp.Number):
            return sp.S.Zero if abs(node) < threshold else node
        # Symbols and other leaves contain no numbers to prune.
        if not node.args:
            return node
        # Powers: only the base is pruned, the exponent is kept verbatim.
        if isinstance(node, sp.Pow):
            base, exponent = node.args
            return node.func(_prune(base), exponent)
        # Any other node is rebuilt from its pruned arguments, letting sympy
        # re-simplify (e.g. 0*x collapses to 0 and drops out of a sum).
        return node.func(*[_prune(arg) for arg in node.args])

    return _prune(sp.sympify(expr))

In [24]:
# Print the thresholded `expr2` formula of every trial, one line per trial
for i, raw_formula in data['expr2'].items():
    # Formatting inside the f-string yields the plain-text form of the sympy expression
    print(f"Trial {i}: {expr_round(raw_formula, threshold=0.02)}")

Trial 0: -0.04*x_1**2.0/x_0**1.0 - 0.75*x_1
Trial 1: -0.75*x_1 + 0.14*x_1**2.0
Trial 2: -0.82*x_1
Trial 3: -0.78*x_1
Trial 4: -0.76*x_1


In [17]:
# NOTE(review): scratch arithmetic (square root of 2); no other visible cell uses it -- consider removing
2**.5

1.4142135623730951

In [20]:
# NOTE(review): scratch arithmetic (one eighth); no other visible cell uses it -- consider removing
1/8

0.125

In [16]:
# NOTE(review): scratch arithmetic (reciprocal of the square root of 2); no other visible cell uses it -- consider removing
1/2**0.5

0.7071067811865475

# Identify and Simplify Formulas
We will identify where the formulas and their coefficients are stored, and zero out coefficients whose absolute value is below a threshold (0.02 in the cells below).

In [25]:
# Parse the three formulas of the first trial and zero every number
# whose absolute value is below 0.02
expr0, expr1, expr2 = (
    expr_round(sp.sympify(data[column].iloc[0]), threshold=0.02)
    for column in ('expr0', 'expr1', 'expr2')
)

In [26]:
# Display the thresholded `expr0` expression of the first trial
expr0

0.09*x_1**2.0/x_0**1.0 - 0.65*x_1

In [27]:
# Display the thresholded `expr1` expression of the first trial
expr1

0.05*x_1**2.0/x_0**1.0 - 1.4*x_1

In [28]:
# Display the thresholded `expr2` expression of the first trial
expr2

-0.04*x_1**2.0/x_0**1.0 - 0.75*x_1