In [1]:
import jax
from jax import lax, random, numpy as jnp
from jax.tree_util import tree_flatten, tree_unflatten

import flax
from flax import linen as nn

import sympy as sy
import numpy as np

import sys
sys.path.append("..")

from eql.eqlearner import EQL
from eql.symbolic import get_symbolic_expr, get_symbolic_expr_layer

import optax
import scipy
from functools import partial

In [2]:
funs = ['mul', 'cos', 'sin']*2
e = EQL(n_layers=2, functions=funs, features=1)
key = random.PRNGKey(0)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


In [3]:
N = 1000
xdim = 1
x = (random.uniform(key, (N, xdim))-.5) * 2
#x = np.array([[1., 2.]]).T
#x = np.linspace(-1, 1, N)[:,None]
#y = x[:,0] + jnp.cos(x[:,1])
y = jnp.cos(x) + 1 - x**2 

In [14]:
params = e.init({'params':key}, x);

In [18]:
def mse_fn(params):
    pred = e.apply(params, x)
    return jnp.mean((pred-y)**2)


def get_mask_spec(thresh, params):
    flat, spec = tree_flatten(params)
    mask = [jnp.abs(f) > thresh for f in flat]
    return mask, spec

def apply_mask(mask, spec, params):
    flat, _ = tree_flatten(params)
    masked_params = tree_unflatten(spec, [f*m for f,m in zip(flat, mask)])
    return masked_params


def get_masked_mse(thresh, params):
    mask, spec = get_mask_spec(thresh, params)
    def masked_mse(params):
        masked_params = apply_mask(mask, spec, params)
        return mse_fn(masked_params)
    return jax.jit(masked_mse)
    

def l1_fn(params):
    return sum(
        jnp.abs(w).mean() for w in jax.tree_leaves(params["params"])
    )

In [25]:
params

FrozenDict({
    params: {
        last: {
            bias: DeviceArray([-0.0034094], dtype=float32),
            kernel: DeviceArray([[1.7903351e-03],
                         [1.3192539e-03],
                         [1.5542713e-03],
                         [6.3857739e-03],
                         [6.5374327e-01],
                         [3.0401422e-04]], dtype=float32),
        },
        layers_0: {
            linear_layer: {
                bias: DeviceArray([-0.00407849,  0.00037369, -0.00295608, -0.0011525 ,
                              0.0031756 ,  0.00780274,  0.00048989,  0.00183793],            dtype=float32),
                kernel: DeviceArray([[-1.7430037e-02,  4.1446046e-04,  1.5974020e-03,
                               8.9088688e-03, -7.1354121e-01, -7.0021921e-01,
                               2.9338847e-03, -5.2222842e-03]], dtype=float32),
            },
        },
        layers_1: {
            linear_layer: {
                bias: DeviceArray([ 0.00505211,

In [19]:
def get_loss(lamba):
    def loss_fn(params):
        return mse_fn(params)  + lamba * l1_fn(params)
    return loss_fn

def get_loss_grad(lamba):
    loss = get_loss(lamba)
    return jax.jit(jax.value_and_grad(loss))

In [20]:
tx = optax.adam(learning_rate=1e-2)
opt_state = tx.init(params)

In [21]:
loss_grad_1 = get_loss_grad(0)
loss_grad_2 = get_loss_grad(1e-2)

for i in range(1000):
    loss_val, grads = loss_grad_1(params)
    updates, opt_state = tx.update(grads, opt_state)
    params = optax.apply_updates(params, updates)
    if i % 99 == 0:
        print(loss_val)

for i in range(3000):
    loss_val, grads = loss_grad_2(params)
    updates, opt_state = tx.update(grads, opt_state)
    params = optax.apply_updates(params, updates)
    if i % 99 == 0:
        print(loss_val)
        print(l1_fn(params))
        
thr = 1e-3
loss_grad_masked = jax.jit(jax.value_and_grad(get_masked_mse(thr, params)))
mask, spec = get_mask_spec(thr, params)

for i in range(1000):
    loss_val, grads = loss_grad_masked(params)
    updates, opt_state = tx.update(grads, opt_state)
    params = optax.apply_updates(params, updates)
    if i % 99 == 0:
        print(loss_val)

2.2959127
0.00017843182
3.0303927e-05
2.4509332e-05
2.23571e-05
2.0150723e-05
1.7939463e-05
1.5790602e-05
1.3756455e-05
1.1876678e-05
1.0179508e-05
0.02259168
2.257642
0.01931724
1.9251344
0.016801048
1.6758217
0.015045944
1.5004771
0.013928536
1.3896518
0.012968791
1.2928609
0.011959264
1.1917928
0.010990254
1.0950935
0.010088094
1.0057241
0.009269744
0.9251915
0.008304355
0.82905746
0.007351574
0.7343733
0.0066860174
0.668099
0.006403017
0.6396043
0.0061369035
0.61296296
0.005884754
0.5880971
0.005644731
0.5640638
0.0054247337
0.5422503
0.0052309954
0.52235925
0.005069772
0.5065864
0.004952409
0.49519178
0.004839332
0.4837908
0.004732088
0.47303215
0.0046400446
0.46341783
0.0045698294
0.45661515
0.004502493
0.45048264
0.004459777
0.4439394
0.004446043
0.4391326
0.004339714
0.4334002
0.004281569
0.4267202
0.0042609368
0.42128238
9.298407e-06
5.112177e-06
3.1451398e-06
1.8994537e-06
1.123328e-06
6.491357e-07
3.6583265e-07
4.7616268e-05
1.06150914e-07
5.3155027e-08
1.3374874e-05


In [22]:
symb = get_symbolic_expr(apply_mask(mask, spec, params), funs)[0]
symb

0.653743267059326*(1.03431890561041 - 0.747141209541702*x0**2)*(2.95883375230604 - 0.0774600916783715*x0**2)

In [10]:
sy.expand(symb)

0.0378523975426517*x0**4 - 0.768726070993095*x0**2*cos(0.00596933532506227*x0) - 0.728908565722812*x0**2*cos(0.0100719733163714*x0) + 0.635997118715956*cos(0.00596933532506227*x0)**2 + 0.99510495325993*cos(0.00596933532506227*x0)*cos(0.0100719733163714*x0) + 0.368336395194428*cos(0.0100719733163714*x0)**2