In [1]:
import sys
pdefind_path = "/Users/pongpisit/Desktop/research/PDE-FIND/"
sys.path.insert(0, pdefind_path)
from misc import h5file, pickle_load
from itertools import combinations

import os
from os.path import join as pjoin
from pathlib import Path
import yaml

import numpy as np
from jax import numpy as jnp
import matplotlib.pyplot as plt
import dedalus.public as d3
import sympy
import pandas as pd
from scipy.io import loadmat
from scipy.optimize import curve_fit
from jaxfit import CurveFit
from levenberg_marquardt import lm as lm_curve_fit
from functools import partial

def log_like_value(prediction, ground):
    """Return the Gaussian log-likelihood of ``prediction`` given ``ground``.

    The number of observations is ``ground.shape[0]`` and the residual sum
    of squares is taken over every element of ``ground - prediction``
    (absolute value first, so complex residuals are handled as magnitudes).

    Parameters
    ----------
    prediction, ground : array-like supporting subtraction and ``.shape``
        Model output and reference values.

    Returns
    -------
    float
        ``-n/2*log(2*pi) - n/2*log(ssr/n) - n/2`` with ``n = ground.shape[0]``.
    """
    n_obs = float(ground.shape[0])
    half_n = n_obs / 2.0
    residual = ground - prediction
    ssr = np.sum(np.abs(residual)**2)
    # Terms are kept in the same order as the closed-form expression above.
    return -half_n * np.log(2 * np.pi) - half_n * np.log(ssr / n_obs) - half_n

def BIC_AIC(prediction, ground, nparams, reg_func = lambda x: x):
    """Return the ``(BIC, AIC)`` pair for a prediction.

    Parameters
    ----------
    prediction, ground : array-like
        Forwarded unchanged to ``log_like_value``.
    nparams : number
        Parameter count; it is passed through ``reg_func`` before use.
    reg_func : callable, optional
        Transformation applied to ``nparams`` (identity by default).

    Returns
    -------
    tuple
        ``(-2*llf + log(n)*k, -2*llf + 2*k)`` where ``n = ground.shape[0]``
        and ``k = reg_func(nparams)``.
    """
    k = reg_func(nparams)
    n_obs = ground.shape[0]
    llf = log_like_value(prediction, ground)
    bic = -2*llf + np.log(n_obs)*k
    aic = -2*llf + 2*k
    return bic, aic

import re
from collections import Counter

def ef(c):
    """Return ``dx`` nested ``c`` times around ``u``.

    ``ef(1)`` is ``"dx(u)"``, ``ef(2)`` is ``"dx(dx(u))"`` and so on; any
    ``c <= 1`` also yields ``"dx(u)"``.
    """
    if c > 1:
        return f"dx({ef(c-1)})"
    return "dx(u)"


def convert2dedalus(input_string):
    """Rewrite every ``u_<digits>`` token as nested ``dx(...)`` calls.

    The nesting depth of a token is the number of ``'1'`` characters it
    contains (``u_1`` -> ``dx(u)``, ``u_11`` -> ``dx(dx(u))``).

    Each token is substituted in a single regex pass. Replacing tokens one
    after another with ``str.replace`` would let a short token such as
    ``u_1`` rewrite the prefix of a longer one such as ``u_11``, and the
    outcome would depend on the iteration order of a ``set``.

    Parameters
    ----------
    input_string : str
        Expression that may contain ``u_<digits>`` tokens.

    Returns
    -------
    str
        The expression with all such tokens replaced.
    """
    return re.sub(r"u_\d+",
                  lambda match: ef(match.group(0).count('1')),
                  input_string)

In [2]:
# Experiment settings.
noise_lv = 30      # used below as a percentage: multiplied by 0.01 when building noisy data
complexity = 2     # number of candidate terms per subset

# Files read by this cell.
fp1 = "./IPI_output_files/PMS_data.h5"
fp2 = "./IPI_output_files/encoded_pde_names.yaml"

# Spatial grid stored with the Burgers data set.
x = loadmat('../Datasets/burgers.mat')['x'][0]
X_pre, best_subsets, un, y_pre = h5file(file_path=fp1, mode='r',
                                        return_dict=False)

# NOTE(review): yaml.Loader can construct arbitrary Python objects. Keep it
# only for trusted, locally generated files; consider yaml.safe_load if the
# file holds plain lists/strings.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(fp2, 'r') as f:
    config = yaml.load(f, yaml.Loader)
encoded_feature_names = config["encoded_feature_names"]
encoded_pde_names = config["encoded_pde_names"]

# Every index combination of `complexity` candidate features.
all_subsets = list(combinations(range(len(encoded_feature_names)), complexity))

['X_pre', 'best_subsets', 'un', 'y_pre']


In [3]:
# Enumerate every candidate support of size `complexity`.
n_features = len(config["encoded_feature_names"])
all_subsets = list(combinations(range(n_features), complexity))

# Mean squared residual of the least-squares fit on each candidate support.
scores = []
for s in all_subsets:
    inp = X_pre[:, s]
    w = np.linalg.lstsq(inp, y_pre, rcond=None)[0]
    residual = y_pre - inp @ w
    scores.append((residual**2).mean())

# The support with the smallest residual supplies the initial coefficients
# and the term names (translated to nested dx(...) syntax).
best_subset = all_subsets[np.argmin(scores)]
best_fit = np.linalg.lstsq(X_pre[:, best_subset], y_pre, rcond=None)[0]
init_params = best_fit.flatten().astype(np.float64)

effective_terms = [convert2dedalus(config["encoded_feature_names"][i])
                   for i in best_subset]
print(effective_terms)
print(init_params)

['dx(dx(u))', 'u*dx(u)']
[ 0.10128807 -1.00533521]


In [4]:
# hof = pd.read_csv("./hall/hof.csv")
# equation = hof.iloc[np.argmax(hof["score"])]
# pysr_params = np.array(sorted([float(atom) for atom in sympy.sympify(equation.equation).atoms() if atom.is_number]))

# def initial_function(x, a, b): return np.exp(a*np.square(x+b))
# def jax_initial_function(x, a, b): return jnp.exp(a*jnp.square(x+b))

# recovered_params1 = np.array(CurveFit().curve_fit(jax_initial_function, x.flatten(), un[:, 0], 
#                                                   p0=pysr_params)[0])

# recovered_params2 = np.array(curve_fit(initial_function, x.flatten(), un[:, 0], 
#                                        p0=pysr_params, method='lm')[0])

# recovered_params3 = lm_curve_fit(np.round(pysr_params).reshape(-1, 1), 
#                                  x.flatten(), un[:, 0], 
#                                  lambda t,p: np.exp(p[0,0]*np.square(t+p[1,0])))[0].flatten()

# # pysr_params, recovered_params1, recovered_params2, recovered_params3 (recommended when finetuning)
# recovered_params = recovered_params1

# # initial_condition_function = partial(initial_function, a=-1.0, b=2.0) # GROUND
# initial_condition_function = partial(initial_function, a=recovered_params[0], b=recovered_params[1])

# pysr_params, recovered_params1, recovered_params2, recovered_params3

# # func = sympy.lambdify(args=sympy.symbols('x'), expr=equation.sympy_format)
# # equation.equation

In [5]:
# NOTE(review): the file name suggests a pickle; only load files you produced
# yourself, since unpickling can execute arbitrary code.
equation = pickle_load("./hall/hof.pkl")

# Collect every numeric atom of the expression as a float, in ascending order.
numeric_atoms = sorted(float(atom)
                       for atom in sympy.sympify(equation).atoms()
                       if atom.is_number)

# Keep entries 0, 2, 3 and 4 of the sorted constants; they are used further
# down as the starting guess for the four-parameter initial-condition fit.
# NOTE(review): this index choice is tied to the specific stored expression.
feyn_params = np.array(numeric_atoms)[[0, 2, 3, 4]]

def initial_function(x, a, b, c, d):
    """Evaluate ``c + d*exp(a*(b*x - 1)**2)`` with NumPy.

    ``x`` may be a scalar or an array; ``a``, ``b``, ``c`` and ``d`` are the
    shape parameters of the profile.
    """
    shifted = b*x-1
    bump = np.exp(a*np.square(shifted))
    return c+d*bump

def jax_initial_function(x, a, b, c, d):
    """Evaluate ``c + d*exp(a*(b*x - 1)**2)`` with ``jax.numpy``.

    Same formula as ``initial_function`` but built from ``jnp`` operations.
    """
    shifted = b*x-1
    bump = jnp.exp(a*jnp.square(shifted))
    return c+d*bump

# Refine the four constants by fitting the JAX profile to the first column of
# `un`, starting from the constants extracted from the stored expression.
fitter = CurveFit()
fit_result = fitter.curve_fit(jax_initial_function, x.flatten(), un[:, 0],
                              p0=feyn_params)
recovered_params = np.array(fit_result[0])

# Freeze the fitted constants into the NumPy profile so that it can be called
# with the spatial coordinate alone.
initial_condition_function = partial(initial_function,
                                     **dict(zip("abcd", recovered_params)))

recovered_params

pickle_load done
2023-06-05 22:38:11,617 jax._src.xla_bridge 0/1 INFO :: Unable to initialize backend 'cuda': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2023-06-05 22:38:11,617 jax._src.xla_bridge 0/1 INFO :: Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2023-06-05 22:38:11,618 jax._src.xla_bridge 0/1 INFO :: Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client'
2023-06-05 22:38:11,619 jax._src.xla_bridge 0/1 INFO :: Unable to initialize backend 'plugin': xla_extension has no attributes named get_plugin_device_client. Compile TensorFlow with //tensorflow/compiler/xla/python:enable_plugin_device set to true (defaults to false) to enable this.


array([-4.15259791e+00, -4.97142427e-01,  2.95012304e-03,  1.00797403e+00])

In [6]:
# Assemble the equation string "dt(u) <linear terms> = <non-linear terms>".
# Each term i gets a coefficient named 'a', 'b', ... (chr(97+i)) holding the
# magnitude of init_params[i]; the sign is written into the string itself.
lhs_parts = ["dt(u)"]
rhs_parts = []
for i, term in enumerate(effective_terms):
    coeff_name = chr(97+i)
    # Bind the coefficient as a notebook-level variable so a later
    # `namespace=locals()` lookup can resolve it. Direct assignment replaces
    # the former eval(compile(..., "exec")) round-trip through a string.
    globals()[coeff_name] = float(abs(init_params[i]))
    is_negative = init_params[i] < 0
    if '*' not in term:
        # Term without a product: moved to the left-hand side, so its sign flips.
        lhs_parts.append(('+' if is_negative else '-') + coeff_name + '*' + term)
    else:
        # Term containing a product: stays on the right-hand side with its own sign.
        rhs_parts.append(('-' if is_negative else '+') + coeff_name + '*' + term)

LHS = ''.join(lhs_parts)
# With no right-hand-side term the equation would end in a bare "=";
# write an explicit zero instead.
RHS = "=" + (''.join(rhs_parts) or "0")
problem_string = LHS+RHS
problem_string

'dt(u)-a*dx(dx(u))=-b*u*dx(u)'

In [7]:
# Parameters
# -- names used for the coordinate and the solved field
spatial_variable_name = 'x'
target_variable_name = 'u'

# -- spatial discretisation: domain is (-Lx, Lx) sampled with Nx modes
Lx = 8
Nx = 256
spatial_domain = (-Lx, Lx)
dealias = 3/2
dtype = np.float64

# -- time integration: the base step 4e-3 is divided by `acc`
timestepper = d3.SBDF4
stop_sim_time = 10
acc = 4
timestep = 4e-3/acc

In [8]:
# Coordinate, distributor and Fourier basis for the periodic spatial domain.
xcoord = d3.Coordinate(spatial_variable_name)
dist = d3.Distributor(xcoord, dtype=dtype)
xbasis = d3.RealFourier(xcoord, size=Nx, bounds=spatial_domain, dealias=dealias)

# Field holding the solution.
u = dist.Field(name=target_variable_name, bases=xbasis)

# Substitution used inside the equation string.
def dx(A):
    """Differentiate ``A`` with respect to the spatial coordinate."""
    return d3.Differentiate(A, xcoord)

In [9]:
# Problem
# Initial value problem for the single field `u`. `namespace=locals()` hands
# the notebook's current variables to the problem, so names appearing in the
# equation string (`dx`, and the coefficient letters defined when the string
# was assembled) must already exist when this cell runs.
problem = d3.IVP([u], namespace=locals())
# Earlier hand-written variant, kept for reference:
# problem.add_equation("dt(u) - a*dx(dx(u)) - b*dx(dx(dx(u))) = - u*dx(u)")
problem.add_equation(problem_string)

# Initial conditions
# NOTE(review): this rebinds `x`, which an earlier cell set to the grid loaded
# from burgers.mat. Re-running that earlier fit after this cell would then use
# the solver grid instead of the data grid -- run cells top to bottom.
x = dist.local_grid(xbasis)
# Fill the grid-space data of `u` with the fitted initial profile.
u['g'] = initial_condition_function(x)
# Alternative: start from the first column of `un` instead of the fitted profile.
# u['g'] = un[:, 0]

#### Information criterion

In [10]:
# Reference solution: real part of "usol" from the Burgers data set. The path
# is built from `pdefind_path` (defined with the imports) instead of repeating
# the absolute directory; the resulting path string is unchanged.
u_clean = loadmat(pjoin(pdefind_path, "Datasets", "burgers.mat"))["usol"].real

# Simulated solutions stored under the names "com2" and "com3".
cf_sol2 = loadmat("../DeepHPMs/Matlab/Burgers_sim/burgers_sim_com2.mat")["usol"]
cf_sol3 = loadmat("../DeepHPMs/Matlab/Burgers_sim/burgers_sim_com3.mat")["usol"]

In [11]:
# Score both simulated solutions against `un` (undenoised = False), or against
# freshly generated noisy data when the flag is switched on.
undenoised = False
if not undenoised:
    u_test = un.copy()
else:
    np.random.seed(0)
    noise_scale = 0.01*np.abs(noise_lv)*(u_clean.std())
    u_test = u_clean + noise_scale*np.random.randn(*u_clean.shape)

u_test_column = u_test.reshape(-1, 1)
# NOTE(review): both models are penalised with `complexity` parameters; if
# cf_sol3 comes from a three-term model its penalty presumably should use 3
# -- confirm.
bic_com2, _ = BIC_AIC(cf_sol2.reshape(-1, 1), u_test_column, complexity)
bic_com3, _ = BIC_AIC(cf_sol3.reshape(-1, 1), u_test_column, complexity)
assert bic_com2 < bic_com3
bic_com2, bic_com3

(-134490.26864181866, -134160.41842963122)

In [12]:
# Score both simulated solutions against noisy data generated from `u_clean`
# with a fixed seed (undenoised = True), or against `un` when the flag is off.
undenoised = True
if not undenoised:
    u_test = un.copy()
else:
    np.random.seed(0)
    noise_scale = 0.01*np.abs(noise_lv)*(u_clean.std())
    u_test = u_clean + noise_scale*np.random.randn(*u_clean.shape)

u_test_column = u_test.reshape(-1, 1)
# NOTE(review): both models are penalised with `complexity` parameters; if
# cf_sol3 comes from a three-term model its penalty presumably should use 3
# -- confirm.
bic_com2, _ = BIC_AIC(cf_sol2.reshape(-1, 1), u_test_column, complexity)
bic_com3, _ = BIC_AIC(cf_sol3.reshape(-1, 1), u_test_column, complexity)
assert bic_com2 < bic_com3
# Careful: the displayed pair is ordered (bic_com3, bic_com2), i.e. the
# three-term value comes first here.
bic_com3, bic_com2

(-77506.82112795026, -77545.87932368998)

In [13]:
## Chebfun | feyn -> params1 (Best adjusted IC) ***
# (2, -134490.26864181866, _) on denoised
# (3, -134160.41842963122, _) on denoised
# (2, -77545.87932368998, _) on undenoised
# (3, -77506.82112795026, _) on undenoised