In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import scienceplots

import math
import os
import random
from functools import partial
from decimal import Decimal
import numpy as np
# from sklearnex import patch_sklearn; patch_sklearn() # if you are using intel cpus
import scipy.io as sio
from scipy.integrate import solve_ivp
from scipy.signal import savgol_filter
import pysindy as ps
from pysindy.utils import lorenz
from tqdm import trange

# NSGA2, DNSGA2, SMSEMOA, AGEMOEA2
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.algorithms.moo.dnsga2 import DNSGA2
from pymoo.algorithms.moo.sms import SMSEMOA
from pymoo.algorithms.moo.age2 import AGEMOEA2
from pymoo.core.problem import ElementwiseProblem
from pymoo.core.sampling import Sampling
from pymoo.core.crossover import Crossover
from pymoo.core.mutation import Mutation
from pymoo.core.duplicate import ElementwiseDuplicateElimination
from pymoo.termination.default import DefaultMultiObjectiveTermination
from pymoo.optimize import minimize
from pymoo.visualization.scatter import Scatter

from utils import *
from skimage.restoration import estimate_sigma
import bm3d
from okridge.solvel0 import *
from best_subset import backward_refinement, brute_force_all_subsets
from solvel0 import solvel0
from UBIC import *
from kneed import KneeLocator

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

  from .autonotebook import tqdm as notebook_tqdm


Sklearn's version: 1.6.1


In [2]:
# Options forwarded to solve_ivp: LSODA with tight relative/absolute tolerances
integrator_keywords = {'rtol': 1e-12, 'method': 'LSODA', 'atol': 1e-12}

# Uniform time grid on [0, 100) with step dt, and the initial state
dt = 0.001
t_train = np.arange(0, 100, dt)
x0_train = [-8, 8, 27]

# Integrate the Lorenz system and store the states as rows (one row per time sample)
solution = solve_ivp(
    lorenz,
    (t_train[0], t_train[-1]),
    x0_train,
    t_eval=t_train,
    **integrator_keywords,
)
x_train = solution.y.T

# Evaluate the Lorenz right-hand side at every sampled state
x_dot_train_measured = np.array(
    [lorenz(0, x_train[k]) for k in range(t_train.size)]
)

# NOTE(review): presumably the number of active terms in each of the three
# Lorenz equations -- confirm against how true_complexities is consumed.
true_complexities = [2, 3, 2]

In [3]:
noise_level = 1e-2
# Fixed seed so the noisy trajectory (and everything derived from it) is
# reproducible after Restart Kernel -> Run All.
noise_seed = 0
noise = np.zeros(x_train.shape)
# Keep an untouched copy of the trajectory before noise is added.
x_train_clean = x_train.copy()
if noise_level > 0:
    # A local Generator avoids depending on (or mutating) the global NumPy RNG state.
    noise_rng = np.random.default_rng(noise_seed)
    noise = noise_rng.normal(scale=noise_level, size=x_train.shape)
    x_train = x_train_clean + noise

In [4]:
# Smooth the noisy trajectory along the time axis (Savitzky-Golay, window 11, cubic).
# The input is rebuilt from x_train_clean + noise rather than read from x_train, so
# re-running this cell does not smooth already-smoothed data.
x_train = savgol_filter(x_train_clean + noise, window_length=11, polyorder=3, axis=0)

In [5]:
# TODO: Implement TVDiff
n_poly = 4                        # maximum polynomial degree of the candidate library
n_vars = len(true_complexities)   # one state variable per entry of true_complexities
n_modules = 8
# Single differentiator assignment: an earlier FiniteDifference() instance was
# overwritten immediately and never used, so only the effective choice is kept.
differentiation_method = ps.differentiation.SmoothedFiniteDifference()

In [6]:
# Polynomial candidate terms up to degree n_poly; the constant term is requested
# on the weak library itself (include_bias=True) rather than on the inner library.
polynomial_terms = ps.PolynomialLibrary(degree=n_poly, include_bias=False)

weak_lib_kwargs = {
    "function_library": polynomial_terms,
    "spatiotemporal_grid": t_train,
    "include_bias": True,
    "K": 5000,
    "differentiation_method": differentiation_method,
    "diff_kwargs": {"is_uniform": True},
}
ode_lib = ps.WeakPDELibrary(**weak_lib_kwargs)

In [7]:
normalize = False

# Weak-form feature matrix and weak-form time-derivative targets.
# fit_transform must run first: it fits ode_lib before the targets are converted.
X_pre = np.array(ode_lib.fit_transform(x_train))
y_pre = np.array(ode_lib.convert_u_dot_integral(x_train))

# Per-column scale factors; all ones when normalization is disabled.
if normalize:
    max_features = X_pre.max(axis=0)
    X_pre = X_pre / max_features
else:
    max_features = np.ones((1, X_pre.shape[-1]))

# Map each key produced by distribute_order to the matching column of X_pre.
base_features = {
    order: column
    for order, column in zip(distribute_order(n_poly, n_vars), X_pre.T)
}

# solvel0(X_pre, y_pre[:, 0:1], miosr=True, refine=True, max_complexity=8)

# from joblib import Parallel, delayed
# res = Parallel(n_jobs=3)(delayed(okridge_solvel0_full)(
#     X_pre, select_column(y_pre, i), k=8) for i in range(3)
#                         )

# Genetic algorithm (GA)
# Each module is a tuple (a, b, c) of integers with a, b, c >= 0 and a + b + c <= n_poly.

In [8]:
class OdeDiscoveryProblem(ElementwiseProblem):
    """Two-objective pymoo problem over sets of candidate ODE terms ("modules").

    A module is a tuple of ``n_vars`` non-negative integer degrees whose sum is
    at most ``n_poly``. A genome is an iterable of modules; each module is
    looked up in ``base_features`` to obtain its numeric feature column.
    """

    def __init__(self, n_poly, n_vars, n_modules, 
                 base_features, target, epsilon=1e-3):
        """Store the search configuration.

        Parameters
        ----------
        n_poly : int
            Total degree budget shared by the entries of one module.
        n_vars : int
            Number of entries (degrees) in one module.
        n_modules : int
            Stored as-is; not used by the methods defined in this class.
        base_features : mapping
            Indexed with ``tuple(module)``; each value is one feature array.
        target : array-like
            Right-hand side passed to ``np.linalg.lstsq``.
        epsilon : float, optional
            Stored as-is; not used by the methods defined in this class.
        """
        # One decision variable, two objectives, no inequality constraints.
        super().__init__(n_var=1, n_obj=2, n_ieq_constr=0)
        self.n_poly = n_poly
        self.n_vars = n_vars
        self.n_modules = n_modules
        self.base_features = base_features
        self.target = target
        self.epsilon = epsilon
        # Total number of scalar entries in the target.
        self.sample_size = np.prod(self.target.shape)

    def _evaluate(self, X, out, *args, **kwargs):
        """Placeholder evaluation: prints the candidate and leaves ``out`` untouched."""
        # TODO: compute the two objectives and store them in `out`.
        print(X)
        
    def generate_ode_module(self):
        """Draw a random module: ``n_vars`` degrees whose sum is at most ``n_poly``.

        Returns
        -------
        tuple of int
            Degrees in variable order; entries after the budget is exhausted
            stay at 0.
        """
        # Work on a local copy of the budget so that repeated calls do not
        # shrink self.n_poly (which would degrade every later module to zeros).
        remaining = self.n_poly
        module = [0] * self.n_vars
        for i in range(self.n_vars):
            deg = random.randint(0, remaining)
            module[i] = deg
            remaining -= deg
            if remaining <= 0:
                # Budget used up: the remaining entries keep degree 0.
                break
        return tuple(module)
        
    def numericalize_genome(self, genome):
        """Stack the feature arrays of all modules in ``genome`` along a new last axis.

        The column order follows the iteration order of ``genome``.
        """
        return np.stack([self.base_features[tuple(module)] 
                         for module in genome], axis=-1)

    def compute_genome_coefficient(self, genome):
        """Least-squares fit of ``target`` on the features selected by ``genome``.

        Returns
        -------
        coeff : ndarray
            Least-squares solution returned by ``np.linalg.lstsq``.
        error : ndarray
            Residual output of ``np.linalg.lstsq``; NumPy returns an empty
            array here when the system is rank-deficient or not overdetermined.
        """
        features = self.numericalize_genome(genome)
        # Flatten every leading axis so that rows are samples and columns are modules.
        features = features.reshape(-1, features.shape[-1])
        coeff, error, _, _ = np.linalg.lstsq(features, self.target, rcond=None)
        return coeff, np.array(error)

    def set_epsilon(self, epsilon):
        """Replace the stored ``epsilon`` value."""
        self.epsilon = epsilon

# Problem for the first state equation: the target is column 0 of y_pre, kept 2-D.
problem = OdeDiscoveryProblem(
    n_poly=n_poly,
    n_vars=n_vars,
    n_modules=n_modules,
    base_features=base_features,
    target=y_pre[:, :1],
)

# Smoke test: fit a random genome of up to three modules (the frozenset drops duplicates).
genome = frozenset(problem.generate_ode_module() for _ in range(3))
problem.compute_genome_coefficient(genome)

(array([[-2.59802118e+00],
        [ 2.46487008e-04]]),
 array([132201.19591727]))

In [9]:
# {((xx, xx, xx), (xx, xx, xx), (xx, xx, xx)), ((xx, xx, xx), (xx, xx, xx), (xx, xx, xx))}