In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import scienceplots

import math
import os
import random
from functools import partial
from decimal import Decimal
import numpy as np
import scipy.io as sio
import pysindy as ps
from tqdm import trange

from pymoo_ga import *
# NSGA2, DNSGA2, SMSEMOA, AGEMOEA2
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.algorithms.moo.dnsga2 import DNSGA2
from pymoo.algorithms.moo.sms import SMSEMOA
from pymoo.algorithms.moo.age2 import AGEMOEA2
from pymoo.termination.default import DefaultMultiObjectiveTermination
from pymoo.optimize import minimize

from utils import *
from skimage.restoration import estimate_sigma
import bm3d
# from okridge.solvel0 import *
from solvel0 import solvel0, MIOSR
from best_subset import backward_refinement, brute_force, brute_force_all_subsets
from UBIC import *
from kneed import KneeLocator
from bayesian_model_evidence import log_evidence

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.preprocessing import StandardScaler
from sklearn import covariance
from sklearn.metrics import mean_absolute_percentage_error

mrmr is not installed in the env you are using. This may cause an error in future if you try to use the (missing) lib.
L0BnB is not installed.


In [2]:
n_poly = 6
n_derivatives = 6
n_modules = 8

In [3]:
data_path = "../PDE-Discovery-EC/Datasets/"
print(os.listdir(data_path))
data = sio.loadmat(os.path.join(data_path, "KdV_rudy.mat"))
u_clean = (data['usol']).real; u = u_clean.copy()
x = data['x'].ravel()
t = data['t'].ravel()
dt = t[1]-t[0]; dx = x[2]-x[1]

['KdV_sine_rep_big.mat', 'kuramoto_sivishinky.mat', 'lorenz100.npy', 'Wave_equation', 'KdV_rudy.mat', 'lorenz10.npy', 'KG_Exp.mat', 'burgers.mat']


### Add noise

In [4]:
np.random.seed(0)
noise_type = "gaussian"
noise_lv = float(50)
print("Noise level:", noise_lv)
noise = 0.01*np.abs(noise_lv)*(u.std())*np.random.randn(u.shape[0],u.shape[1])
u = u + noise

Noise level: 50.0


### Denoise

In [5]:
# load_denoised_data = True
# if load_denoised_data:
#     print("Loading denoised data...")
#     u = np.load(f"./Denoised_data/kdv_{noise_type}{int(noise_lv)}_bm3d.npy")
# else:
#     print("denoising...")
#     n_sampled_t = 30
    
#     kernel = RBF(length_scale=1, length_scale_bounds=(1e-2, 1e3)) + \
#             WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e10))
    
#     xx = colvec(x)
#     u_std = np.ones((u.shape[0], len(t)))
#     for i in trange(len(t)):    
#         gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0, 
#                                        n_restarts_optimizer=10 # 20
#                                       )
    
#         gpr.fit(xx, u[:, i])
#         _, ustd = gpr.predict(xx, return_std=True)
#         u_std[:, i] = ustd

#     est_sigma = u_std.mean() # max also works well
#     # est_sigma = (est_sigma+estimate_sigma(u))/2
#     u = bm3d.bm3d(u, sigma_psd=est_sigma, 
#                   stage_arg=bm3d.BM3DStages.ALL_STAGES, 
#                   blockmatches=(False, False))

#     np.save(f"./Denoised_data/kdv_{noise_type}{int(noise_lv)}_bm3d.npy", u)

np.random.seed(0)
fake_noise = np.random.normal(loc=0.0, scale=estimate_sigma(u), size=u.shape)
sigmas = estimate_sigma(u+fake_noise)*np.arange(0.1, 2., 0.1)
est_sigma = sigmas[np.argmin([((u-bm3d.bm3d(u+fake_noise, sigma_psd=sigma, stage_arg=bm3d.BM3DStages.ALL_STAGES, blockmatches=(False, False)))**2).mean() \
                              for sigma in sigmas])]
u = bm3d.bm3d(u, sigma_psd=est_sigma, 
                  stage_arg=bm3d.BM3DStages.ALL_STAGES, 
                  blockmatches=(False, False))

In [6]:
xt = np.array([x.reshape(-1, 1), t.reshape(1, -1)], dtype=object)
X, T = np.meshgrid(x, t)
XT = np.asarray([X, T]).T

In [7]:
function_library = ps.PolynomialLibrary(degree=n_poly, include_bias=False)

weak_lib = ps.WeakPDELibrary(
    function_library=function_library,
    derivative_order=n_derivatives,
    p=n_derivatives,
    spatiotemporal_grid=XT,
    include_bias=True,
    K=10000, # 2000, 5000, 10000
    diff_kwargs={"is_uniform":True}
)

X_pre = np.array(weak_lib.fit_transform(np.expand_dims(u, -1)))
y_pre = weak_lib.convert_u_dot_integral(np.expand_dims(u, -1))
feature_names = np.array(weak_lib.get_feature_names())

# R_path = "./Cache/"
# np.save(os.path.join(R_path, f"X_pre_kdv_noise{int(noise_lv)}.npy"), X_pre)
# np.save(os.path.join(R_path, f"y_pre_kdv_noise{int(noise_lv)}.npy"), y_pre)
# np.save(os.path.join(R_path, f"feature_names_kdv.npy"), feature_names)

In [8]:
base_poly = np.array([[p, 0] for p in range(1, n_poly+1)])
base_derivative = np.array([[0, d] for d in range(1, n_derivatives+1)])
modules = [(0, 0)] if weak_lib.include_bias else []
modules += [(p, 0) for p in range(1, n_poly+1)] + \
            [(0, d) for d in range(1, n_derivatives+1)] + \
            [tuple(p+d) for d in base_derivative for p in base_poly]
assert len(modules) == len(weak_lib.get_feature_names())
base_features = dict(zip(modules, X_pre.T))
u_t = y_pre.copy()

### Genetic algorithm with NSGA-II

In [9]:
pop_size = 500
problem = PdeDiscoveryProblem(n_poly, n_derivatives, n_modules, 
                              base_features, u_t, order_complexity=False)

In [10]:
load_pareto_front = False

if not load_pareto_front:
    termination = DefaultMultiObjectiveTermination(
        xtol=1e-8,
        cvtol=1e-6,
        ftol=1e-8,
        period=50,
        n_max_gen=100,
        n_max_evals=100000
    )

    from pymoo.algorithms.moo.sms import SMSEMOA

    # algorithm = NSGA2(
    #                 pop_size=pop_size, 
    #                 sampling=PopulationSampling(), 
    #                 crossover=GenomeCrossover(), 
    #                 mutation=GenomeMutation(), 
    #                 eliminate_duplicates=DuplicateElimination()
    #                 )

    # algorithm = DNSGA2(
    #                 pop_size=pop_size,
    #                 sampling=PopulationSampling(),
    #                 crossover=GenomeCrossover(),
    #                 mutation=GenomeMutation(),
    #                 eliminate_duplicates=DuplicateElimination()
    #                 )

    algorithm = SMSEMOA(
                    pop_size=pop_size,
                    sampling=PopulationSampling(),
                    crossover=GenomeCrossover(),
                    mutation=GenomeMutation(),
                    eliminate_duplicates=DuplicateElimination()
                    )

    res = minimize(problem, 
                   algorithm, 
                   termination=termination, 
                   verbose=True
                  )
    
    pareto_optimal_models = res.X
    # np.save(f"./Cache/pf_kdv_noise{int(noise_lv)}.npy", pareto_optimal_models)

else:
    pareto_optimal_models = np.load(f"./Cache/pf_kdv_noise{int(noise_lv)}.npy", allow_pickle=True)

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      500 |      5 |             - |             -
     2 |     1000 |      7 |  0.0736118699 |             f
     3 |     1500 |      6 |  0.0336060703 |         ideal
     4 |     2000 |      5 |  0.0155745532 |             f
     5 |     2500 |      5 |  0.0114725978 |             f
     6 |     3000 |      7 |  0.2857142857 |         nadir
     7 |     3500 |      7 |  0.0273196521 |             f
     8 |     4000 |      8 |  0.0179383481 |             f
     9 |     4500 |      9 |  0.0146675787 |         ideal
    10 |     5000 |      7 |  0.0033864493 |             f
    11 |     5500 |      7 |  0.0047262466 |         ideal
    12 |     6000 |      8 |  0.0089403162 |         ideal
    13 |     6500 |      8 |  0.000000E+00 |             f
    14 |     7000 |      8 |  0.0007896914 |             f
    15 |     7500 |      8 |  0.0007896914 |             f
    16 |     8000 |      9 |  0.1250000000 |         nad

In [11]:
### OPTIONAL ###
from operator import itemgetter

effective_candidates = extract_unique_candidates(pareto_optimal_models)

new_pareto_optimal_models = []
for bs in backward_refinement([sorted([effective_candidates.index(_) for _ in list(pm[0])]) for pm in pareto_optimal_models], 
                              (problem.numericalize_genome(effective_candidates), y_pre)).get_best_subsets():
    bs = itemgetter(*bs)(effective_candidates)
    if type(bs[0]) is not tuple:
        bs = (bs,)
    new_pareto_optimal_models.append([frozenset(bs)])
pareto_optimal_models = np.array(new_pareto_optimal_models)
del new_pareto_optimal_models
pareto_optimal_models

array([[frozenset({(0, 1)})],
       [frozenset({(1, 1), (0, 3)})],
       [frozenset({(0, 1), (1, 1), (0, 3)})],
       [frozenset({(1, 1), (0, 3), (1, 3), (0, 5)})],
       [frozenset({(0, 1), (2, 1), (6, 1), (0, 3), (0, 5)})],
       [frozenset({(2, 1), (3, 1), (1, 1), (0, 3), (5, 1), (0, 5)})],
       [frozenset({(2, 1), (3, 1), (6, 1), (0, 3), (5, 1), (0, 5), (4, 1)})],
       [frozenset({(2, 1), (4, 1), (3, 1), (6, 1), (0, 3), (5, 1), (0, 5), (1, 3)})],
       [frozenset({(4, 1), (3, 1), (6, 1), (1, 1), (0, 3), (5, 1), (2, 3), (0, 5), (1, 3)})],
       [frozenset({(4, 1), (3, 1), (6, 1), (1, 1), (0, 3), (5, 1), (2, 3), (3, 3), (0, 5), (1, 3)})],
       [frozenset({(0, 1), (2, 1), (4, 1), (3, 1), (6, 1), (0, 3), (5, 1), (2, 3), (3, 3), (0, 5), (1, 3)})],
       [frozenset({(0, 1), (0, 4), (2, 1), (4, 1), (3, 1), (6, 1), (0, 3), (5, 1), (2, 3), (3, 3), (0, 5), (1, 3)})],
       [frozenset({(0, 1), (0, 4), (2, 1), (4, 1), (3, 1), (6, 1), (0, 3), (5, 1), (2, 3), (3, 3), (5, 6), (0, 5

### Compromise programming ###

In [12]:
from collections import Counter
from pymcdm import weights as obj_w
from compromise_programming import mcdm, ranks2decision

F = problem.evaluate(pareto_optimal_models)
obj_weights = obj_w.entropy_weights(F)
types = [-1, -1]

ranks, prefs = mcdm(F, obj_weights, types)
balance_point, most_common = ranks2decision(ranks)
F[balance_point], most_common, ranks, prefs

(array([4.91956093e-05, 2.00000000e+00]),
 [(1, 3), (2, 1)],
 [array([16.,  2.,  1.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
         13., 14., 15., 17.]),
  array([16.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
         13., 14., 15., 17.]),
  array([11.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 12., 13.,
         14., 15., 16., 17.]),
  array([16.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
         13., 14., 15., 17.])],
 [array([0.50749412, 0.87751875, 0.88048121, 0.86344656, 0.82472955,
         0.7891233 , 0.75280038, 0.71794293, 0.68469211, 0.6535963 ,
         0.62433343, 0.59705579, 0.57178781, 0.54876403, 0.52789351,
         0.50915144, 0.49250588]),
  array([-0.17074658,  0.20824769,  0.20244754,  0.20107001,  0.17334595,
          0.15241823,  0.12446387,  0.0951175 ,  0.0646689 ,  0.0345256 ,
          0.0036586 , -0.0275946 , -0.05921842, -0.09073641, -0.12234304,
         -0.15402607, -0.18573482]),
  array([0.625   

### Intercept or NO Intercept? ###

In [13]:
# true_indices = [9, 13]
# true_coefficients = [-1, -6]
# true_ols = sm.OLS(y_pre, X_pre[:, true_indices]).fit()
# estimated_coefficients = true_ols.params
# print(estimated_coefficients, mean_absolute_percentage_error(true_coefficients, estimated_coefficients))
# true_ols.bic, sm.OLS(y_pre, X_pre[:, [0] + true_indices]).fit().bic