In [1]:
### Author: Pongpisit Thanasutives ###
import os
from itertools import combinations
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from tqdm import trange
from scipy import io as sio
import pysindy as ps

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def TopRsq(X_full, y, m, n_tops=25):
    """Rate features by the fit quality of the best ``m``-feature linear models.

    Every combination of ``m`` columns of ``X_full`` is fitted to ``y`` with an
    intercept-free ``LinearRegression`` and scored with that model's ``score``
    method (R^2). Only the ``n_tops`` highest-scoring combinations are kept.

    Parameters
    ----------
    X_full : ndarray
        Candidate features, one per column (indexed as ``X_full[:, comb]``).
    y : array-like
        Regression target, passed unchanged to ``fit`` and ``score``.
    m : int
        Number of features in each candidate model.
    n_tops : int, default 25
        Maximum number of best-scoring models to retain.

    Returns
    -------
    models : ndarray of shape (n_feats, k)
        Column ``j`` is the 0/1 indicator vector of the features used by the
        ``j``-th best model; ``k`` is at most ``n_tops``.
    r_scores : ndarray of shape (k,)
        Scores of the retained models, in descending order.
    rating : ndarray of shape (n_feats,)
        For each feature, the sum of the scores of the retained models that
        contain it.

    Notes
    -----
    When no combination exists (``m`` larger than the number of columns) the
    function returns an empty ``models``/``r_scores`` pair and an all-zero
    ``rating`` instead of failing on the 2-D indexing of an empty array.
    """
    n_feats = X_full.shape[-1]
    r_scores = []
    models = []
    for comb in combinations(range(n_feats), m):
        comb = list(comb)
        # Indicator vector marking which features this model uses.
        active_indices = np.zeros(n_feats)
        active_indices[comb] = 1
        X_sub = X_full[:, comb]
        lr = LinearRegression(fit_intercept=False).fit(X_sub, y)
        r_scores.append(lr.score(X_sub, y))
        models.append(active_indices)

    if not models:
        # Nothing was fitted: keep the return shapes consistent for callers.
        return np.zeros((n_feats, 0)), np.zeros(0), np.zeros(n_feats)

    r_scores = np.array(r_scores)
    # Indices of the n_tops largest scores, best first.
    r_argsort = np.argsort(r_scores)[::-1][:n_tops]
    r_scores = r_scores[r_argsort]
    models = np.array(models).T[:, r_argsort]
    # Each feature accumulates the scores of the top models it appears in.
    rating = np.dot(models, r_scores)
    return models, r_scores, rating

In [3]:
def comprehensive_search(X_full, y, max_support_size=8, n_tops=None, threshold=0.75, lookback=False):
    """Select important features by tracking ``TopRsq`` ratings over support sizes.

    For support sizes 1, 2, ... the features still under consideration are
    rated with ``TopRsq`` and the ratings are divided by their maximum. From
    support size 2 onward, features rated exactly zero at both the current and
    the previous support size are dropped from further consideration. The
    search stops once the set of features with a positive rating is the same
    for two consecutive support sizes.

    Parameters
    ----------
    X_full : ndarray
        Candidate features, one per column.
    y : array-like
        Regression target forwarded to ``TopRsq``.
    max_support_size : int, default 8
        Largest support size to try.
    n_tops : int or None, default None
        Number of top models used by ``TopRsq``; ``None`` means
        ``ceil(n_feats / 2)``.
    threshold : float, default 0.75
        A feature is selected when its normalized rating exceeds this value.
    lookback : bool, default False
        If True, a feature must exceed ``threshold`` at both the final and the
        preceding support size.

    Returns
    -------
    optimal_indices : list or None
        Sorted indices of the selected features, or ``None`` when the search
        did not converge or no feature exceeded ``threshold``.
    ratings : ndarray of shape (n_feats, n_evaluated)
        Normalized ratings, one column per support size actually evaluated.
    """
    n_feats = X_full.shape[-1]
    if n_tops is None:
        n_tops = int(np.ceil(n_feats/2))
    ratings = np.zeros((n_feats, max_support_size))
    X = X_full.copy()
    active_indices = list(range(n_feats))
    optimal_indices = None
    search = True
    support_size = 1
    # A subset cannot contain more features than are still active; without this
    # bound TopRsq would be asked for combinations that do not exist.
    while search and support_size <= min(max_support_size, len(active_indices)):
        _, _, rating = TopRsq(X, y, m=support_size, n_tops=n_tops)
        # NOTE(review): assumes the best rating is positive; a zero or negative
        # maximum would produce NaN/inf or sign-flipped ratings here.
        rating = rating/rating.max()
        ratings[active_indices, support_size-1] = rating
        if support_size >= 2:
            current = ratings[:, support_size-1]
            previous = ratings[:, support_size-2]
            # Drop features that received no rating at two consecutive sizes.
            dropped = set(np.where(current + previous == 0.)[0])
            active_indices = [j for j in active_indices if j not in dropped]
            X = X_full[:, active_indices]
            i1 = np.where(current > 0)[0]
            i2 = np.where(previous > 0)[0]
            if np.array_equal(i1, i2):
                # The positively rated set is stable: the search has converged.
                search = False
                selected = set(np.where(current > threshold)[0])
                if lookback:
                    selected &= set(np.where(previous > threshold)[0])
                optimal_indices = sorted(selected)
                if len(optimal_indices) == 0:
                    optimal_indices = None
                    print("No term whose improtance is greater than the threshold...")
        support_size += 1
    # `search` is still True only if the stability criterion was never met, so
    # a converged search with an empty selection is not reported as failed.
    if search:
        print("Not converged...")
    return optimal_indices, ratings[:, :support_size-1]

In [4]:
# Synthetic sanity check: on random regression problems, compare the features
# returned by comprehensive_search with the best-scoring n_informative-feature
# subset found by TopRsq.
n_experiments = 100
n_samples = 10000
n_features = 8
n_informative = 2

threshold = 0.75
max_support_size = 8

success = 0
for i in trange(n_experiments):
    # Seed each trial with its index so the reported success rate is
    # reproducible under Restart & Run All.
    X_train, y_train = make_regression(n_samples=n_samples, n_features=n_features,
                                       n_informative=n_informative, random_state=i)
    # Reference answer: features of the top-ranked model of size n_informative.
    top_models, _, _ = TopRsq(X_train, y_train, m=n_informative)
    true_indices = np.where(top_models[:, 0] > 0)[0]
    est_indices, ratings = comprehensive_search(X_train, y_train, max_support_size=max_support_size, threshold=threshold)
    if est_indices is not None and np.array_equal(true_indices, est_indices):
        success += 1

# Fraction of trials in which the search recovered the reference subset.
success/n_experiments

100%|██████████████████████████████████████████████████████████| 100/100 [00:11<00:00,  8.35it/s]


0.96

In [5]:
# Load the Burgers dataset from a MATLAB file and prepare the field and grids.
data_path = "./Datasets/"
data = sio.loadmat(os.path.join(data_path, "burgers.mat"))
# u_clean keeps the loaded solution untouched; u is the working copy modified below.
u_clean = (data['usol']).real; u = u_clean.copy()
x = (data['x'][0]).real
t = (data['t'][:,0]).real
# Grid spacings from neighbouring samples (assumes uniformly spaced x and t — TODO confirm).
dt = t[1]-t[0]; dx = x[2]-x[1]

# Seed the global NumPy RNG so the noise draw below is repeatable.
np.random.seed(0)
noise_type = "gaussian"
noise_lv = float(50)
print("Noise level:", noise_lv)
# Gaussian noise whose scale is noise_lv percent of the standard deviation of u.
noise = 0.01*np.abs(noise_lv)*(u.std())*np.random.randn(u.shape[0],u.shape[1])
u = u + noise
# NOTE(review): this load replaces the noisy field computed just above, so the
# following cells use the saved array. The filename suggests it is a BM3D-denoised
# version of the same 50% Gaussian-noise data — confirm. The noise draw is still
# executed and therefore still advances the global RNG state.
u = np.load("./Denoised_data/burgers_gaussian50_bm3d.npy")

# Object array holding x as a column vector and t as a row vector.
xt = np.array([x.reshape(-1, 1), t.reshape(1, -1)], dtype=object)
X, T = np.meshgrid(x, t)
# Stack the two meshgrid arrays and transpose, so the last axis indexes (x, t).
XT = np.asarray([X, T]).T

Noise level: 50.0


In [6]:
# Candidate terms: polynomials of u up to degree 2 without the constant term,
# wrapped in a weak-form PDE library built on the (x, t) grid XT.
function_library = ps.PolynomialLibrary(include_bias=False, degree=2)

weak_lib = ps.WeakPDELibrary(
    K=10000,
    derivative_order=3,
    function_library=function_library,
    spatiotemporal_grid=XT,
    include_bias=True,
    diff_kwargs=dict(is_uniform=True),
)

# The library is given u with a trailing axis of length 1 appended.
# X_pre: candidate-term matrix; y_pre: weak-form counterpart of the time
# derivative (per the method name — see the pysindy documentation).
X_pre = np.array(weak_lib.fit_transform(u[..., np.newaxis]))
y_pre = weak_lib.convert_u_dot_integral(u[..., np.newaxis])

In [7]:
# Run the search on the weak-form features; with lookback=True a term must
# exceed the threshold at the last two support sizes to be selected.
effective_indices, rating = comprehensive_search(
    X_pre,
    y_pre,
    max_support_size=max_support_size,
    threshold=0.75,
    lookback=True,
)
(effective_indices, rating)

([4, 6],
 array([[0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.47403751, 0.16667916, 0.33329491, 0.33333348],
        [0.99522028, 0.23659438, 0.        , 0.16664701, 0.49994486],
        [0.        , 0.50832923, 1.        , 1.        , 1.        ],
        [0.4026134 , 0.        , 0.16669967, 0.33335042, 0.83335741],
        [1.        , 1.        , 1.        , 1.        , 1.        ],
        [0.85917427, 0.24401524, 0.16666811, 0.1666479 , 0.50005514],
        [0.        , 0.25272034, 0.16663835, 0.33329349, 0.16666985],
        [0.        , 0.24552254, 0.16668093, 0.3334116 , 0.49999667],
        [0.5313917 , 0.        , 0.        , 0.        , 0.        ],
        [0.34899425, 0.        , 0.16663378, 0.33335467, 0.16664259]]))