In [3]:
# Imports
import numpy as np
import torch

from phimal_utilities.data import Dataset
from phimal_utilities.data.burgers import BurgersDelta
from phimal_utilities.analysis import load_tensorboard

from DeePyMoD_SBL.deepymod_torch.library_functions import library_1D_in
from DeePyMoD_SBL.deepymod_torch.DeepMod import DeepModDynamic
from DeePyMoD_SBL.deepymod_torch.training import train_dynamic
from DeePyMoD_SBL.deepymod_torch.estimators import Threshold, Clustering, PDEFIND
from pysindy.optimizers import STLSQ

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'svg'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
# Parameters handed to the BurgersDelta dataset
# (v is presumably the viscosity and A the initial amplitude — confirm against phimal_utilities).
v, A = 0.1, 1.0

# Space-time grid: 100 points in x, 50 points in t; 'ij' indexing puts x on axis 0.
x = np.linspace(start=-3, stop=4, num=100)
t = np.linspace(start=0.5, stop=5.0, num=50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')

dataset = Dataset(BurgersDelta, v=v, A=A)
#X_train, y_train, rand_idx = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=1000, noise=0.2, random=True, return_idx=True)

In [5]:
# Flatten the grid once into (n_points, 1) columns and reuse it for both calls.
x_flat, t_flat = x_grid.reshape(-1, 1), t_grid.reshape(-1, 1)

theta = dataset.library(x_flat, t_flat)   # library evaluated on the full grid
dt = dataset.time_deriv(x_flat, t_flat)   # time derivative evaluated on the full grid

In [6]:
# Scale every library column to unit 2-norm (norm taken over axis 0, kept 2-D for broadcasting).
column_norms = np.linalg.norm(theta, axis=0, keepdims=True)
theta_normed = theta / column_norms

# Building the STLSQ training routine

In [7]:
from pysindy.optimizers import STLSQ
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [37]:
def TrainSTRidge(R, Ut, lam=1e-5, d_tol=1.0, maxit = 50, STR_iters = 10, l0_penalty = None, normalize = 2, split = 0.8, print_best_tol = False):
    """Pick an STLSQ threshold by hold-out validation and return the best fit.

    The rows of ``R``/``Ut`` are randomly divided into a training part
    (fraction ``split``) and a test part using a fixed seed of 0. Starting
    from the ordinary least-squares solution, the threshold is raised in
    steps of ``d_tol``; every time the hold-out loss gets worse the threshold
    is backed off and the step size is reduced.

    The hold-out loss is ``||TestY - TestR @ w||_2 + l0_penalty * nnz(w)``.
    Please note the published article has a typo: fidelity is measured with
    the 2-norm, not the squared 2-norm.

    Parameters
    ----------
    R : 2-D array
        Library / design matrix, one row per sample.
    Ut : 2-D array
        Target values, indexed row-wise like ``R``.
    lam : float
        Ridge penalty, forwarded to ``STLSQ`` as ``alpha``.
    d_tol : float
        Initial threshold and initial threshold step.
    maxit : int
        Number of candidate thresholds to try.
    STR_iters, normalize
        Accepted for signature compatibility; not used by this implementation.
    l0_penalty : float or None
        Weight of the non-zero-count penalty; defaults to ``0.001 * cond(R)``.
    split : float
        Fraction of rows used for training.
    print_best_tol : bool
        If True, print the selected threshold before returning.

    Returns
    -------
    (w_best, tol_best)
        Best coefficient array (features along axis 0) and the threshold that
        produced it; ``tol_best`` is 0 when no thresholded fit matched or beat
        plain least squares.
    """
    # Split the rows with a *local* generator. RandomState(0) yields the same draw as
    # np.random.seed(0) followed by np.random.choice, but leaves the global RNG untouched.
    n_samples = R.shape[0]
    split_rng = np.random.RandomState(0)
    train = split_rng.choice(n_samples, int(n_samples * split), replace=False)
    test = np.setdiff1d(np.arange(n_samples), train)  # sorted complement of the training rows
    TrainR, TestR = R[train, :], R[test, :]
    TrainY, TestY = Ut[train, :], Ut[test, :]

    # Set up the initial tolerance and l0 penalty
    d_tol = float(d_tol)
    tol = d_tol
    if l0_penalty is None:
        l0_penalty = 0.001 * np.linalg.cond(R)

    # Baseline: standard least-squares estimator (rcond=None selects NumPy's current default cutoff)
    w_best = np.linalg.lstsq(TrainR, TrainY, rcond=None)[0]
    err_best = np.linalg.norm(TestY - TestR.dot(w_best), 2) + l0_penalty * np.count_nonzero(w_best)
    tol_best = 0

    # Increase the tolerance until hold-out performance degrades, then refine the step
    for iteration in range(maxit):
        opt = STLSQ(threshold=tol, alpha=lam, fit_intercept=False)
        w = opt.fit(TrainR, TrainY).coef_.T
        err = np.linalg.norm(TestY - TestR.dot(w), 2) + l0_penalty * np.count_nonzero(w)

        if err <= err_best:
            # Accept this threshold and keep moving up with the current step
            err_best = err
            w_best = w
            tol_best = tol
            tol = tol + d_tol
        else:
            # Overshot: step back two steps (not below zero), shrink the step, try again
            tol = max([0, tol - 2 * d_tol])
            d_tol = 2 * d_tol / (maxit - iteration)
            tol = tol + d_tol

    if print_best_tol:
        print("Optimal tolerance:", tol_best)

    return w_best, tol_best

In [42]:
def TrainSTLSQ(X, y, lam=1e-5, d_tol=1.0, maxit = 50, STR_iters = 10, l0_penalty = None, normalize = 2, split = 0.8, print_best_tol = False):
    """Hold-out threshold search for STLSQ using an sklearn train/test split.

    Same search as ``TrainSTRidge`` but the data are divided with
    ``train_test_split(train_size=split, random_state=0)`` and a single
    ``STLSQ`` instance is reused, with only its threshold updated per step.

    The hold-out loss is ``||y_test - X_test @ w||_2 + l0_penalty * nnz(w)``
    (2-norm, not squared 2-norm).

    NOTE(review): a later cell in this notebook redefines ``TrainSTLSQ`` with a
    different signature and return type; after that cell runs, this definition
    is shadowed.

    Parameters
    ----------
    X, y : arrays
        Design matrix and targets, passed to ``train_test_split``.
    lam : float
        Ridge penalty, forwarded to ``STLSQ`` as ``alpha``.
    d_tol : float
        Initial threshold and initial threshold step.
    maxit : int
        Number of candidate thresholds to try.
    l0_penalty : float or None
        Weight of the non-zero-count penalty; defaults to ``0.001 * cond(X)``.
    split : float
        Training fraction handed to ``train_test_split``.
    STR_iters, normalize, print_best_tol
        Accepted for signature compatibility; not used.

    Returns
    -------
    (w, w_best)
        Coefficients from the last threshold tried and the coefficients with
        the lowest hold-out loss (both with features along axis 0). With
        ``maxit == 0`` both are the least-squares baseline.
    """
    # Split the data, then search for the best tolerance.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=split, random_state=0)

    # Set up the initial tolerance and l0 penalty
    tol = d_tol
    if l0_penalty is None:
        l0_penalty = 0.001 * np.linalg.cond(X)

    # Baseline: standard least-squares estimator
    w_best = np.linalg.lstsq(X_train, y_train, rcond=None)[0]
    err_best = np.linalg.norm(y_test - X_test.dot(w_best), 2) + l0_penalty * np.count_nonzero(w_best)
    w = w_best  # keeps the return value defined when the loop body never runs

    opt = STLSQ(threshold=tol, alpha=lam, fit_intercept=False)
    # Increase the tolerance until hold-out performance degrades, then refine the step
    for iteration in range(maxit):
        opt.set_params(threshold=tol)
        w = opt.fit(X_train, y_train).coef_.T
        err = np.linalg.norm(y_test - X_test.dot(w), 2) + l0_penalty * np.count_nonzero(w)

        if err <= err_best:
            # Accept this threshold and keep moving up with the current step
            err_best = err
            w_best = w
            tol = tol + d_tol
        else:
            # Overshot: step back two steps (not below zero), shrink the step, try again
            tol = max([0, tol - 2 * d_tol])
            d_tol = 2 * d_tol / (maxit - iteration)
            tol = tol + d_tol
    return w, w_best

In [60]:
# Gaussian noise scaled relative to the standard deviation of the time derivative.
# A dedicated seeded generator makes the draw reproducible on a fresh kernel.
noise_level = 1.0
noise_rng = np.random.default_rng(0)
noise = noise_level * np.std(dt) * noise_rng.normal(size=dt.shape)

In [61]:
# NOTE(review): `.coef_` only works with the TrainSTLSQ variant that returns a fitted
# optimizer (defined in a later cell); the earlier variant returns a tuple of arrays.
TrainSTLSQ(theta_normed, dt + noise).coef_

array([[  0.        ,   0.        ,   7.3884286 ,   0.        ,
        -10.45544219,   0.        ,   0.        ,   0.        ,
          0.        ]])

In [62]:
# Reference run on the same noisy target; returns (best coefficients, best threshold).
TrainSTRidge(theta_normed, dt + noise)



(array([[  0.        ],
        [  0.        ],
        [  7.41875247],
        [  0.        ],
        [-10.56869588],
        [  0.        ],
        [  0.        ],
        [  0.        ],
        [  0.        ]]), 7.418604651162793)

In [70]:
# NOTE(review): TrainSTRidge_bad is not defined anywhere in the visible notebook; this cell
# presumably relied on a definition that has since been removed — confirm or delete the cell.
TrainSTRidge_bad(theta_normed, dt + noise, lam=1e-5, d_tol=1)



array([[  0.        ],
       [  0.        ],
       [  7.03695291],
       [  0.        ],
       [-10.63650339],
       [  0.        ],
       [  0.        ],
       [  0.        ],
       [  0.        ]])

In [28]:
# Plain least squares on the noise-free target as a reference solution.
# rcond=None opts in to NumPy's current default cutoff and silences the FutureWarning.
np.linalg.lstsq(theta_normed, dt, rcond=None)[0]

  """Entry point for launching an IPython kernel.


array([[ 5.19830877e-15],
       [ 2.39825521e-16],
       [ 7.33313063e+00],
       [ 4.61002764e-16],
       [-1.05619424e+01],
       [ 4.98104162e-15],
       [-3.99723657e-15],
       [ 1.61331113e-15],
       [-4.15021750e-15]])

In [29]:
# Zero threshold and zero ridge penalty: now similar to LSTSQ
lsq_like_settings = dict(threshold=0.0, alpha=0.0, fit_intercept=False)
optimizer = STLSQ(**lsq_like_settings)
optimizer.fit(theta_normed, dt)
optimizer.coef_

array([[ 1.69785190e-14, -2.33973202e-14,  7.33313063e+00,
        -7.74398621e-14, -1.05619424e+01,  4.12910755e-13,
         1.17878114e-13, -6.93212616e-14, -2.67738544e-13]])

# Comparing the manual split with sklearn's train_test_split

In [14]:
# Aliases matching the argument names of TrainSTRidge, plus the training fraction.
R, Ut = theta_normed, dt
split = 0.8

In [15]:
# Manual split, mirroring the one performed inside TrainSTRidge.
# RandomState(0) gives the same draw as np.random.seed(0) + np.random.choice,
# without reseeding the global generator.
split_rng = np.random.RandomState(0)
n,_ = R.shape
train = split_rng.choice(n, int(n*split), replace = False)
test = np.setdiff1d(np.arange(n), train)  # sorted complement of the training rows
TrainR = R[train,:]
TestR = R[test,:]
TrainY = Ut[train,:]
TestY = Ut[test,:]
D = TrainR.shape[1]

In [37]:
# Same data split by sklearn, with the same seed and training fraction as the manual split.
sklearn_parts = train_test_split(R, Ut, train_size=0.8, random_state=0)
X_train, X_test, y_train, y_test = sklearn_parts

In [38]:
# Shape of the sklearn training design matrix.
X_train.shape

(4000, 9)

In [39]:
# Shape of the sklearn hold-out targets.
y_test.shape

(1000, 1)

In [40]:
# Checks whether the first 1000 rows of the manual *training* targets equal sklearn's *test*
# targets, i.e. whether the two splitters select overlapping rows for opposite roles.
# NOTE(review): presumably both draw the same seed-0 permutation — confirm.
np.all(TrainY[:1000]  == y_test)

True

In [41]:
# First five sklearn hold-out targets, for a visual sanity check.
y_test[:5]

array([[0.00182186],
       [0.12783041],
       [0.00254644],
       [0.00061087],
       [0.00294557]])

In [45]:
# Shape of the sklearn training targets.
y_train.shape

(4000, 1)

In [7]:
# Re-seed the global NumPy generator without a fixed seed
# (presumably to undo the np.random.seed(0) applied by earlier cells — confirm).
np.random.seed()

In [56]:
def TrainSTLSQ(X, y, alpha=1e-5, delta_threshold=1.0, max_iterations=100, test_size=0.2, random_state=0):
    """Fit STLSQ with a threshold chosen by hold-out validation.

    Assumes the data are already normalized. The data are split with
    ``train_test_split``; an unthresholded, unpenalised fit provides the
    baseline loss. The threshold is then raised step by step: an improvement
    (with at least one non-zero coefficient) is accepted and the search moves
    on by the current step, otherwise the threshold is backed off by two
    steps (not below zero) and the step is shrunk.

    The hold-out loss is ``||prediction - y_test||_2 + l0 * nnz(coef_)`` with
    ``l0 = 1e-3 * cond(X)``.

    Parameters
    ----------
    X : 2-D array
        Design matrix.
    y : array
        Targets; used in this notebook as a column of shape (n_samples, 1).
    alpha : float
        Ridge penalty used for every thresholded fit.
    delta_threshold : float
        Initial threshold and initial step size.
    max_iterations : int
        Number of candidate thresholds to evaluate.
    test_size : float
        Hold-out fraction passed to ``train_test_split``.
    random_state : int
        Seed passed to ``train_test_split``.

    Returns
    -------
    STLSQ
        Optimizer refitted on the training part with the best settings found.
        If no thresholded fit matched or beat the baseline, the baseline
        settings (threshold 0, alpha 0) are used.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    l0 = 1e-3 * np.linalg.cond(X)

    def holdout_loss(model):
        # Fidelity on the hold-out set plus a penalty on the number of active terms
        residual = model.predict(X_test) - y_test
        return np.linalg.norm(residual, 2) + l0 * np.count_nonzero(model.coef_)

    # Baseline estimate: no thresholding, no ridge penalty (similar to LSTSQ)
    optimizer = STLSQ(threshold=0, alpha=0.0, fit_intercept=False)
    optimizer.fit(X_train, y_train)
    min_loss = holdout_loss(optimizer)
    # Settings that produced min_loss; only replaced by a validated improvement
    best_params = {"alpha": 0.0, "threshold": 0}

    step = delta_threshold  # internal step size, shrinks after every rejected threshold
    threshold = step

    for iteration in range(max_iterations):
        optimizer.set_params(alpha=alpha, threshold=threshold)
        optimizer.fit(X_train, y_train)
        loss = holdout_loss(optimizer)

        if loss <= min_loss and np.any(optimizer.coef_ != 0):
            min_loss = loss
            best_params = {"alpha": alpha, "threshold": threshold}
            # Advance by the *current* step so refinements made after an overshoot are kept
            threshold += step
        else:
            # Loss increased (or everything was zeroed): back off and reduce the step size
            lowered = max(0, threshold - 2 * step)
            step = 2 * step / (max_iterations - iteration)
            threshold = lowered + step

    # Refit with the best validated settings so the returned model matches min_loss
    optimizer.set_params(**best_params)
    optimizer.fit(X_train, y_train)

    return optimizer