In [1]:
from scipy import integrate
import numpy as np

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
def indicator(x, endpts):
    '''
    Indicator function of an axis-aligned subdomain, expressed in grid indices.

    Returns 1 when the point lies inside the subdomain (both bounds are
    inclusive in every spatial dimension) and 0 otherwise.

    Require:
        x and endpts must describe the same number of spatial dimensions.

    Parameters:

        x: length-n sequence holding the index of the point to check along each
           spatial dimension (Time dimension should be excluded). A scalar is
           accepted for the 1D case.

        endpts: 2d (n x 2) array-like of indices. Row i holds the left and right
                bound of the subdomain along spatial dimension i. A flat
                [left, right] pair is accepted for the 1D case.

    return:
        1 if no coordinate falls outside its [left, right] bounds, otherwise 0.

    Raises:
        ValueError: if x and endpts do not agree on the number of dimensions.
    '''
    # Normalise the inputs so scalars / plain lists behave like arrays.
    x = np.ravel(x)
    endpts = np.atleast_2d(endpts)

    # Fixed: the original evaluated len(len(...)), which always raised TypeError.
    if endpts.ndim != 2 or endpts.shape[1] < 2 or len(x) != endpts.shape[0]:
        raise ValueError("Parameter dimensions do not agree.")

    # Fixed: the original compared the whole vector `x` against the right bound
    # instead of the i-th coordinate.
    outside = (x < endpts[:, 0]) | (x > endpts[:, 1])
    return 0 if outside.any() else 1

In [3]:
def compute_integral(X, spatiotemporal_grid, t, j, endpts):
    '''
    Approximate the integral of feature j over one subdomain at time index t.

    Parameters: 
    
        X: data grid; it is reshaped by retrieve_data_mat before being sliced.
        
        spatiotemporal_grid: The spatiotemporal_grid that contains information about spatial and time data points.
            It is indexed here as [spatial axes..., time, coordinate]: the last axis selects which
            coordinate is read and the second-to-last axis is time.
        
        t: time index at which the integral is evaluated
        
        j: feature index
        
        endpts: n x 2 array 
            the first column is the left endpoints of the subdomain's each of the n dimensions in terms of index,
            second column is right endpoint of each of the subdomain's each of the n dimensions in terms of index
            
    return:
        nd integral within a subdomain
    '''  
    
#     The last axis of the grid selects the coordinate and the second-to-last is time,
#     so every remaining leading axis is spatial.
#     Fixed: the original counted len(shape)-1 axes, so its final loop iteration overwrote
#     the slice with `t` and handed a scalar to get_1D_weight.
    n_spatial = len(np.shape(spatiotemporal_grid)) - 2
    
# find weights
#     One 1D weight vector per spatial dimension, collected in a list.
    weights = []
    for i in range(n_spatial):
#         one entry per grid axis: spatial axes, then time, then coordinate
        index = [0] * (n_spatial + 2)
        index[i] = slice(None)
        index[-1] = i
#         Time is always the second to last dimension, which is fixed here
        index[-2] = t
        
#         Fixed: index with a tuple. Indexing with a list that contains slices is rejected
#         by current NumPy (it used to emit only a FutureWarning).
        this_dim = spatiotemporal_grid[tuple(index)]
        
#         NOTE(review): get_1D_weight compares these endpoints against coordinate values,
#         while the docstring above (and filterX) treat endpts as indices -- confirm which is meant.
        weight = get_1D_weight(this_dim, endpts[i, :])
        weights.append(weight)
    
    W_F = get_full_weight(weights)
    
# We now construct F, the data of feature j inside the subdomain
#     NOTE(review): retrieve_data_mat reshapes X to shape(spatiotemporal_grid) + [n_features];
#     with a trailing coordinate axis on the grid that adds one axis too many -- confirm the
#     grid layout the caller is expected to pass.
    X_F = retrieve_data_mat(spatiotemporal_grid, X)
#     Fixed: slice the reshaped N-D data. The original computed X_F and then passed the
#     un-reshaped X to filterX, whose index layout needs the N-D form.
    F = filterX(X_F, j, endpts, t)

#     NOTE(review): W_F spans the whole grid along each axis (zero outside the subdomain) while
#     F only covers the subdomain slice, and np.dot is a matrix product rather than an
#     element-wise weighted sum -- verify shapes and intent before relying on this value.
    return np.sum(np.dot(W_F, F))

In [4]:
# Matrix to obtain weight

def get_1D_weight(grid, endpt):
    '''
    Trapezoidal-rule quadrature weights along one axis of a subdomain.

    Parameters: 
        grid: an 1D array that contains the value of the corresponding dimension of each grid points.
        
        endpt: length-2 sequence [left, right]. 
            The entries are compared against the values stored in `grid`: the subdomain starts at
            the first grid point whose value is >= left and ends at the first grid point whose
            value is >= right (or at the last grid point if no value reaches `right`).

    return:
        1D array with the same length as `grid`. Entries outside the subdomain are 0. All
        entries are 0 when the subdomain contains fewer than two grid points (nothing to
        integrate over).
    '''
    grid = np.asarray(grid)

#     grid points outside Omega_k contribute nothing to the integral, so start from zeros
    weight = np.zeros(len(grid))

#     find the index at which we enter Omega_k in this axis
    reached_left = np.flatnonzero(grid >= endpt[0])
    if reached_left.size == 0:
#         the subdomain lies entirely beyond the grid
        return weight
    start = reached_left[0]

#     find the index at which we leave Omega_k; clamp to the last grid point when the
#     right endpoint is never reached (the original silently fell back to index 0)
    reached_right = np.flatnonzero(grid >= endpt[1])
    end = reached_right[0] if reached_right.size else len(grid) - 1

    if end <= start:
#         zero-width (or inverted) interval: the integral over it is 0
        return weight

#     start and end index has different equation for weight, so we do those first
    weight[start] = 0.5 * (grid[start + 1] - grid[start])
    weight[end] = 0.5 * (grid[end] - grid[end - 1])
#     interior points get half the distance between their two neighbours
    weight[start + 1:end] = 0.5 * (grid[start + 2:end + 1] - grid[start:end - 1])
    
    return weight

def get_full_weight(weights):
    '''
    Combine per-dimension 1D weights into the full N-D weight array.

    weights: a list of lists, where each inner list is the 1D weight in a dimension. 

    return:
        array W_F with one axis per entry of `weights`, such that
        W_F[i0, i1, ...] = weights[0][i0] * weights[1][i1] * ...
    '''
    W_F = np.array(weights[0])
    for next_weight in weights[1:]:
#         Append a length-1 trailing axis so the next 1D weight broadcasts along a new axis.
#         Fixed: the original indexed with a *list* of slices/newaxis, which current NumPy
#         rejects; Ellipsis + newaxis expresses the same thing with a valid index.
        W_F = W_F[..., np.newaxis] * np.array(next_weight)
    return W_F

In [5]:
# Methods to filter data matrix X
def retrieve_data_mat(spatiotemporal_grid, X):
    '''
    Reshape X so that its leading axes match the shape of spatiotemporal_grid.

    The target shape is the shape of spatiotemporal_grid followed by the size of
    the last axis of X.
    '''
    grid_shape = np.shape(spatiotemporal_grid)
    n_last = np.shape(X)[-1]
    return X.reshape((*grid_shape, n_last))

def filterX(X, j, bound, t_ind):
    '''
    Slice X down to one subdomain, one time index and one feature.

    Leading axis i is cut to bound[i][0]:bound[i][1] (right end exclusive, as in
    any Python slice) for every row of `bound`; the second-to-last axis is fixed
    at t_ind and the last axis at j. Any axis not covered is fixed at index 0.
    '''
    selector = [0] * np.ndim(X)

#     spatial axes: keep only the index range of the subdomain
    n_bounded = np.shape(bound)[0]
    for axis in range(n_bounded):
        lower, upper = bound[axis][0], bound[axis][1]
        selector[axis] = slice(lower, upper)

#     time and feature axes are fixed last, so they win over any slice set above
    selector[-2] = t_ind
    selector[-1] = j
    return X[tuple(selector)]

In [6]:
def get_theta_nonloc(spatiotemporal_grid, j, k, kprime, endpts):
    '''
    Build the flattened nonlocal library column for one (k, kprime) subdomain pair.

    Entry i of the result is indicator(point_i in subdomain k) times the integral of
    feature j over subdomain kprime at the time index of entry i. Entries are ordered
    with time varying fastest.

    Parameters:
        spatiotemporal_grid: The spatiotemporal_grid that contains information about spatial and time data points.
        j: the index of u that we are looking for
        k: the index of subdomain to be used by the indicator function
        kprime: the index of the subdomain to be used as boundary of integral
        endpts: boundary of each subdomain correspond to each dimension in terms of indexing. 
        
    return: 
        vector Theta^nonloc_p

    Note:
        The data matrix `X` is not a parameter; it is looked up in the enclosing
        (notebook-global) scope when this function is called.
    '''
    grid_shape = np.shape(spatiotemporal_grid)
#     get how many time points are there
    num_t = grid_shape[-2]
#     every axis before the time axis is spatial
    spatial_shape = grid_shape[:-2]
#     get how many spatial points are there
    num_x = int(np.prod(spatial_shape))
    
    theta_nonloc_p = np.zeros(num_t * num_x)
    
#     The integral depends only on the time index, so compute it once per time point
#     instead of once per (space, time) entry.
    integral_at = {}
    
#     Fixed: ndarray has no `.length` attribute; use `.size`.
    for i in range(theta_nonloc_p.size):
#         Fixed: exact integer arithmetic instead of int(i / num_t) float division.
        this_x, this_t = divmod(i, num_t)
        
#         Fixed: indicator expects one index per spatial dimension, not a flat integer.
        point = np.unravel_index(this_x, spatial_shape)
        coefficient = indicator(point, endpts[k])
        
        if this_t not in integral_at:
            integral_at[this_t] = compute_integral(X, spatiotemporal_grid, this_t, j, endpts[kprime])
        
        theta_nonloc_p[i] = coefficient * integral_at[this_t]
        
    return theta_nonloc_p

# Module test starts here.

In [7]:
# 1D weight test: 21 evenly spaced grid values on [0, 10] (spacing 0.5) and a
# subdomain whose endpoints are given as coordinate values 4 and 5.
# Printing both lets the weights be checked by eye against the grid.
sample_grid = np.linspace(0, 10, 21, endpoint=True)
endpts = [4, 5]
print(sample_grid)
print(get_1D_weight(sample_grid, endpts))

[ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5
  7.   7.5  8.   8.5  9.   9.5 10. ]
[0.   0.   0.   0.   0.   0.   0.   0.   0.25 0.5  0.25 0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.  ]


# The above cell shows that the 1D weight calculation is performing properly. We now test the computation to get the full weight.

In [8]:
# Test with four 1D weight vectors of different lengths, i.e. a 4D full weight.
test_weights = [[1,5,7,45],[1,6,8],[45,7],[3,5,7,8,2,65,78,89]]
W_F = get_full_weight(test_weights)

# Goal: W_F[x, y,..., a] = test_weights[0][x]*test_weights[1][y]*...*test_weights[-1][a]

# Build the reference independently as a chain of outer products.
expected_W_F = np.asarray(test_weights[0])
for w in test_weights[1:]:
    expected_W_F = np.multiply.outer(expected_W_F, w)

# Fixed: the original nested loops only broke out of the innermost loop on a mismatch and
# then printed "Pass" unconditionally. The assertion stops the cell on any mismatch, so
# "Pass" is only reached when every entry agrees.
np.testing.assert_array_equal(W_F, expected_W_F)
print("Pass")

Pass


# Reshape Demo

Some interesting things I've found:

    Here we use `A.reshape` to reshape an array; the transformation from the original N-D array to the 2D data matrix X is itself nothing more than an `A.reshape` call.

In [9]:
# Methods to create a testing input X

from typing import List
from sklearn.base import TransformerMixin
HANDLED_FUNCTIONS = {}

class AxesArray(np.lib.mixins.NDArrayOperatorsMixin, np.ndarray):
    """A numpy-like array that keeps track of the meaning of its axes.

    Parameters:
        input_array (array-like): the data to create the array.
        axes (dict): A dictionary of axis labels to shape indices.
            Allowed keys:
                - ax_time: int
                - ax_coord: int
                - ax_sample: int
                - ax_spatial: List[int]

    Raises:
        AxesWarning if axes does not match shape of input_array
        (NOTE(review): no such check appears in the code of this class as shown
        here -- confirm whether it is still intended.)
    """

    def __new__(cls, input_array, axes):
        """View ``input_array`` as an AxesArray and attach the axis labels.

        When ``axes`` is None the view is returned without setting any label.
        Otherwise the labels in ``axes`` are written into the instance
        ``__dict__`` on top of the defaults below.
        """
        obj = np.asarray(input_array).view(cls)
        # Labels missing from `axes` fall back to these values.
        defaults = {
            "ax_time": None,
            "ax_coord": None,
            "ax_sample": None,
            "ax_spatial": [],
        }
        if axes is None:
            return obj
        obj.__dict__.update({**defaults, **axes})
        return obj

    def __array_finalize__(self, obj) -> None:
        """Copy the axis labels from ``obj`` onto this array.

        Does nothing when ``obj`` is None. Labels that ``obj`` does not carry
        default to None (or an empty list for ``ax_spatial``).
        """
        if obj is None:
            return
        self.ax_time = getattr(obj, "ax_time", None)
        self.ax_coord = getattr(obj, "ax_coord", None)
        self.ax_sample = getattr(obj, "ax_sample", None)
        self.ax_spatial = getattr(obj, "ax_spatial", [])

    @property
    def n_spatial(self):
        """Tuple with the length of each axis listed in ``ax_spatial``."""
        return tuple(self.shape[ax] for ax in self.ax_spatial)

    @property
    def n_time(self):
        """Length of the time axis, or 1 when ``ax_time`` is None."""
        return self.shape[self.ax_time] if self.ax_time is not None else 1

    @property
    def n_sample(self):
        """Length of the sample axis, or 1 when ``ax_sample`` is None."""
        return self.shape[self.ax_sample] if self.ax_sample is not None else 1

    @property
    def n_coord(self):
        """Length of the coordinate axis, or 1 when ``ax_coord`` is None."""
        return self.shape[self.ax_coord] if self.ax_coord is not None else 1

    def __array_ufunc__(
        self, ufunc, method, *inputs, out=None, **kwargs
    ):  # this method is called whenever you use a ufunc
        """Run ``ufunc`` on plain ndarray views and re-wrap the results.

        AxesArray inputs and ``out`` targets are viewed as ``np.ndarray``
        before delegating to the parent implementation. Results that were not
        written into a caller-supplied ``out`` are wrapped in a new AxesArray
        that receives this array's ``__dict__`` as its axes.
        """
        # Strip the subclass from the inputs before delegating.
        args = []
        for input_ in inputs:
            if isinstance(input_, AxesArray):
                args.append(input_.view(np.ndarray))
            else:
                args.append(input_)

        outputs = out
        if outputs:
            # Same treatment for explicitly supplied output arrays.
            out_args = []
            for output in outputs:
                if isinstance(output, AxesArray):
                    out_args.append(output.view(np.ndarray))
                else:
                    out_args.append(output)
            kwargs["out"] = tuple(out_args)
        else:
            # One placeholder per ufunc output so the zip below lines up.
            outputs = (None,) * ufunc.nout
        results = super().__array_ufunc__(ufunc, method, *args, **kwargs)
        if results is NotImplemented:
            return NotImplemented
        if method == "at":
            # This branch returns None: there is no result to wrap.
            return
        if ufunc.nout == 1:
            results = (results,)
        # Wrap fresh results; hand back caller-supplied outputs unchanged.
        results = tuple(
            (AxesArray(np.asarray(result), self.__dict__) if output is None else output)
            for result, output in zip(results, outputs)
        )
        return results[0] if len(results) == 1 else results

    def __array_function__(self, func, types, args, kwargs):
        """Dispatch a NumPy function called with an AxesArray argument.

        Functions not registered in ``HANDLED_FUNCTIONS`` are delegated to the
        parent implementation; an ndarray result is wrapped in an AxesArray
        carrying this array's axes, other non-None results are returned as-is.
        Registered functions are called only when every type in ``types`` is a
        subclass of AxesArray; otherwise ``NotImplemented`` is returned.
        """
        if func not in HANDLED_FUNCTIONS:
            arr = super(AxesArray, self).__array_function__(func, types, args, kwargs)
            if isinstance(arr, np.ndarray):
                return AxesArray(arr, axes=self.__dict__)
            elif arr is not None:
                return arr
            return
        if not all(issubclass(t, AxesArray) for t in types):
            return NotImplemented
        return HANDLED_FUNCTIONS[func](*args, **kwargs)
    
# This is how we created X from the original list of stuff, 
def concat_sample_axis(x_list: List[AxesArray]):
    """Concatenate all trajectories and axes used to create samples.

    Each trajectory is reshaped to a 2D (n_samples, n_coord) AxesArray, where
    n_samples is the product of the lengths of its spatial, time and sample
    axes. The 2D arrays are then concatenated along the sample axis.
    """
    flattened = []
    for traj in x_list:
        # Axes that get merged into the single sample axis.
        time_axes = [traj.ax_time] if traj.ax_time is not None else []
        extra_sample_axes = [traj.ax_sample] if traj.ax_sample is not None else []
        merged_axes = traj.ax_spatial + time_axes + extra_sample_axes

        two_d_axes = {"ax_sample": 0, "ax_coord": 1}
        n_rows = np.prod([traj.shape[ax] for ax in merged_axes])
        n_cols = traj.shape[traj.ax_coord]

        # The 2D data matrix is produced by a bare reshape.
        # Caveat: nothing is reordered first, so there is no guarantee that each
        # column really is one feature.
        flattened.append(AxesArray(traj.reshape((n_rows, n_cols)), two_d_axes))

    # One 2D matrix per trajectory, stacked along the sample axis.
    return np.concatenate(flattened, axis=flattened[0].ax_sample)

In [17]:
# Axis layout of the random test array: space_1, space_2, t, coord(feature)
# We carry the assumption that the first d-2 axes are spatial, the second to last is time, and the last is coords.
# No random seed is set in this cell, so A differs between runs.
A = np.random.rand(12, 13, 7, 2)
axes = {"ax_spatial": [0, 1], "ax_time": 2, "ax_coord": 3}
A_ = AxesArray(A, axes)

# brackets are needed around A_ because the input is a list of trajectories
A_2 = concat_sample_axis([A_])

# a plain reshape recovers the original array: the norm of the difference is displayed as the cell output.
np.linalg.norm(A_2.reshape(A.shape)-A)

0.0

# We now test the part where we filter data points in a subdomain out of X.

### We carry the assumption that the first d-2 axes are spatial, the second to last is time, and the last is coords.

In [19]:
from scipy.io import loadmat

# Build a sample input for our program from the Burgers data set.
data = loadmat('pysindy-master/examples/data/burgers.mat')
x = np.ravel(data['x'])
time = np.ravel(data['t'])
X, T = np.meshgrid(x, time)

# Stack the two meshgrid arrays and transpose, labelling the result axes as
# (space, time, coord).
axes = {"ax_spatial": [0], "ax_time": 1, "ax_coord": 2}
A = AxesArray(np.asarray([X, T]).T, axes)

# Only the size of the grid matters here: it is the shape of A without the trailing feature axis.
# spatiotemporal_grid is a parameter of our program
spatiotemporal_grid = np.zeros(A.shape[:-1])

# A_ (the flattened 2D data matrix) is a parameter of our program
A_ = concat_sample_axis([A])

# Reshaping the 2D matrix back onto the grid should reproduce A exactly.
Data = retrieve_data_mat(spatiotemporal_grid, A_)

print(np.linalg.norm(Data-A))

0.0


### With Data matrix retrieved, we now filter it.

In [20]:
# We want the first feature
j = 0
# We want the time point at index 50
t_ind = 50
# The subdomain is bounded by the points at index 10 and 20. We only have 1D but if we have more dimensions, 
# x_bound is expected to be a list of list, with bounds of each spatial dimensions in order.
x_bound = [[10, 20]]

# Fixed: the reference slice now uses t_ind and j instead of repeating the literals 50 and 0,
# so changing the settings above changes the expected value along with the filtered one.
Expected = np.asarray([X, T]).T[x_bound[0][0]:x_bound[0][1], t_ind, j]

obtained = filterX(Data, j, x_bound, t_ind)

print(np.linalg.norm(Expected - obtained))

0.0


# Moment of Truth: Test integral calculation

In [24]:
from sklearn.metrics import mean_squared_error
# Real part of the stored solution field.
u = np.real(data['usol'])
# Root-mean-square size of u, measured as the error against an all-zero array.
# NOTE(review): newer scikit-learn releases may no longer accept `squared=False`;
# confirm against the installed version.
rmse = mean_squared_error(u, np.zeros(u.shape), squared=False)
# Add zero-mean Gaussian noise with standard deviation rmse / 5.
# No random seed is set in the visible cells, so the noise differs between runs.
u = u + np.random.normal(0, rmse / 5.0, u.shape)

In [25]:
# Display the shape of the noisy solution array.
# presumably (space points, time points), matching the 256 x 101 grid built earlier -- confirm.
u.shape

(256, 101)

In [22]:
# The first two axes of the stacked, transposed meshgrid give the number of space and time points.
spacePts, timePts = np.shape(np.asarray([X, T]).T)[:2]

(256, 101, 2)

In [None]:
# NOTE(review): this cell has not been executed, and as written it depends on stale notebook state:
#   - `t` is not assigned in any visible cell (only `t_ind` is);
#   - `endpts` is still the plain list [4, 5] from the 1D weight test, while compute_integral
#     indexes it as endpts[i, :];
#   - `X` was rebound to the meshgrid array when the sample input was built, so it is not the
#     2D data matrix (`A_`).
# Confirm the intended arguments before running.
compute_integral(X, spatiotemporal_grid, t, j, endpts)