In [1]:
import sys
sys.path.insert(0, '/home/ldoyle/packages')
import h5py
import numpy as np
from tqdm import tqdm
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import pandas as pd
import pysindy as ps
from lr_ed import localreg
from datetime import datetime
from scipy.fft import fft, fftfreq,rfft2

import scipy.signal as signal
from obspy.signal import filter as obsfilt
import glob


In [2]:
# Switch between the weak-form and the standard PDE feature library.
WEAK = False

# Grid sizes: x_len points on the spatial axis, t_len on the time axis
# (t_len is only used to build the weak-form spatiotemporal grid).
x_len, t_len = 5000, 6000

xs = np.arange(x_len)
# NOTE(review): dt = 1 and xs = 0..x_len-1 mean derivatives are taken per
# sample index, not in physical units -- confirm this is intended.
dt = 1

# Candidate terms u and u*u. The name builders are applied to the variable
# name, so `x + x` yields names such as "x0x0" in the printed equations.
library_functions = [lambda x: x, lambda x: x * x]
library_function_names = [lambda x: x, lambda x: x + x]

if not WEAK:
    # Standard PDE library on the 1-D spatial grid, with a constant term.
    pde_lib = ps.PDELibrary(
        library_functions=library_functions,
        function_names=library_function_names,
        derivative_order=4,
        spatial_grid=xs,
        include_bias=True,
        is_uniform=True,
    )
else:
    # Weak-form library needs the full space-time grid.
    X, T = np.meshgrid(xs, np.arange(t_len))
    XT = np.asarray([X, T]).T

    pde_lib = ps.WeakPDELibrary(
        library_functions=library_functions,
        function_names=library_function_names,
        derivative_order=4,
        spatiotemporal_grid=XT,
        is_uniform=True,
        K=1000,
    )
    

In [3]:
def printEnsemble(model, cut_off = 1e-3, median= False):
    """Print the ensemble-aggregated equation for the first state variable.

    Parameters
    ----------
    model : object
        Must expose ``coef_list`` (a sequence of coefficient arrays, one per
        ensemble member) and ``get_feature_names()``.
        Assumes each array is (n_targets, n_features) -- TODO confirm.
    cut_off : float
        Terms whose aggregated coefficient has absolute value <= ``cut_off``
        are omitted from the printed equation.
    median : bool
        Aggregate ensemble members with the median instead of the mean.

    Prints ``(x0)' = <c1> <name1> + <c2> <name2> ...`` with 4-decimal
    coefficients, or ``(x0)' = 0`` when no term exceeds the cut-off
    (previously this left a dangling, empty right-hand side).
    """
    aggregate = np.median if median else np.mean
    # Aggregate over ensemble members (axis 0) and keep row 0 only.
    coefs = aggregate(model.coef_list, axis=0)[0, :]
    kept = np.flatnonzero(np.abs(coefs) > cut_off)
    features = model.get_feature_names()

    terms = ["{0:.4f}".format(coefs[idx]) + " " + features[idx] for idx in kept]
    rhs = " + ".join(terms) if terms else "0"
    print("(x0)' = " + rhs)

In [4]:
def saveEnsemble(model,fileStr,thresh, out_dir="/home/ldoyle/notebooks/ensembleSINDyArrays/"):
    """Save ``model.coef_list`` as ``<out_dir>/<fileStr>_thresh_<thresh>.npy``.

    Parameters
    ----------
    model : object
        Must expose ``coef_list`` (the per-member ensemble coefficients).
    fileStr : str
        Prefix identifying the dataset/run in the file name.
    thresh : float
        Threshold used for the run; written into the file name in ``{:.2E}``
        format (e.g. ``1.00E-02``).
    out_dir : str
        Destination directory. Defaults to the location used originally;
        it is created if it does not exist yet.
    """
    import os  # local import keeps this cell self-contained

    os.makedirs(out_dir, exist_ok=True)
    file_name = fileStr + "_thresh_" + "{:.2E}".format(thresh)
    # np.save appends the ".npy" extension itself.
    np.save(os.path.join(out_dir, file_name), model.coef_list)

In [5]:
def SINDyImplementEnsemble(dataset, fileStr,thresh, alph=0.001, n_models=50, time_steps=200, stlsq_max =100,median=True):
    """Fit an ensemble STLSQ SINDy model to ``dataset``, then print and save it.

    Uses the notebook-level ``pde_lib`` as feature library and ``dt`` as the
    time step.

    Parameters
    ----------
    dataset : array-like
        Data passed straight to ``model.fit``.
    fileStr : str
        File-name prefix forwarded to ``saveEnsemble``.
    thresh : float
        STLSQ threshold; also echoed in the printed header and file name.
    alph : float
        STLSQ ``alpha``.
    n_models : int
        Forwarded to ``EnsembleOptimizer`` as ``n_models``.
    time_steps : int
        Forwarded to ``EnsembleOptimizer`` as ``n_subset``.
    stlsq_max : int
        STLSQ ``max_iter``.
    median : bool
        Forwarded to ``printEnsemble`` (median vs. mean aggregation).
    """
    base_opt = ps.STLSQ(threshold=thresh, alpha=alph, max_iter=stlsq_max)
    ensemble_opt = ps.EnsembleOptimizer(
        opt=base_opt,
        bagging=True,
        n_models=n_models,
        n_subset=time_steps,
    )
    smoothed_fd = ps.differentiation.SmoothedFiniteDifference()
    model = ps.SINDy(
        feature_library=pde_lib,
        optimizer=ensemble_opt,
        differentiation_method=smoothed_fd,
    )
    # NOTE(review): ensemble=True is kept as-is; printEnsemble/saveEnsemble
    # read model.coef_list, which presumably depends on it -- confirm.
    model.fit(dataset, t=dt, ensemble=True)

    print("Ensemble STLSQ with Threshold "+str(thresh))
    printEnsemble(model, median=median)
    saveEnsemble(model, fileStr, thresh)

## Unfiltered

### Dataset 1

In [30]:
path = "/data/data2/south-data-ejm/hdd/South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T16_09_15-0700/"

fileStr = "2021-11-01T2311_unfiltered"

# Files are selected by the minute in their name: T23<minute>14Z, init <= minute < final.
init = 11
final= 16

time_subsample = 10
# Each file contributes 12000 samples along axis 1 before subsampling.
samples_per_file = 12000//time_subsample
full_dat = np.zeros((5000, samples_per_file*(final-init)))

for k, minute in enumerate(range(init, final)):
    file = "South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T23"+"{:02d}".format(minute)+"14Z.h5"
    # The context manager closes the file even if the read raises.
    with h5py.File(path+file, 'r') as f:
        # Read only rows 7500..12499 instead of the whole array.
        data = f['Acquisition']['Raw[0]']['RawData'][7500:12500, :].astype('int64')

    full_dat[:, k*samples_per_file:(k+1)*samples_per_file] = data[:, ::time_subsample]

# Scale the whole clip to unit standard deviation.
working_dat = full_dat/np.std(full_dat)


In [31]:
# Append a singleton trailing axis so the array is 3-D for the SINDy fit.
working_dat = np.reshape(working_dat, (working_dat.shape[0], working_dat.shape[1], 1))


In [32]:
# STLSQ threshold for this ensemble run.
thresh = 1e-1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.6411 x0_1 + 0.3544 x0_111


In [33]:
# STLSQ threshold for this ensemble run.
thresh = 1e-2
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.6384 x0_1 + 0.3543 x0_111


In [34]:
# STLSQ threshold for this ensemble run.
thresh = 0.005
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.005
(x0)' = 0.6378 x0_1 + 0.3499 x0_111 + 0.0100 x0x0_1


In [35]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.6369 x0_1 + 0.3516 x0_111 + 0.0106 x0x0_1 + 0.0068 x0x0x0_1 + 0.0058 x0x0x0_111


In [36]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.6394 x0_1 + 0.0048 x0_11 + 0.3513 x0_111 + 0.0080 x0x0_1 + 0.0056 x0x0x0_1 + 0.0044 x0x0x0_111


### Dataset 2

In [37]:
# NOTE: this cell reuses `path` defined in the Dataset 1 load cell above.
time_subsample = 10
# Files are selected by the minute in their name: T23<minute>14Z.
minutes = range(21, 26)
# Each file contributes 12000 samples along axis 1 before subsampling.
samples_per_file = 12000//time_subsample
full_dat = np.zeros((5000, samples_per_file*len(minutes)))
fileStr = "2021-11-01T2321_unfiltered"


for k, minute in enumerate(minutes):
    file = "South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T23"+"{:02d}".format(minute)+"14Z.h5"
    # The context manager closes the file even if the read raises.
    with h5py.File(path+file, 'r') as f:
        # Read only rows 7500..12499 instead of the whole array.
        data = f['Acquisition']['Raw[0]']['RawData'][7500:12500, :].astype('int64')

    full_dat[:, k*samples_per_file:(k+1)*samples_per_file] = data[:, ::time_subsample]
    


In [38]:
# Scale to unit standard deviation, then append a singleton trailing axis.
working_dat = full_dat / full_dat.std()
working_dat = np.reshape(working_dat, (working_dat.shape[0], working_dat.shape[1], 1))


In [39]:
# STLSQ threshold for this ensemble run.
thresh = 1e-1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.6769 x0_1 + 0.3876 x0_111


In [40]:
# STLSQ threshold for this ensemble run.
thresh = 1e-2
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.6666 x0_1 + 0.3801 x0_111


In [41]:
# STLSQ threshold for this ensemble run.
thresh = 0.005
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.005
(x0)' = 0.6719 x0_1 + 0.3846 x0_111


In [42]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.6655 x0_1 + -0.0078 x0_11 + 0.3825 x0_111 + -0.0038 x0x0_1 + 0.0081 x0x0x0_1 + -0.0123 x0x0_111 + 0.0054 x0x0x0_111


In [43]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = -0.0017 x0x0 + 0.6669 x0_1 + -0.0031 x0_11 + 0.3806 x0_111 + -0.0022 x0_1111 + -0.0034 x0x0_1 + 0.0077 x0x0x0_1 + -0.0022 x0x0_11 + 0.0026 x0x0x0_11 + -0.0114 x0x0_111 + 0.0078 x0x0x0_111


## Drift Removal Load

### Dataset 1

In [6]:
fileStr = "2021-11-01T2311_normal"

# Load the saved clip, keep every 10th column, scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T231114Z.npy"
)[:, ::10]
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [7]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.6893 x0_1 + 0.3284 x0_111


In [8]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.7055 x0_1 + 0.3357 x0_111


In [9]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.7133 x0_1 + 0.3361 x0_111 + 0.0041 x0x0_1 + -0.0158 x0x0x0_1 + -0.0065 x0x0x0_111


In [10]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.7117 x0_1 + 0.0055 x0_11 + 0.3367 x0_111 + 0.0022 x0x0_1 + -0.0174 x0x0x0_1 + 0.0034 x0x0_111 + -0.0093 x0x0x0_111


### Dataset 2

In [11]:
fileStr = "2021-11-01T2321_normal"

# Load the saved clip, keep every 10th column, scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T232114Z.npy"
)[:, ::10]
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [12]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.6743 x0_1 + 0.3169 x0_111


In [13]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.6767 x0_1 + 0.3179 x0_111


In [14]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.6815 x0_1 + 0.3201 x0_111 + 0.0028 x0x0_1


In [15]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.6828 x0_1 + -0.0049 x0_11 + 0.3202 x0_111 + -0.0029 x0x0x0_1 + 0.0034 x0x0_11 + 0.0032 x0x0x0_11


### Dataset 3

In [16]:
fileStr = "2021-11-02T0511_normal"

# Load the saved clip, keep every 10th column, scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T051114Z.npy"
)[:, ::10]
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [17]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.6798 x0_1 + 0.3204 x0_111


In [18]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.6872 x0_1 + 0.3232 x0_111


In [19]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.6890 x0_1 + -0.0083 x0_11 + 0.3277 x0_111 + -0.0035 x0x0x0_1


In [20]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.6921 x0_1 + -0.0024 x0_11 + 0.3291 x0_111 + -0.0061 x0x0x0_1 + -0.0014 x0x0_111


### Dataset 4

In [21]:
fileStr = "2021-11-02T2311_normal"

# Load the saved clip, keep every 10th column, scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T231114Z.npy"
)[:, ::10]
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [22]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.6189 x0_1 + 0.2938 x0_111


In [23]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.6197 x0_1 + 0.2941 x0_111


In [24]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.6234 x0_1 + 0.2959 x0_111 + -0.0101 x0x0x0_1


In [25]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.6212 x0_1 + -0.0059 x0_11 + 0.2935 x0_111 + -0.0020 x0_1111 + -0.0022 x0x0_1 + -0.0099 x0x0x0_1 + -0.0018 x0x0_111 + -0.0038 x0x0x0_111


## Multifilt

### Dataset 1

In [6]:
fileStr = "2021-11-01T2311_smoothed_rad_4"

# Load the saved clip (no extra subsampling here), scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/rad_4_smoothed_decimated_normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T231114Z.npy"
)
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [7]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.9937 x0_1


In [8]:
# STLSQ threshold for this ensemble run.
thresh = 0.05
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.05
(x0)' = 0.9938 x0_1


In [9]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.9944 x0_1 + 0.0475 x0_111 + 0.0450 x0x0x0_111


In [10]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.9921 x0_1 + 0.0469 x0_111 + 0.0032 x0x0x0_1 + 0.0454 x0x0x0_111


In [11]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.9912 x0_1 + 0.0466 x0_111 + 0.0033 x0x0x0_1 + 0.0489 x0x0x0_111


### Dataset 2

In [12]:
fileStr = "2021-11-01T2321_smoothed_rad_4"

# Load the saved clip (no extra subsampling here), scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/rad_4_smoothed_decimated_normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-01T232114Z.npy"
)
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [13]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.9939 x0_1


In [14]:
# STLSQ threshold for this ensemble run.
thresh = 0.05
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.05
(x0)' = 0.9943 x0_1 + 0.0599 x0_111


In [15]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.9947 x0_1 + 0.0550 x0_111 + 0.0472 x0x0x0_111


In [16]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.9943 x0_1 + 0.0544 x0_111 + 0.0478 x0x0x0_111


In [17]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.9919 x0_1 + 0.0542 x0_111 + 0.0027 x0x0x0_1 + 0.0525 x0x0x0_111


### Dataset 3

In [18]:
fileStr = "2021-11-02T0511_smoothed_rad_4"

# Load the saved clip (no extra subsampling here), scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/rad_4_smoothed_decimated_normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T051114Z.npy"
)
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [19]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.9942 x0_1


In [20]:
# STLSQ threshold for this ensemble run.
thresh = 0.05
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.05
(x0)' = 0.9944 x0_1 + 0.0645 x0_111


In [21]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.9949 x0_1 + 0.0578 x0_111 + 0.0445 x0x0x0_111


In [22]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.9947 x0_1 + 0.0579 x0_111 + 0.0458 x0x0x0_111


In [23]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.9924 x0_1 + 0.0563 x0_111 + 0.0023 x0x0x0_1 + 0.0502 x0x0x0_111


### Dataset 4

In [24]:
fileStr = "2021-11-02T2311_smoothed_rad_4"

# Load the saved clip (no extra subsampling here), scale to unit standard
# deviation and append a singleton trailing axis.
working_dat = np.load(
    "/home/ldoyle/notebooks/channel_drift_5_min_clips/rad_4_smoothed_decimated_normalized_5_min_start_South-C1-LR-95km-P1kHz-GL50m-SP2m-FS200Hz_2021-11-02T231114Z.npy"
)
scaler_val = working_dat.std()
working_dat = (working_dat / scaler_val).reshape(working_dat.shape[0], working_dat.shape[1], 1)


In [25]:
# STLSQ threshold for this ensemble run.
thresh = 0.1
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.1
(x0)' = 0.9957 x0_1 + 0.1386 x0_111


In [26]:
# STLSQ threshold for this ensemble run.
thresh = 0.05
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.05
(x0)' = 0.9961 x0_1 + 0.1142 x0_111 + 0.0625 x0x0x0_111


In [27]:
# STLSQ threshold for this ensemble run.
thresh = 0.01
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.01
(x0)' = 0.9963 x0_1 + 0.1119 x0_111 + 0.0615 x0x0x0_111


In [28]:
# STLSQ threshold for this ensemble run.
thresh = 0.003
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.003
(x0)' = 0.9960 x0_1 + 0.1103 x0_111 + 0.0636 x0x0x0_111


In [29]:
# STLSQ threshold for this ensemble run.
thresh = 0.001
SINDyImplementEnsemble(dataset=working_dat, fileStr=fileStr, thresh=thresh)

Ensemble STLSQ with Threshold 0.001
(x0)' = 0.9939 x0_1 + 0.1089 x0_111 + 0.0021 x0x0x0_1 + 0.0714 x0x0x0_111
