In [None]:
%load_ext autoreload
%autoreload 2 
%reload_ext autoreload
%matplotlib inline
import matplotlib.pyplot as plt

# always import gbm_algos first !
import xgboost, lightgbm, catboost
from gplearn.genetic import SymbolicRegressor

# To access the contents of the parent dir
import sys; sys.path.insert(0, '../')
import os
from scipy.io import loadmat
from utils import *
from preprocess import *
from models import RobustPCANN

# Let's use fancy optimizers
from optimizers import Lookahead, AdamGC, SGDGC
from madgrad import MADGRAD
from lbfgsnew import LBFGSNew

from pytorch_robust_pca import *

# Modify at /usr/local/lib/python3.9/site-packages/torch_lr_finder/lr_finder.py
from torch_lr_finder import LRFinder

# Tracking
from tqdm import trange

import sympy
import sympytorch

In [None]:
# Load the pickled KS solution (presumably Kuramoto-Sivashinsky -- confirm against the data source).
# NOTE(review): pickle_load comes from the project's utils; only point it at trusted files.
DATA_PATH = "../deephpms_data/KS_simple3.pkl"
data = pickle_load(DATA_PATH)

# Coordinate vectors and their mesh
t, x = data['t'], data['x']
X, T = np.meshgrid(x, t)

# Exact is the transposed solution; u_star is the solution flattened into one column.
Exact = data['u'].T
u_star = Exact.T.flatten()[:, np.newaxis]

In [None]:
# Adding noise
# The clean signal is rebuilt from `Exact` (same expression as the loading cell)
# rather than taken from the current `u_star`. Re-running this cell therefore
# never stacks a second layer of noise on already-noisy data, and `u_star_clean`
# always refers to the noise-free signal.
noise_intensity = 1
u_star_clean = Exact.T.flatten()[:, None]
noise = perturb(u_star_clean, intensity=noise_intensity, noise_type="normal", overwrite=False)
u_star = u_star_clean + noise
print("Perturbed u_star with intensity =", float(noise_intensity))

# Robust PCA splits the noisy signal into two parts; the names suggest
# L = low-rank and S = sparse (confirm against R_pca_numpy).
print("Running Robust PCA on u_star")
rpca = R_pca_numpy(u_star)
u_star_L, u_star_S = rpca.fit(tol=1e-16, max_iter=10000, iter_print=100, verbose=False)
# Reconstruction error of the decomposition (L + S versus the noisy input)
print('Robust PCA Loss:', mean_squared_error(u_star, u_star_L+u_star_S))

In [None]:
# MSE between the injected noise and the S component returned by Robust PCA
rpca_sq_err = (noise - u_star_S) ** 2
rpca_sq_err.mean()

In [None]:
# Denoise the noisy signal with the FFT helper and convert both results to NumPy arrays.
fft_results = fft1d_denoise(to_tensor(u_star), thres=None, c=0)
out, PSD = (result.detach().numpy() for result in fft_results)

In [None]:
# MSE between the injected noise and the FFT residual (noisy input minus denoised output)
fft_residual = u_star - out
((noise - fft_residual) ** 2).mean()

In [None]:
# Number of repeated experiments; every buffer has one row per sample of
# u_star_S and one column per experiment.
n_exps = 50
results_shape = (u_star_S.shape[0], n_exps)

all_u_star_S = np.zeros(results_shape)    # Robust PCA S component per run
all_fft_noises = np.zeros(results_shape)  # FFT residual (input - denoised) per run
all_noises = np.zeros(results_shape)      # injected noise per run

Some goals:
    
    - Is FFT-based denoising better than the Robust PCA algorithm for initialization?
    - If so, what is the recommended value of `c` with respect to the varied `noise_intensity`?

In [None]:
# Repeat the perturb -> denoise experiment n_exps times. Each run writes one
# column into all_noises, all_u_star_S and all_fft_noises.
noise_intensity = 1  # identical for every run, so set once up front

for i in range(n_exps):
    run_col = slice(i, i + 1)  # slice (not int) so the target stays a 2-D column
    print(i+1)

    # Start from the clean signal and draw a new noise realisation
    u_star = Exact.T.flatten()[:,None]
    noise = perturb(u_star, intensity=noise_intensity, noise_type="normal", overwrite=False)
    u_star = u_star + noise
    print("Perturbed u_star with intensity =", float(noise_intensity))
    all_noises[:, run_col] = noise

    # Robust PCA: keep the S component for this run
    print("Running Robust PCA on u_star")
    rpca = R_pca_numpy(u_star)
    u_star_L, u_star_S = rpca.fit(tol=1e-16, max_iter=10000, iter_print=100, verbose=False)
    all_u_star_S[:, run_col] = u_star_S

    # FFT denoising: keep the residual (noisy input minus denoised output)
    print("Running FFT c=0 on u_star")
    out, PSD = fft1d_denoise(to_tensor(u_star), thres=None, c=0)
    all_fft_noises[:, run_col] = u_star - out.detach().numpy()

    print()

print("DONE...")

In [None]:
# Per-run MSE between the injected noise and each method's noise estimate.
# Column slices (i:i+1) are kept so each mean is taken over the same 2-D view.
rpca_mse = [
    ((all_noises[:, i:i+1] - all_u_star_S[:, i:i+1]) ** 2).mean()
    for i in range(n_exps)
]
fft_mse = [
    ((all_noises[:, i:i+1] - all_fft_noises[:, i:i+1]) ** 2).mean()
    for i in range(n_exps)
]

In [None]:
from scipy.stats import ttest_ind

In [None]:
# One-sided Welch t-test (equal_var=False) on the per-run MSEs.
#   H0: mean FFT error >= mean Robust PCA error
#   H1: mean FFT error <  mean Robust PCA error
# alternative='less' already returns the one-sided p-value, so it is compared to
# alpha directly; halving it again would apply the one-sided correction twice
# and effectively test at alpha = 0.10.
alpha = 0.05
t_stat, p_val = ttest_ind(fft_mse, rpca_mse, equal_var=False, alternative='less')
if p_val < alpha:
    print("FFT is better than Robust PCA")
else:
    # Failing to reject H0 is not evidence that Robust PCA is better.
    print("No significant evidence that FFT is better than Robust PCA")

In [None]:
# Display the test statistic and p-value.
# A previous run gave (-780.8335423299599, 2.6398217397035094e-102),
# which the author noted as: "FFT c=0 is better than Robust PCA".
(t_stat, p_val)

In [None]:
import seaborn as sns
import pandas as pd

In [None]:
# Histogram of the per-run MSEs, one series per denoising method
tmp = pd.DataFrame({"FFT": fft_mse, "Robust PCA": rpca_mse})
ax = sns.histplot(data=tmp, stat="count")  # histplot returns the Axes it drew on
ax.set_xlabel("MSE to the true noise")
plt.show()
# tmp