In [11]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import linear_model

#load theta matrix, dt vector associated with Diffusion Equation with Diffusion Term only
# Both arrays were written to the same run directory, so that directory is
# spelled out once and the two file paths are derived from it.
run_dir = '/mnt/mbi/home/e0031794/Documents/FYP/FYP_results_11_9_2019/data_slicing_val_diff_10_1/1_trial/500_subset_clean/Out_subset_500_Original DeepMod/20200220_110440'

theta_path = run_dir + '/theta.npy'

theta = np.load(theta_path)

dt_path = run_dir + '/time_deriv.npy'

# Only the first entry along the leading axis of the saved array is used
# (presumably one entry per output variable -- TODO confirm).
dt = np.load(dt_path)[0]

# The regressions that follow call .fit(theta, dt), which needs one target row
# per row of theta; fail here with a clear message instead of inside sklearn.
assert theta.shape[0] == dt.shape[0], (
    'theta and dt must have the same number of rows, got %d and %d'
    % (theta.shape[0], dt.shape[0]))
#deepmod: fit first, then normalise theta/dt and epsilon to generate bit mask

In [18]:
#vanilla LR on theta, dt from above
# Compares ordinary least squares fitted with and without column normalisation
# of theta, and prints which coefficients stand out from the rest.

def _fit_on_unit_norm_columns(estimator, X, y):
    """Fit ``estimator`` the way the removed ``normalize=True`` option did.

    The ``normalize`` keyword of scikit-learn's linear models was deprecated in
    1.0 and removed in 1.2, so passing it raises ``TypeError`` on current
    releases. This helper performs the same preprocessing explicitly: each
    column of ``X`` is centred and divided by its L2 norm before fitting, and
    the fitted ``coef_`` / ``intercept_`` are then mapped back so that they
    refer to the original, un-normalised columns.

    Defined inside this cell so that the cell runs on its own.

    Parameters
    ----------
    estimator : unfitted scikit-learn linear model (intercept fitting left on).
    X : 2-D array with one column per candidate term.
    y : target array accepted by ``estimator.fit``.

    Returns
    -------
    The same ``estimator`` instance, fitted.
    """
    col_means = X.mean(axis=0)
    X_centred = X - col_means
    col_norms = np.linalg.norm(X_centred, axis=0)
    # A constant column centres to all zeros; leave it unscaled instead of
    # dividing by zero.
    col_norms[col_norms < 10 * np.finfo(col_norms.dtype).eps] = 1.0
    estimator.fit(X_centred / col_norms, y)
    # Undo the scaling so the coefficients apply to the raw columns of X.
    estimator.coef_ = estimator.coef_ / col_norms
    estimator.intercept_ = estimator.intercept_ - np.dot(col_means, estimator.coef_.T)
    return estimator


def _median_band(coeffs):
    """Return ``(upper, lower, inside)`` for the band median +/- one std.

    ``inside`` is True where a coefficient lies within the band (bounds
    included); its negation marks the coefficients that stand out.
    """
    centre = np.median(coeffs)
    spread = np.std(coeffs)
    upper, lower = centre + spread, centre - spread
    return upper, lower, (coeffs <= upper) & (coeffs >= lower)


print('Weight Normalisation and consequences demo')
print('Normalisation for LR')
norm_lr = _fit_on_unit_norm_columns(LinearRegression(), theta, dt) #creates LR model

print('Coeffs: ', str(norm_lr.coef_))
norm_coeff = norm_lr.coef_
upper_lim, lower_lim, sparsity_mask_lr = _median_band(norm_coeff)
print('sparse_pattern: ', str(~sparsity_mask_lr) + '\n')

print('No Normalisation for LR')
lr = LinearRegression().fit(theta, dt) #creates LR model

print('Coeffs: ', str(lr.coef_))
LR_coeff = lr.coef_
upper_lim, lower_lim, sparsity_mask_lr = _median_band(LR_coeff)
print('sparse_pattern: ', str(~sparsity_mask_lr) + '\n')

print('Notice 3rd coefficient, which corresponds to diffusion coefficient is an invariant,')
print('for ordinary least squares, irregardless of normalisation')

Weight Normalisation and consequences demo
Normalisation for LR
Coeffs:  [[ 0.         -0.83730394  8.745507   14.349687   -0.07737297 -1.692133
   0.9870669   5.4431195   0.02038591  0.8381659  -0.07162718  1.537311  ]]
sparse_pattern:  [[False False  True  True False False False  True False False False False]]

No Normalisation for LR
Coeffs:  [[ 0.         -0.837323    8.745499   14.349821   -0.07737497 -1.6921293
   0.9870912   5.44313     0.02038723  0.8381655  -0.0716278   1.5373114 ]]
sparse_pattern:  [[False False  True  True False False False  True False False False False]]

Notice 3rd coefficient, which corresponds to diffusion coefficient is an invariant,
for ordinary least squares, irregardless of normalisation


In [19]:
#Lasso LR on theta, dt from above
# Compares L1-penalised regression fitted with and without column
# normalisation of theta, and prints which coefficients stand out.

def _fit_on_unit_norm_columns(estimator, X, y):
    """Fit ``estimator`` the way the removed ``normalize=True`` option did.

    The ``normalize`` keyword of scikit-learn's linear models was deprecated in
    1.0 and removed in 1.2, so passing it raises ``TypeError`` on current
    releases. This helper performs the same preprocessing explicitly: each
    column of ``X`` is centred and divided by its L2 norm before fitting, and
    the fitted ``coef_`` / ``intercept_`` are then mapped back so that they
    refer to the original, un-normalised columns.

    Defined inside this cell so that the cell runs on its own.

    Parameters
    ----------
    estimator : unfitted scikit-learn linear model (intercept fitting left on).
    X : 2-D array with one column per candidate term.
    y : target array accepted by ``estimator.fit``.

    Returns
    -------
    The same ``estimator`` instance, fitted.
    """
    col_means = X.mean(axis=0)
    X_centred = X - col_means
    col_norms = np.linalg.norm(X_centred, axis=0)
    # A constant column centres to all zeros; leave it unscaled instead of
    # dividing by zero.
    col_norms[col_norms < 10 * np.finfo(col_norms.dtype).eps] = 1.0
    estimator.fit(X_centred / col_norms, y)
    # Undo the scaling so the coefficients apply to the raw columns of X.
    estimator.coef_ = estimator.coef_ / col_norms
    estimator.intercept_ = estimator.intercept_ - np.dot(col_means, estimator.coef_.T)
    return estimator


def _median_band(coeffs):
    """Return ``(upper, lower, inside)`` for the band median +/- one std.

    ``inside`` is True where a coefficient lies within the band (bounds
    included); its negation marks the coefficients that stand out.
    """
    centre = np.median(coeffs)
    spread = np.std(coeffs)
    upper, lower = centre + spread, centre - spread
    return upper, lower, (coeffs <= upper) & (coeffs >= lower)


print('Normalisation for Lasso LR')
#normalise: subtract mean, divide by L2 norm; we normalise, then we fit L1 regression
norm_L1_lr = _fit_on_unit_norm_columns(
    linear_model.Lasso(alpha=1e-05, max_iter=50000, tol=1e-06), theta, dt)

# Densify once and reuse the array for both the printout and the mask.
norm_L1_coeff = norm_L1_lr.sparse_coef_.toarray()
print('Coeffs: ', str(norm_L1_coeff))
upper_lim, lower_lim, sparsity_mask_l1 = _median_band(norm_L1_coeff)
print('sparse_pattern: ', str(~sparsity_mask_l1) + '\n')

print('No Normalisation for Lasso LR')
#no normalisation here: L1 regression is fitted on theta as loaded
L1_lr = linear_model.Lasso(alpha=1e-05, max_iter=50000, tol=1e-06).fit(theta, dt)

L1_coeff = L1_lr.sparse_coef_.toarray()
print('Coeffs', str(L1_coeff))
upper_lim, lower_lim, sparsity_mask_l1 = _median_band(L1_coeff)

print('sparse_pattern: ', str(~sparsity_mask_l1) + '\n')

print('Notice 3rd coefficient, which corresponds to diffusion coefficient is not an invariant,')
print('with and without normalisation')

Normalisation for Lasso LR
Coeffs:  [[ 0.00000000e+00 -8.78933012e-01  8.97778225e+00  1.24961405e+01
  -8.45130086e-02  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  1.52698979e-01 -2.59345165e-04  3.88002872e-01]]
sparse_pattern:  [[False False  True  True False False False False False False False False]]

No Normalisation for Lasso LR
Coeffs [[ 0.         -0.80563444  8.722532   14.137315   -0.08469176 -1.698046
   0.9861577   5.418448    0.02130774  0.83511597 -0.07094496  1.5308359 ]]
sparse_pattern:  [[False False  True  True False False False  True False False False False]]

Notice 3rd coefficient, which corresponds to diffusion coefficient is not an invariant,
with and without normalisation


  positive)


In [15]:
# Load the sparsity pattern saved from the DeepMod run and print its first
# entry, for comparison with the regression-based patterns printed earlier.
print('Now, compare the sparse pattern obtained from Deepmod, with the above')
deepmod_pattern_path = '/mnt/mbi/home/e0031794/Documents/FYP/FYP_results_11_9_2019/data_slicing_val_diff_10_1/1_trial/500_subset_clean/google_drive_storage_Original DeepMod/sparse_pattern_500.npy'
sparse_pattern_from_deepmod = np.load(deepmod_pattern_path)
first_pattern = sparse_pattern_from_deepmod[0]
print('sparse_pattern from deepmod: ', first_pattern)

Now, compare the sparse pattern obtained from Deepmod, with the above
sparse_pattern from deepmod:  [[False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]]


In [20]:
#refitting with LR and L1 LR. select 3rd, 4th columns of theta matrix based on sparse pattern generated from
#L1 regression
#This section simulates the refitting procedure of Deepmod
#L1 and Ordinary Least squares regressions are used for comparison purposes.

def _fit_on_unit_norm_columns(estimator, X, y):
    """Fit ``estimator`` the way the removed ``normalize=True`` option did.

    The ``normalize`` keyword of scikit-learn's linear models was deprecated in
    1.0 and removed in 1.2, so passing it raises ``TypeError`` on current
    releases. This helper performs the same preprocessing explicitly: each
    column of ``X`` is centred and divided by its L2 norm before fitting, and
    the fitted ``coef_`` / ``intercept_`` are then mapped back so that they
    refer to the original, un-normalised columns.

    Defined inside this cell so that the cell runs on its own.

    Parameters
    ----------
    estimator : unfitted scikit-learn linear model (intercept fitting left on).
    X : 2-D array with one column per candidate term.
    y : target array accepted by ``estimator.fit``.

    Returns
    -------
    The same ``estimator`` instance, fitted.
    """
    col_means = X.mean(axis=0)
    X_centred = X - col_means
    col_norms = np.linalg.norm(X_centred, axis=0)
    # A constant column centres to all zeros; leave it unscaled instead of
    # dividing by zero.
    col_norms[col_norms < 10 * np.finfo(col_norms.dtype).eps] = 1.0
    estimator.fit(X_centred / col_norms, y)
    # Undo the scaling so the coefficients apply to the raw columns of X.
    estimator.coef_ = estimator.coef_ / col_norms
    estimator.intercept_ = estimator.intercept_ - np.dot(col_means, estimator.coef_.T)
    return estimator


# Zero-based indices of the 3rd and 4th columns of theta.
selected_columns = [2, 3]
reduced_theta = theta[:, selected_columns]

#vanilla LR
lr = LinearRegression().fit(reduced_theta, dt) #creates LR model
print('LR coeff', lr.coef_)

#L1 regression (columns centred and scaled to unit L2 norm before fitting)
L1_lr = _fit_on_unit_norm_columns(
    linear_model.Lasso(alpha=1e-05, max_iter=50000, tol=1e-06), reduced_theta, dt)
print('L1 coeff', L1_lr.sparse_coef_.toarray())

LR coeff [[9.679331  4.0417533]]
L1 coeff [[9.623247 3.921462]]
