In [21]:
import numpy as np
from scipy.linalg import svd, norm
from numpy.random import randn, rand
# seed NumPy's global random generator so the randn draws in this notebook are reproducible
np.random.seed(0)

In [22]:
# Build a random n-by-d data matrix with standard-normal entries.
n = 6  # number of rows
d = 4  # number of columns
X = randn(n, d)

# Optional: make the columns linearly dependent by copying column 2 into column 3.
X[:, 3] = X[:, 2]
X

array([[ 1.76405235,  0.40015721,  0.97873798,  0.97873798],
       [ 1.86755799, -0.97727788,  0.95008842,  0.95008842],
       [-0.10321885,  0.4105985 ,  0.14404357,  0.14404357],
       [ 0.76103773,  0.12167502,  0.44386323,  0.44386323],
       [ 1.49407907, -0.20515826,  0.3130677 ,  0.3130677 ],
       [-2.55298982,  0.6536186 ,  0.8644362 ,  0.8644362 ]])

In [24]:
# Construct a vector w in the nullspace of X: columns 2 and 3 of X are equal,
# so (-1) * column 2 + (+1) * column 3 cancels exactly and X @ w is zero.
w = np.zeros(d)
w[2], w[3] = -1, 1
X @ w

array([0., 0., 0., 0., 0., 0.])

In [25]:
# thin (economy-size) SVD of X: full_matrices=False keeps only the first d
# columns of U; S holds the singular values and Vt is V transposed
U,S,Vt = svd(X, full_matrices=False)

In [26]:
# sanity check: the three factors multiply back to X (up to floating-point tolerance)
np.allclose(U@np.diag(S)@Vt, X)

True

In [27]:
# Gram matrix of the columns of U; for orthonormal columns this is the
# d x d identity up to rounding error
U.T@U

array([[ 1.00000000e+00, -7.38697485e-17,  3.23622199e-17,
        -3.99817880e-17],
       [-7.38697485e-17,  1.00000000e+00, -3.81412273e-17,
         5.93941004e-17],
       [ 3.23622199e-17, -3.81412273e-17,  1.00000000e+00,
        -4.16017539e-17],
       [-3.99817880e-17,  5.93941004e-17, -4.16017539e-17,
         1.00000000e+00]])

In [28]:
# the columns of U are orthonormal: U^T U matches the d x d identity up to rounding
np.allclose(U.T @ U, np.eye(d))

True

In [30]:
# U comes from the thin SVD, so it has as many rows as X and d columns
U.shape

(6, 4)

In [29]:
# U is tall (more rows than columns), so U @ U.T is n x n and is NOT the identity;
# with orthonormal columns it is the orthogonal projector onto the column space of U
U@U.T

array([[ 0.80175178, -0.06399596, -0.00612596,  0.30096884,  0.24817008,
         0.05140258],
       [-0.06399596,  0.9701751 ,  0.00504366,  0.0348882 ,  0.15022773,
         0.03207643],
       [-0.00612596,  0.00504366,  0.9944328 ,  0.05699362, -0.04603767,
        -0.01027116],
       [ 0.30096884,  0.0348882 ,  0.05699362,  0.12011976,  0.0995328 ,
         0.02713897],
       [ 0.24817008,  0.15022773, -0.04603767,  0.0995328 ,  0.15300143,
        -0.18278125],
       [ 0.05140258,  0.03207643, -0.01027116,  0.02713897, -0.18278125,
         0.96051913]])

In [34]:
# Vt is square (d x d)
Vt.shape

(4, 4)

In [31]:
# the rows of Vt are orthonormal: Vt Vt^T matches the d x d identity up to rounding
np.allclose(Vt @ Vt.T, np.eye(d))

True

In [32]:
# Vt is square, so its columns are orthonormal as well: Vt^T Vt is also the identity
np.allclose(Vt.T @ Vt, np.eye(d))

True

In [35]:
# singular values of X, largest first; with two identical columns in X the
# last one is numerically zero (on the order of machine precision)
S

array([4.15760175e+00, 2.28949949e+00, 1.01350732e+00, 1.48389401e-16])

In [37]:
# With a linearly dependent column the last singular value is (numerically) zero,
# so dropping it -- i.e. reducing the rank r by 1 -- reconstructs X just as well.
for k in range(d + 1):
    # rank-k reconstruction from the k leading singular triplets
    X_k = U[:, :k] @ np.diag(S[:k]) @ Vt[:k, :]
    approx_error = np.linalg.norm(X - X_k)
    print(f"Error of rank {k} approximation: ", approx_error)
    

Error of rank 0 approximation:  4.853314053310529
Error of rank 1 approximation:  2.5037981161489284
Error of rank 2 approximation:  1.0135073191135213
Error of rank 3 approximation:  2.0282945925593685e-15
Error of rank 4 approximation:  2.0404123996834285e-15


In [38]:
# Simulate responses from a noisy linear model: y = X @ wtrue + noise.
wtrue = randn(d)        # ground-truth coefficients
noise = .1 * randn(n)   # Gaussian noise with standard deviation 0.1
y = X @ wtrue + noise

In [39]:
# solve least squares problem to estimate w
# (applies the SVD-based pseudoinverse: w = V diag(1/S) U^T y, using every singular value)
# NOTE: when the smallest singular value is numerically zero (as it is for an X
# with two identical columns), its reciprocal is enormous and the estimate blows
# up along the corresponding direction -- the result is not a meaningful solution.
w = Vt.T@np.diag(S**(-1))@U.T@y
w

array([ 2.32981892e+00, -1.49417792e+00, -1.55210116e+14,  1.55210116e+14])

In [43]:
# Least squares through a rank-k approximation of the design matrix X.
#   k = 4 is full rank (keeps every singular value)
#   k = 3 gives a zero-error approximation when X has rank 3
#   k = 2 loses some accuracy -- but not much!
k = 3
U_k, S_k, Vt_k = U[:, :k], S[:k], Vt[:k, :]
w = Vt_k.T @ np.diag(S_k**(-1)) @ U_k.T @ y
w

array([ 2.32981892, -1.45396574, -0.07338535, -0.07338535])

In [41]:
# Residual of the normal equations X^T X w = X^T y; it is ~0 at a least-squares solution.
gram_w = X.T @ X @ w
rhs = X.T @ y
norm(gram_w - rhs)

2.175583928816829e-15

In [16]:
# Shift w by (0, 0, 1, -1). Because columns 2 and 3 of X are identical this
# direction lies in the nullspace of X, so the normal equations stay satisfied.
# NOTE: w is modified in place -- re-running this cell shifts it again.
w[2:4] += (1, -1)
norm(X.T @ X @ w - X.T @ y)

1.2560739669470201e-15

In [17]:
# display the current coefficient vector (after the in-place shift of entries 2 and 3)
w

array([ 2.32981892, -1.45396574,  0.92661465, -1.07338535])

In [18]:
# Relative error of the estimate: ||w - wtrue|| / ||wtrue||.
estimate_gap = norm(w - wtrue)
estimate_gap / norm(wtrue)

0.4628663965326436

In [19]:
# Mean squared error of the fitted values X @ w against the observations y.
residual = y - X @ w
np.mean(residual**2)

0.006294618627216451

In [20]:
# Alternatively, solve the same problem with numpy.linalg.lstsq.
# For a rank-deficient X the least-squares solution is not unique; lstsq returns
# the minimum-norm one, so it need not coincide with a w that has been moved
# along the nullspace of X (the distance below then equals the size of that shift).
lstsq_result = np.linalg.lstsq(X, y, rcond=None)
w_lstsq = lstsq_result[0]  # first entry is the solution vector
norm(w_lstsq - w)

1.4142135623730956