In [1]:
import numpy as np
from scipy.linalg import svd, norm
from numpy.random import randn, rand
np.random.seed(0)

In [None]:
# generate random data matrix
n,d = 6,4
X = randn(n,d)

# optional: give it linearly dependent columns
X[:,3] = X[:,2]
X

In [3]:
U,S,V = svd(X, full_matrices=False)

In [4]:
U.T@U

array([[ 1.00000000e+00, -7.38697485e-17,  3.23622199e-17,
        -3.99817880e-17],
       [-7.38697485e-17,  1.00000000e+00, -3.81412273e-17,
         5.93941004e-17],
       [ 3.23622199e-17, -3.81412273e-17,  1.00000000e+00,
        -4.16017539e-17],
       [-3.99817880e-17,  5.93941004e-17, -4.16017539e-17,
         1.00000000e+00]])

array([[ 1.00000000e+00, -7.38697485e-17,  3.23622199e-17,
        -3.99817880e-17],
       [-7.38697485e-17,  1.00000000e+00, -3.81412273e-17,
         5.93941004e-17],
       [ 3.23622199e-17, -3.81412273e-17,  1.00000000e+00,
        -4.16017539e-17],
       [-3.99817880e-17,  5.93941004e-17, -4.16017539e-17,
         1.00000000e+00]])

In [5]:
np.allclose(U.T@U, np.identity(d))

True

True

In [6]:
U@U.T

array([[ 0.80175178, -0.06399596, -0.00612596,  0.30096884,  0.24817008,
         0.05140258],
       [-0.06399596,  0.9701751 ,  0.00504366,  0.0348882 ,  0.15022773,
         0.03207643],
       [-0.00612596,  0.00504366,  0.9944328 ,  0.05699362, -0.04603767,
        -0.01027116],
       [ 0.30096884,  0.0348882 ,  0.05699362,  0.12011976,  0.0995328 ,
         0.02713897],
       [ 0.24817008,  0.15022773, -0.04603767,  0.0995328 ,  0.15300143,
        -0.18278125],
       [ 0.05140258,  0.03207643, -0.01027116,  0.02713897, -0.18278125,
         0.96051913]])

array([[ 0.80175178, -0.06399596, -0.00612596,  0.30096884,  0.24817008,
         0.05140258],
       [-0.06399596,  0.9701751 ,  0.00504366,  0.0348882 ,  0.15022773,
         0.03207643],
       [-0.00612596,  0.00504366,  0.9944328 ,  0.05699362, -0.04603767,
        -0.01027116],
       [ 0.30096884,  0.0348882 ,  0.05699362,  0.12011976,  0.0995328 ,
         0.02713897],
       [ 0.24817008,  0.15022773, -0.04603767,  0.0995328 ,  0.15300143,
        -0.18278125],
       [ 0.05140258,  0.03207643, -0.01027116,  0.02713897, -0.18278125,
         0.96051913]])

In [7]:
np.allclose(V.T @ V, np.identity(d))

True

True

In [8]:
np.allclose(V @ V.T, np.identity(d))

True

True

In [9]:
S

array([4.15760175e+00, 2.28949949e+00, 1.01350732e+00, 1.48389401e-16])

array([4.15760175e+00, 2.28949949e+00, 1.01350732e+00, 1.48389401e-16])

In [10]:
# if we have a linearly dependent column, 
# decomposition is just as good if we ignore the 0 in sigma and reduce r by 1
for k in range(d):
    print(f"Error of rank {k+1} approximation: ", 
          np.linalg.norm(X - U[:,:k]@np.diag(S[:k])@(V[:,:k]).T))
    

Error of rank 1 approximation:  4.853314053310529
Error of rank 2 approximation:  3.4882492591277985
Error of rank 3 approximation:  3.246556769278742
Error of rank 4 approximation:  3.1814616416082724
Error of rank 1 approximation:  4.853314053310529
Error of rank 2 approximation:  3.4882492591277985
Error of rank 3 approximation:  3.246556769278742
Error of rank 4 approximation:  3.1814616416082724


In [11]:
# form data from noisy linear model
wtrue = randn(d)
y = X@wtrue + .1*randn(n);

In [12]:
# solve least squares problem to estimate w
w = V@np.diag(S**(-1))@U.T@y
w

array([-3.78124163e+13,  1.48442532e+14, -2.50043200e+13,  1.55210116e+14])

array([-3.78124163e+13,  1.48442532e+14, -2.50043200e+13,  1.55210116e+14])

In [13]:
# use rank k approximation to design matrix X
# k=4 is full rank
# when design matrix X has rank 3, k=3 gives 0 error approximation
# while k=2 results in some loss of accuracy - but not much!
k = 3
w = V[:,:k]@np.diag(S[:k]**(-1))@(U[:,:k]).T@y
w

array([ 2.32744214, -0.25484184, -1.30800754,  0.60002364])

array([ 2.32744214, -0.25484184, -1.30800754,  0.60002364])

In [14]:
# error in normal equations 
norm(X.T@X@w - X.T@y)

5.724543154969718

5.724543154969718

In [15]:
w[2] += 1
w[3] -= 1
norm(X.T@X@w - X.T@y)

5.724543154969718

5.724543154969718

In [16]:
w

array([ 2.32744214, -0.25484184, -0.30800754, -0.39997636])

array([ 2.32744214, -0.25484184, -0.30800754, -0.39997636])

In [17]:
# how good is our estimate of w?
norm(w - wtrue) / norm(wtrue)

0.46987491310072677

0.46987491310072677

In [18]:
# compute mean square error
np.mean((y - X@w)**2)

0.5699848419421699

0.5699848419421699