In [13]:
using LinearAlgebra, Random, Statistics

In [14]:
# fix the RNG seed so that Restart & Run All reproduces the same data
Random.seed!(1234)

# generate random data matrix: n rows (samples), d columns (features)
n,d = 10,4
X = randn(n,d)

# optional: give it linearly dependent columns
# (column 3 becomes a copy of column 2, so X is rank-deficient)
X[:,3] = X[:,2]
X

10×4 Array{Float64,2}:
 -0.164465   0.110059   0.110059   1.75391 
  0.292468   0.8759     0.8759    -0.538361
  0.697349   0.557067   0.557067   0.197553
 -0.166506  -0.768746  -0.768746  -0.78873 
 -0.454266   0.398155   0.398155   0.502666
  1.34205   -0.128545  -0.128545   0.7512  
  1.06331    0.462167   0.462167   0.917743
 -0.999016  -1.35781   -1.35781    0.115135
 -2.60263   -0.173395  -0.173395  -0.696474
  0.37391    0.599984   0.599984  -1.41404 

In [15]:
# thin (economy-size) SVD of the data matrix: X = U * Diagonal(σ) * V'
U, σ, V = svd(X; full = false)

SVD{Float64,Float64,Array{Float64,2}}([0.128366 -0.469096 0.465738 0.123233; 0.174857 0.397857 0.169454 0.872092; … ; -0.623099 0.179289 0.473141 0.00366621; 0.0706736 0.569754 -0.154467 -0.214903], [3.91558, 2.86535, 2.2191, 2.29805e-16], [0.805257 0.354018 0.354018 0.317654; -0.0317766 0.404048 0.404048 -0.820049; -0.592074 0.4598 0.4598 0.476041; 0.0 -0.707107 0.707107 7.16094e-15])

In [16]:
# Gram matrix of the left singular vectors; it should be the identity up to rounding
adjoint(U) * U

4×4 Array{Float64,2}:
 1.0          4.59931e-17  7.2144e-17   1.05418e-16
 4.59931e-17  1.0          2.3414e-16   1.57187e-16
 7.2144e-17   2.3414e-16   1.0          2.49358e-16
 1.05418e-16  1.57187e-16  2.49358e-16  1.0        

In [17]:
# Gram matrix of the right singular vectors; it should be the identity up to rounding
adjoint(V) * V

4×4 Array{Float64,2}:
  1.0          -3.66493e-17  -6.36027e-17  -1.0975e-16 
 -3.66493e-17   1.0          -5.00695e-17  -1.55059e-16
 -6.36027e-17  -5.00695e-17   1.0          -7.11794e-17
 -1.0975e-16   -1.55059e-16  -7.11794e-17   1.0        

In [18]:
# singular values of X as returned by svd; when column 3 of X duplicates column 2,
# the last one is zero up to floating-point rounding
σ

4-element Array{Float64,1}:
 3.9155752844282916    
 2.8653538541498675    
 2.2190951682959965    
 2.2980473636897407e-16

In [19]:
# decomposition is just as good if we ignore the (numerically) zero entries of sigma.
# The number r of singular values to keep is derived from σ with a rounding-noise
# threshold rather than hard-coded, so the check also holds when X is left full rank.
r = count(σ .> maximum(size(X)) * eps(first(σ)))
norm(X - U[:,1:r]*Diagonal(σ[1:r])*(V[:,1:r])')

2.4240070209636967e-14

In [20]:
# draw the true weight vector w♮, then build the observations
# y = X*w♮ + noise, where the noise is 0.1 times a standard normal draw per sample
w♮ = randn(d)
y = X * w♮ .+ 0.1 .* randn(n);

In [28]:
# solve least squares problem to estimate w

# numerical rank of X: number of singular values above a rounding-noise threshold
# (derived from σ instead of hard-coding 3, so it stays correct if X is full rank)
r = count(σ .> maximum(size(X)) * eps(first(σ)))

# full svd - takes inverse of 0! When X is rank-deficient this divides by a singular
# value that is zero up to rounding, so the result is unreliable. It is kept under
# its own name for inspection instead of being silently overwritten.
w_full = V*Diagonal(σ.^(-1))*U'*y

# thin svd restricted to the r nonzero singular values (pseudo-inverse solution)
w = V[:,1:r]*Diagonal(σ[1:r].^(-1))*(U[:,1:r])'*y

4-element Array{Float64,1}:
  0.8090742181023293  
  0.027401195225534025
  0.027401195225535746
 -0.1465025139148333  

In [29]:
# relative Euclidean error of the estimate w against the true weights w♮ -> not great
norm(w♮ - w) / norm(w♮)

0.4630342022012109

In [27]:
# mean squared error of the predictions X*w against y -> but prediction is good
# NOTE(review): this cell's execution count (27) is lower than that of the cell that
# computes w (28); re-run it after the solve so the displayed value matches that w
mean(abs2, y - X*w)

1.279746640042116

## Q: Why is the prediction good even though the estimate of w is poor?

Answer: ...

In [30]:
# the backslash operator solves the same least squares problem in one step;
# measure how far its solution is from the SVD-based estimate w
w_backslash = X \ y
norm(w - w_backslash)

1.2938618523521542e-15