In [1]:
using LinearAlgebra, Random, Statistics

In [2]:
# Build a random n×d data matrix with i.i.d. standard normal entries.
n = 10
d = 4
X = randn(n, d)

# Optional: make X rank-deficient by copying column 2 into column 3.
X[:, 3] = X[:, 2]

10-element Array{Float64,1}:
 -0.2292924575926503 
  3.0038579866493063 
 -0.04186143334300543
 -0.5569761886293225 
 -0.9678994091362471 
  0.08559138044446177
  1.6572755838325788 
 -0.14945559003638517
 -1.3642845508984902 
  0.49126493235116814

In [3]:
# Factor X = U * Diagonal(σ) * V'. `full = false` is the default and gives the
# reduced factorization, so U has as many columns as there are singular values.
U, σ, V = svd(X; full = false)

SVD{Float64,Float64,Array{Float64,2}}([-0.0583343 -0.31289 0.116624 0.0253931; 0.772663 -0.139247 -0.11239 -0.58839; … ; -0.35584 0.187792 0.394046 -0.640279; 0.132214 -0.201358 -0.411603 0.2211], [5.51707, 2.39269, 2.09817, 1.85399e-16], [0.00340931 0.706679 0.706679 -0.0346196; 0.872678 0.00985064 0.00985064 0.488096; -0.488283 0.0225396 0.0225396 0.872103; 0.0 -0.707107 0.707107 -1.11022e-16])

In [4]:
# Orthonormality check: the columns of U should satisfy U'U ≈ I.
adjoint(U) * U

4×4 Array{Float64,2}:
  1.0          -1.19285e-16  -3.28834e-16   2.82122e-16
 -1.19285e-16   1.0          -3.98916e-16  -4.94865e-17
 -3.28834e-16  -3.98916e-16   1.0          -1.12069e-16
  2.82122e-16  -4.94865e-17  -1.12069e-16   1.0        

In [5]:
# Orthonormality check: the columns of V should satisfy V'V ≈ I.
adjoint(V) * V

4×4 Array{Float64,2}:
  1.0           8.13397e-17   6.19853e-17  -9.84806e-17
  8.13397e-17   1.0           1.59805e-17  -3.38299e-17
  6.19853e-17   1.59805e-17   1.0          -1.54986e-17
 -9.84806e-17  -3.38299e-17  -1.54986e-17   1.0        

In [6]:
# Singular values of X, largest first. When column 3 of X was set equal to
# column 2 above, the last one is zero up to floating-point roundoff.
σ

4-element Array{Float64,1}:
 5.517065489881705    
 2.392688952166706    
 2.098174599462027    
 1.853992165135723e-16

In [7]:
# decomposition is just as good if we ignore the 0 in sigma and reduce r by 1.
# The rank is computed rather than hard-coded as 3, so this cell is still
# correct when the optional "linearly dependent columns" step is skipped and
# X has full column rank.
tol = maximum(size(X)) * eps(σ[1])   # σ is sorted descending, so σ[1] is the largest
r = count(s -> s > tol, σ)           # numerical rank: singular values above roundoff
norm(X - U[:, 1:r] * Diagonal(σ[1:r]) * V[:, 1:r]')

5.46330746322388e-15

In [8]:
# Draw a ground-truth weight vector, then generate responses from the linear
# model with additive Gaussian noise of standard deviation 0.1.
w♮ = randn(d)
y = X * w♮ .+ 0.1 .* randn(n);

In [9]:
# solve least squares problem to estimate w, using the SVD of X:
#   w = V * Diagonal(1 ./ σ) * U' * y

# full svd - takes inverse of 0!
# (when X has dependent columns the last singular value is ~1e-16, so its
# reciprocal is huge; this result is shown for contrast and overwritten below)
w = V * Diagonal(1 ./ σ) * U' * y

# thin svd: keep only the singular values that are numerically nonzero.
# The rank is computed instead of hard-coded as 3, so the estimate is still
# correct when X has full column rank.
tol = maximum(size(X)) * eps(σ[1])   # σ is sorted descending, so σ[1] is the largest
r = count(s -> s > tol, σ)           # numerical rank of X
w = V[:, 1:r] * Diagonal(1 ./ σ[1:r]) * U[:, 1:r]' * y

4-element Array{Float64,1}:
 0.957726104709162 
 0.8979311192084566
 0.8979311192084558
 0.3753573091990735

In [10]:
# Relative error of the estimate against the true weights -> not great
norm(w .- w♮) / norm(w♮)

0.06619061434328534

In [11]:
# Mean squared residual of the fitted model on the data -> but prediction is good
mean(abs2, y - X * w)

0.0033561740046633877

## Q: Why is the prediction good even though the error in the estimate of w is large?

Answer: ...

In [12]:
# Shorthand: for a rectangular X, `X \ y` solves the least squares problem
# directly. Compare it with the SVD-based estimate from above.
w_backslash = X \ y
norm(w_backslash .- w)

1.6736404963781187e-15