In [12]:
import numpy as np
from numpy.linalg import svd
# Ratings matrix with 8 rows and 4 columns.
# Note that the last column is exactly twice the third one in every row,
# so the columns are linearly dependent and the rank cannot exceed 3.
movie_ratings = np.array((
    (2, 5, 3, 6),
    (1, 2, 1, 2),
    (4, 1, 1, 2),
    (3, 5, 2, 4),
    (5, 3, 1, 2),
    (4, 5, 5, 10),
    (2, 4, 2, 4),
    (2, 2, 5, 10),
))

In [13]:
# Confirm the dimensions of the ratings matrix as (rows, columns).
print(movie_ratings.shape)

(8, 4)


What is the rank of the movie matrix?

In [14]:
from numpy.linalg import matrix_rank
# Print the rank of the ratings matrix as computed by numpy's matrix_rank.
print(matrix_rank(movie_ratings))

3


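If we did an SVD, what would we expect the dimensions to be? Since the matrix has rank 3, a compact SVD would keep only 3 singular values:

- `movie_ratings`: $8 \times 4$
- $U$: $8 \times 3$
- $\Sigma$: $3 \times 3$
- $V$: $4 \times 3$, so $V^{T}$: $3 \times 4$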

In [21]:
# Full SVD (numpy's default, spelled out here): U comes back square and the
# singular values come back as a 1-D array rather than a diagonal matrix.
# NOTE: the third return value is V transposed, so `V` actually holds V^T;
# the name is kept because the cells below slice it row-wise under this name.
U, singular_values, V = svd(movie_ratings, full_matrices=True)

In [22]:
# Print the shapes svd actually returned, to compare with the expected factor shapes.
print(U.shape, singular_values.shape, V.shape)

(8, 8) (4,) (4, 4)


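Hmm, that's not what we expected. By default `svd` returns the *full* decomposition (`full_matrices=True`): `U` is $8 \times 8$, the singular values come back as a 1-D array of length 4 rather than as a diagonal matrix, and the third return value is already $V^{T}$ ($4 \times 4$), not $V$.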

In [23]:
# Inspect the singular values returned by svd.
singular_values

array([2.20718241e+01, 6.11658901e+00, 3.66359394e+00, 1.06184482e-16])

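See how the last singular value is *almost* 0? It is zero up to floating-point round-off, which agrees with the rank of 3 computed above, so we can drop it and keep only the first three singular values (and the matching columns of `U` and rows of `V`).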

In [24]:
# Keep the first 3 columns of U (3 = the rank of movie_ratings found earlier).
U_mod = U[:, :3]

In [25]:
# Check the shape of the truncated U.
U_mod.shape

(8, 3)

In [26]:
# Build a diagonal matrix from the first 3 singular values
# (svd returned them as a 1-D array, so np.diag places them on the diagonal).
singular_values_mod = np.diag(singular_values[:3])

In [27]:
# Display the diagonal matrix of retained singular values.
singular_values_mod

array([[22.07182408,  0.        ,  0.        ],
       [ 0.        ,  6.11658901,  0.        ],
       [ 0.        ,  0.        ,  3.66359394]])

In [28]:
# Keep the first 3 rows of `V`. Rows, not columns, because `V` holds V^T as returned by svd.
V_mod = V[:3]

In [29]:
# Residual between the original matrix and the product of the truncated factors;
# rounding to 10 decimals hides floating-point noise.
np.round(movie_ratings - U_mod @ (singular_values_mod @ V_mod), decimals=10)

array([[-0., -0.,  0.,  0.],
       [ 0., -0.,  0.,  0.],
       [ 0., -0., -0., -0.],
       [ 0., -0.,  0.,  0.],
       [-0., -0.,  0.,  0.],
       [-0., -0.,  0.,  0.],
       [-0., -0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

If you have an $m \times n$ matrix $X$ with rank $k$, you can always find two matrices $U$ (of size $m \times k$) and $V$ (of size $k \times n$), each of rank $k$, such that $X = UV$.

In [30]:
# Rank-2 truncation: keep only the two largest singular values together with
# the matching columns of U and rows of `V` (which holds V^T).
# NOTE: this rebinds the same names used for the rank-3 factors, so re-running
# the earlier reconstruction cell after this one would use the rank-2 factors.
U_mod = U[:, :2]
singular_values_mod = np.diag(singular_values[:2])
V_mod = V[:2]

In [31]:
# Residual between the original matrix and the product of the current
# truncated factors, rounded to 10 decimals.
np.round(movie_ratings - U_mod @ (singular_values_mod @ V_mod), decimals=10)

array([[-1.03753545,  1.19862062, -0.09090295, -0.18180591],
       [-0.36765403,  0.42473508, -0.03221175, -0.06442351],
       [ 1.38340172, -1.59818524,  0.12120579,  0.24241157],
       [-0.80073471,  0.92505479, -0.07015582, -0.14031164],
       [ 0.83114115, -0.96018206,  0.07281986,  0.14563972],
       [ 0.17449218, -0.20158341,  0.01528801,  0.03057602],
       [-0.73530806,  0.84947017, -0.06442351, -0.12884702],
       [ 0.7921794 , -0.91517121,  0.06940625,  0.1388125 ]])

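This is a rank-2 approximation, so the reconstruction is no longer exact and the residuals above are nonzero. The size of the error is governed by the singular value we discarded: the Frobenius norm of this residual matrix is about 3.66, which is the third singular value.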