In [15]:
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.decomposition import TruncatedSVD
# Seed reused by the cells below that draw the random data matrix and fit
# the randomized solver, so every run produces the same numbers.
SEED = 42

# Wide rows and generous edge items so the 10-column matrices printed below
# fit on a single line per row.
np.set_printoptions(linewidth=180, edgeitems=10)

### SVD

$$ \large A_{[\text{m x n}]} = U_{[\text{m x m}]}\Sigma_{[\text{m x n}]}V^T_{[\text{n x n}]}$$
* $A$: Input data matrix
    * $\text{m x n}$ matrix (e.g., $m$ words, $n$ contexts: each element $A_{ij}$ quantifies the association between word $i$ and context $j$)
* $U$: Left singular vectors
    * $\text{m x m}$ matrix (rows are word vectors)
* $\Sigma$: Singular values
    * $\text{m x n}$ matrix (values on the diagonal are the singular values)
* $V^T$: Right singular vectors
    * $\text{n x n}$ matrix (columns are context vectors)

#### SVD Theorem

Let $A \in \mathbb{R}^{\text{m x n}}$ be a rectangular matrix of rank $r \in [0, \min(m, n)]$. The SVD of $A$ is a decomposition of the form $$A_{[\text{m x n}]} = U_{[\text{m x m}]}\Sigma_{[\text{m x n}]}V^T_{[\text{n x n}]}$$ 
with an orthogonal matrix $U \in \mathbb{R}^{\text{m x m}}$ with column vectors $u_i,\ i = 1, \dots, m$ (*left-singular vectors*),  
and an orthogonal matrix $V \in \mathbb{R}^{\text{n x n}}$ with column vectors $v_j,\ j = 1, \dots, n$ (*right-singular vectors*).  
Moreover, $\Sigma$ is an $\text{m x n}$ matrix with $\Sigma_{ii} = \sigma_i \geqslant 0$ and $\Sigma_{ij} = 0,\ i \neq j$.  

Remarks:
* The diagonal entries $\sigma_i,\ i = 1, \dots, r$, of $\Sigma$ are called the *singular values*.  
* By convention, the singular values are ordered, i.e., $\sigma_1 \geqslant \sigma_2 \geqslant \dots \geqslant \sigma_r \geqslant 0$.


© 2021 M. P. Deisenroth, A. A. Faisal, C. S. Ong. Published by Cambridge University Press (2020).

#### SVD Python Implementation

In [100]:
# Build a reproducible 5 x 10 random data matrix.
np.random.seed(SEED)
A = np.random.rand(5, 10)
print(f"A:\n{A}")

# Full SVD of A. NumPy returns the singular values as a 1-D array (not the
# m x n matrix Sigma) and the right factor already transposed, so the name
# `V` below actually holds V^T: its rows are the right singular vectors.
U, S, V = np.linalg.svd(A, full_matrices=True)

print(f"U:\n{U}")
print(f"S:\n{S}")
print(f"V:\n{V}")

A:
[[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864 0.15599452 0.05808361 0.86617615 0.60111501 0.70807258]
 [0.02058449 0.96990985 0.83244264 0.21233911 0.18182497 0.18340451 0.30424224 0.52475643 0.43194502 0.29122914]
 [0.61185289 0.13949386 0.29214465 0.36636184 0.45606998 0.78517596 0.19967378 0.51423444 0.59241457 0.04645041]
 [0.60754485 0.17052412 0.06505159 0.94888554 0.96563203 0.80839735 0.30461377 0.09767211 0.68423303 0.44015249]
 [0.12203823 0.49517691 0.03438852 0.9093204  0.25877998 0.66252228 0.31171108 0.52006802 0.54671028 0.18485446]]
U:
[[-0.51821327 -0.50500623  0.04826096 -0.32463687  0.60721125]
 [-0.38224002 -0.53992122 -0.1644405   0.06201188 -0.72903463]
 [-0.39176052  0.26638961 -0.69770141  0.49563613  0.20764835]
 [-0.50019315  0.6002627   0.02992259 -0.57613136 -0.23805208]
 [-0.42623712  0.14891289  0.6949443   0.55962748  0.00404675]]
S:
[3.28058835 1.52963053 0.62591689 0.56047292 0.47296596]
V:
[[-2.43116965e-01 -3.70182713e-01 -2.61894677e-01

#### Truncated SVD

Theory

##### Truncated SVD my implementation

In [149]:
# Alternative kept for reference: the rank-4 reconstruction written with an
# explicit diagonal matrix of the four leading singular values.
# approx_A = np.dot(U[:, 0:4], np.dot(np.diag(S[0:4]), V[0:4, :]))
# approx_A

In [143]:
# Alternative kept for reference: the same rank-4 reconstruction written as a
# sum of four rank-1 outer products, each weighted by its singular value.
# A1 = np.outer(U[:, 0], V[0, :])
# A2 = np.outer(U[:, 1], V[1, :])
# A3 = np.outer(U[:, 2], V[2, :])
# A4 = np.outer(U[:, 3], V[3, :])
# approx_A = S[0] * A1 + S[1] * A2 + S[2] * A3 + S[3] * A4
# approx_A

In [156]:
# Rank-4 approximation of A: scale the first four columns of U by the four
# leading singular values (broadcast across rows), then multiply by the first
# four rows of V (which holds V^T).
approx_A = (U[:, :4] * S[:4]) @ V[:4, :]
approx_A

array([[0.25593054, 1.03538235, 0.80309091, 0.56065942, 0.2604236 , 0.19589178, 0.1894384 , 0.72711277, 0.59355879, 0.63323519],
       [0.16299044, 0.86825505, 0.74708165, 0.25796184, 0.05647349, 0.13550276, 0.14653404, 0.69171977, 0.44101722, 0.38108098],
       [0.57129191, 0.16844784, 0.31645772, 0.35336728, 0.4917734 , 0.79881965, 0.24459325, 0.46667886, 0.58983056, 0.02085823],
       [0.65404474, 0.13733073, 0.03717862, 0.96378275, 0.92470094, 0.79275596, 0.25311722, 0.15219074, 0.68719538, 0.46949187],
       [0.12124776, 0.49574118, 0.03486235, 0.90906716, 0.25947579, 0.66278818, 0.31258649, 0.51914124, 0.54665992, 0.1843557 ]])

In [170]:
# Full SVD of the rank-4 approximation; Vhk is the transposed right factor.
Uk, Sk, Vhk = np.linalg.svd(approx_A, full_matrices=True)

In [171]:
# First four left singular vectors scaled by their singular values
# (U_k Sigma_k): the reduced 5 x 4 representation of the rows.
Uk[:, :4] @ np.diag(Sk[:4])

array([[-1.70004442, -0.77247294,  0.03020735, -0.18195017],
       [-1.25397214, -0.82587999, -0.10292609,  0.03475598],
       [-1.28520498,  0.40747768, -0.4367031 ,  0.27779063],
       [-1.64092783,  0.91818016,  0.01872906, -0.32290603],
       [-1.39830854,  0.2277817 ,  0.43497737,  0.31365605]])

##### Truncated SVD Sklearn's implementation

In [96]:
# Re-create the same seeded 5 x 10 data matrix used in the NumPy section.
np.random.seed(SEED)
A = np.random.rand(5, 10)
print(f"A:\n{A}")

# Fit scikit-learn's truncated SVD with 4 components; `fit` returns the
# estimator itself, so it can be chained and then displayed.
truncated_svd = TruncatedSVD(n_components=4, n_iter=10, random_state=SEED).fit(A)
truncated_svd

A:
[[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864 0.15599452 0.05808361 0.86617615 0.60111501 0.70807258]
 [0.02058449 0.96990985 0.83244264 0.21233911 0.18182497 0.18340451 0.30424224 0.52475643 0.43194502 0.29122914]
 [0.61185289 0.13949386 0.29214465 0.36636184 0.45606998 0.78517596 0.19967378 0.51423444 0.59241457 0.04645041]
 [0.60754485 0.17052412 0.06505159 0.94888554 0.96563203 0.80839735 0.30461377 0.09767211 0.68423303 0.44015249]
 [0.12203823 0.49517691 0.03438852 0.9093204  0.25877998 0.66252228 0.31171108 0.52006802 0.54671028 0.18485446]]


TruncatedSVD(n_components=4, n_iter=10, random_state=42)

In [97]:
# Project the rows of A onto the 4 fitted components (5 x 10 -> 5 x 4).
reduced_A = truncated_svd.transform(A)
print(f"reduced_A:\n{reduced_A}")

reduced_A:
[[ 1.70004442 -0.77247294 -0.03020735  0.18195017]
 [ 1.25397214 -0.82587999  0.10292609 -0.03475598]
 [ 1.28520498  0.40747768  0.4367031  -0.27779063]
 [ 1.64092783  0.91818016 -0.01872906  0.32290603]
 [ 1.39830854  0.2277817  -0.43497737 -0.31365605]]


##### Truncated SVD scipy's implementation

In [174]:
# Rank-4 truncated SVD of A using SciPy's sparse solver.
num_components = 4

# NOTE: as with np.linalg.svd, the third return value is the transposed right
# factor, so `v` holds V^T (one right singular vector per row).
u, s, v = svds(A, k=num_components, which='LM')

# SciPy's documentation does not guarantee the order of the returned singular
# values, so sort them explicitly into descending order instead of assuming
# they arrive ascending. Indexing with `order` builds new arrays, which avoids
# assigning a reversed view of `u` onto itself, and keeps u, s and v aligned.
order = np.argsort(s)[::-1]
u = u[:, order]
s = s[order]
v = v[order, :]

# Scale each left singular vector by its singular value (U_k Sigma_k) via
# broadcasting. This matches the TruncatedSVD.transform output above up to
# the sign of each column: singular vectors are only determined up to sign.
X = u * s
X

array([[-1.70004442, -0.77247294, -0.03020735, -0.18195017],
       [-1.25397214, -0.82587999,  0.10292609,  0.03475598],
       [-1.28520498,  0.40747768,  0.4367031 ,  0.27779063],
       [-1.64092783,  0.91818016, -0.01872906, -0.32290603],
       [-1.39830854,  0.2277817 , -0.43497737,  0.31365605]])

### TODO
* Write theory for truncated SVD
* Write about difference in signs