---

# Data Mining:<br>Statistical Modeling and Learning from Data

## Dr. Ciro Cattuto<br>Dr. Laetitia Gauvin<br>Dr. André Panisson

### Exercises - SVD versus NMF

---

In [1]:
import sklearn
import numpy as np
from scipy.linalg import svd

In [2]:
A = np.matrix([[1,1,1,1,1],
               [0,1,0,1,0],
               [0,1,0,1,0]])

In [3]:
A

matrix([[1, 1, 1, 1, 1],
        [0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0]])

# Singular Value Decomposition

In [4]:
U, S, Vh = svd(A, full_matrices=False)

#Singular vectors and diagonal matrix

In [5]:
U

array([[ -7.66184591e-01,   6.42620551e-01,   2.07654813e-16],
       [ -4.54401349e-01,  -5.41774320e-01,  -7.07106781e-01],
       [ -4.54401349e-01,  -5.41774320e-01,   7.07106781e-01]])

In [6]:
S

array([ 2.71519453,  1.27582079,  0.        ])

In [7]:
Vh

array([[-0.28218405, -0.61689403, -0.28218405, -0.61689403, -0.28218405],
       [ 0.50369186, -0.34560347,  0.50369186, -0.34560347,  0.50369186],
       [-0.57064676,  0.07597177,  0.78532338, -0.07597177, -0.21467662]])

In [8]:
c1 = (U[:,0]*S[0])[:, np.newaxis].dot(Vh[[0],:])
c2 = (U[:,1]*S[1])[:, np.newaxis].dot(Vh[[1],:])

In [9]:
(c1+c2).round(5)

array([[ 1.,  1.,  1.,  1.,  1.],
       [-0.,  1., -0.,  1., -0.],
       [-0.,  1., -0.,  1., -0.]])

# Non-negative Matrix Factorization

In [10]:
from sklearn.decomposition import NMF

In [11]:
nmf = NMF(n_components=2)

In [12]:
nmf.fit(A)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,
  n_components=2, random_state=None, shuffle=False, solver='cd',
  tol=0.0001, verbose=0)

In [13]:
R = nmf.components_

In [14]:
R.round(3)

array([[ 0.   ,  1.191,  0.   ,  1.191,  0.   ],
       [ 0.905,  0.   ,  0.905,  0.   ,  0.905]])

In [15]:
L=nmf.transform(A)

In [16]:
L

array([[ 0.83940195,  1.10509402],
       [ 0.83940195,  0.        ],
       [ 0.83940195,  0.        ]])

In [17]:
np.round(np.matrix(L)[:,0]*np.matrix(R)[0,:])

matrix([[ 0.,  1.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  1.,  0.]])

In [18]:
np.round(np.matrix(L)[:,1]*np.matrix(R)[1,:])

matrix([[ 1.,  0.,  1.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.]])