In [2]:
import numpy as np
from sklearn.decomposition import NMF
import pandas as pd

# Axis labels: each row of the table is a movie, each column is a user.
index = ['Titanic', 'Tiffany', 'Terminator', 'Star Trek', 'Star Wars']
columns = ['Ada', 'Bob', 'Steve', 'Margaret']

# Ratings laid out to match the labels above (row order follows `index`,
# value order within a row follows `columns`).
data = [
    [5, 3, 0, 1],  # Titanic
    [4, 2, 0, 1],  # Tiffany
    [1, 1, 0, 5],  # Terminator
    [1, 0, 0, 4],  # Star Trek
    [0, 1, 5, 4],  # Star Wars
]

# Build the labelled frame, then keep only the raw (5, 4) NumPy array for NMF.
R = pd.DataFrame(data=data, index=index, columns=columns).values
R


array([[5, 3, 0, 1],
       [4, 2, 0, 1],
       [1, 1, 0, 5],
       [1, 0, 0, 4],
       [0, 1, 5, 4]], dtype=int64)

In [3]:
# NMF approximates R as a product of two non-negative factors, R ~ W H.
# In this notebook W is obtained as Q = model.transform(R) and H as
# P = model.components_, so the reconstruction used further down is
# R ~ Q P (np.dot(Q, P)), not P Q'.
# n_components=2 asks for two latent features; init='random' together with
# random_state=10 makes the random initialisation repeatable.
model = NMF(n_components=2, init='random', random_state=10)

In [6]:
# Learn the factorisation from the full (movies x users) matrix.
# NOTE(review): R is passed as-is, so its 0 entries are fitted as literal
# values; if 0 is meant to mark "not rated", confirm that this is intended.
model.fit(R)

NMF(alpha=0.0, beta_loss='frobenius', init='random', l1_ratio=0.0,
  max_iter=200, n_components=2, random_state=10, shuffle=False,
  solver='cd', tol=0.0001, verbose=0)

In [8]:
# Learned H factor: one row per latent component, one column per column of R
# (i.e. per user, in the order of `columns`).
model.components_

array([[0.        , 0.22474257, 1.16108548, 2.15691714],
       [1.37075289, 0.74951363, 0.        , 0.37169942]])

In [9]:
# Column labels of R, shown to help read the columns of components_.
columns

['Ada', 'Bob', 'Steve', 'Margaret']

In [10]:
# H factor, shape (n_components, n_users): columns of R are users, so each
# column here holds one user's loadings on the latent features.
P = model.components_  # user features (columns follow `columns`)

# W factor, shape (n_movies, n_components): rows of R are movies, so each
# row here is one movie's latent representation.
Q = model.transform(R)  # movie features (rows follow `index`)
print(Q)

[[0.         3.67366881]
 [0.         2.85150784]
 [1.67302237 0.91343142]
 [1.32832611 0.60834015]
 [2.42241476 0.        ]]


In [12]:
# Error of the fitted factorisation on the training matrix; with the
# 'frobenius' loss this is the Frobenius norm of (R - W H).
print(model.reconstruction_err_)


3.932887566921555


In [13]:
# (n_components, n_columns_of_R): 2 latent features x 4 users.
P.shape

(2, 4)

In [14]:
# (n_rows_of_R, n_components): 5 movies x 2 latent features.
Q.shape

(5, 2)

In [15]:
# Inner dimensions match (2 and 2), so Q times P is defined and has R's shape.
Q.shape, P.shape

((5, 2), (2, 4))

In [17]:
# Rebuild an approximation of R from the two factors:
# (5, 2) movie features times (2, 4) user features -> (5, 4) ratings.
nR = Q.dot(P)
print(nR)


[[5.03569214 2.75346485 0.         1.36550058]
 [3.90871261 2.13724399 0.         1.05990382]
 [1.25208875 1.06062865 1.94252199 3.94809256]
 [0.83388402 0.75449066 1.54230017 3.09120904]
 [0.         0.54441972 2.81263061 5.22494791]]


In [18]:
# Transposed reconstruction: rows become users, columns become movies.
nR.T

array([[5.03569214, 3.90871261, 1.25208875, 0.83388402, 0.        ],
       [2.75346485, 2.13724399, 1.06062865, 0.75449066, 0.54441972],
       [0.        , 0.        , 1.94252199, 1.54230017, 2.81263061],
       [1.36550058, 1.05990382, 3.94809256, 3.09120904, 5.22494791]])

In [19]:
# Project an unseen row onto the already-fitted components.
# The query has one value per column of R (Ada, Bob, Steve, Margaret), so it
# is shaped like a new movie that only Steve has rated (with a 5).

query = [[0, 0, 5, 0]]
print(model.transform(query))

[[0.95942884 0.        ]]


In [20]:
# Keep the query's latent representation (1 x 2) in a variable for reuse;
# this repeats the transform call printed in the previous cell.
t = model.transform(query)

In [21]:
# Inner product of the query's latent vector with every row of Q:
# (1, 2) times (2, 5) -> one score per existing movie, in `index` order.
# NOTE(review): this is not a rating reconstruction for the query; that
# would multiply t by P (one value per user) -- confirm which was intended.
t.dot(Q.T)

array([[0.        , 0.        , 1.60514592, 1.27443438, 2.32413459]])

In [None]:
# NOTE(review): unfinished cell -- the list holds three titles that do not
# appear in `index`, and `new_user` is not used anywhere in the visible code.
# Presumably a placeholder for a new-user example; TODO confirm or remove.
new_user = ['Pulp Fiction', 'Arielle', 'Matrix III']