# Product Recommendation
Reference: https://ieeexplore.ieee.org/document/5430993

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

In [2]:
tf.__version__

'2.2.0'

## Data Preprocessing

In [3]:
Y_data = pd.read_csv('data/Y.csv', header=None, names=['Rating','Movie','User'], dtype=int) # training data
P_data = pd.read_csv('data/P.csv', header=None, names=['Rating','Movie','User'], dtype=int) # test data ('probe-set' mentioned in paper)

In [4]:
display(Y_data.head())
display(P_data.head())

Unnamed: 0,Rating,Movie,User
0,5,2,1
1,4,7,1
2,4,8,1
3,4,11,1
4,4,12,1


Unnamed: 0,Rating,Movie,User
0,3,6,1
1,5,96,1
2,3,1,2
3,3,33,3
4,5,42,4


In [5]:
Y_data.shape, P_data.shape

((3399874, 3), (189699, 3))

In [6]:
print(Y_data['Rating'].unique().max(), Y_data['Movie'].unique().max(), Y_data['User'].unique().max())
print(P_data['Rating'].unique().max(), P_data['Movie'].unique().max(), P_data['User'].unique().max())

5 100 137328
5 100 137328


In [7]:
k, n = Y_data['Movie'].unique().max(), Y_data['User'].unique().max()
k, n

(100, 137328)

In [8]:
indices = np.reshape(Y_data[['Movie', 'User']].values-1, (-1, 2))
print(indices.shape)
indices

(3399874, 2)


array([[     1,      0],
       [     6,      0],
       [     7,      0],
       ...,
       [    97, 137327],
       [    98, 137327],
       [    99, 137327]])

In [9]:
Z_sparse = tf.SparseTensor(indices=indices, values=Y_data['Rating'].values, dense_shape=[k, n])
Z_sparse = tf.cast(Z_sparse, tf.float64)

In [10]:
# %%time
# t = 0
# Z_t = tf.sparse.slice(Z_sparse, [0, t], [100, 1])

In [11]:
# use dense matrices for faster linear transformations since all matrices can fit in memory
Z = tf.sparse.to_dense(Z_sparse, validate_indices=False)
Z

<tf.Tensor: shape=(100, 137328), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [5., 0., 0., ..., 0., 0., 3.],
       [0., 0., 0., ..., 3., 0., 0.],
       ...,
       [5., 0., 0., ..., 4., 0., 4.],
       [4., 0., 3., ..., 0., 0., 4.],
       [3., 4., 0., ..., 4., 5., 4.]])>

In [12]:
# memoization
t_Z_dict = {}
t_y_dict = {}
t_x_dict = {}
t_Hy_dict = {}
t_Hx_dict = {}
t_Hy_trans_dict = {}
t_Hx_trans_dict = {}
t_movie_ids_labels_dict = {}
t_labels_dict = {}

for t in tqdm(range(n)):
    movie_ids = Y_data['Movie'][Y_data['User']==t+1].values
    H_yt = tf.constant(np.identity(k)[movie_ids-1], dtype=tf.float64)
    H_xt = tf.constant(np.delete(np.identity(k), movie_ids-1, 0), dtype=tf.float64)
    Z_t = tf.expand_dims(Z[:, t], axis=1) # alternative: Z_t = tf.sparse.slice(Z_sparse, [0, t], [100, 1]) 
    y_t = tf.matmul(H_yt, Z_t)
    x_t = tf.matmul(H_xt, Z_t)
    
    # store the variables for fast future reference
    t_Hy_dict[t] = H_yt
    t_Hx_dict[t] = H_xt
    t_Hx_trans_dict[t] = tf.transpose(H_xt)
    t_Hy_trans_dict[t] = tf.transpose(H_yt)
    
    t_x_dict[t] = x_t
    t_y_dict[t] = y_t
    t_Z_dict[t] = Z_t
    
    t_movie_ids_labels_dict[t] = P_data['Movie'][P_data['User']==t+1].values
    t_labels_dict[t] = tf.expand_dims(P_data['Rating'][P_data['User']==t+1].values, axis=1)

100%|██████████| 137328/137328 [13:13<00:00, 173.05it/s]


## Initialization
$\mu$ has 1 type available <br />
R has 4 types available

In [13]:
# initial estimate of mu
N = 0
H_yty_t = 0

for t in tqdm(range(n)):
    N += tf.matmul(t_Hy_trans_dict[t], t_Hy_dict[t])
    H_yty_t += tf.matmul(t_Hy_trans_dict[t], t_y_dict[t])

100%|██████████| 137328/137328 [00:15<00:00, 9045.02it/s] 


In [14]:
# The ith diagonal element of N equals the total number of ratings of the ith product.
N

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[20017.,     0.,     0., ...,     0.,     0.,     0.],
       [    0., 23917.,     0., ...,     0.,     0.,     0.],
       [    0.,     0., 31634., ...,     0.,     0.,     0.],
       ...,
       [    0.,     0.,     0., ..., 60896.,     0.,     0.],
       [    0.,     0.,     0., ...,     0., 61521.,     0.],
       [    0.,     0.,     0., ...,     0.,     0., 64506.]])>

In [15]:
print(H_yty_t.shape)
mu_hat0 = tf.matmul(tf.linalg.inv(N), H_yty_t)
mu_hat0.shape

(100, 1)


TensorShape([100, 1])

In [16]:
# initial estimates of R (4 types available)
R_hat0_1 = tf.constant(np.identity(k), dtype=tf.float64)
R_hat0_1

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [17]:
S = 0
for t in tqdm(range(n)):
    Hyt = t_Hy_dict[t]
    yt = t_y_dict[t]
    Hytmu_hat0 = tf.matmul(Hyt, mu_hat0)
    S += tf.matmul(tf.transpose(Hyt), tf.matmul(yt - Hytmu_hat0, tf.matmul(tf.transpose(yt - Hytmu_hat0), Hyt)))

100%|██████████| 137328/137328 [00:31<00:00, 4342.29it/s]


In [18]:
# diag_S is the diagonal matrix consisting of the diagonal elements of S
diag_S = tf.linalg.diag(tf.linalg.tensor_diag_part(S))
R_hat0_2 = tf.matmul(tf.linalg.inv(N), diag_S)
R_hat0_2

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[1.72440427, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.94219113, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.43659411, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.18291506, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.03485685,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.26227449]])>

In [19]:
# R_hat0_3 is not a good initializer when rating variances are far from one
R_hat0_3 = tf.matmul(tf.linalg.sqrtm(tf.linalg.inv(diag_S)), tf.matmul(S, tf.linalg.sqrtm(tf.linalg.inv(diag_S))))
R_hat0_3

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[ 1.        ,  0.07418256, -0.01158277, ..., -0.01462987,
        -0.02215371, -0.01844816],
       [ 0.07418256,  1.        ,  0.03674347, ...,  0.0256191 ,
         0.03563234,  0.03926307],
       [-0.01158277,  0.03674347,  1.        , ...,  0.10955311,
         0.12823359,  0.15560634],
       ...,
       [-0.01462987,  0.0256191 ,  0.10955311, ...,  1.        ,
         0.19781317,  0.15164928],
       [-0.02215371,  0.03563234,  0.12823359, ...,  0.19781317,
         1.        ,  0.18995689],
       [-0.01844816,  0.03926307,  0.15560634, ...,  0.15164928,
         0.18995689,  1.        ]])>

In [20]:
R_hat0_4 = tf.matmul(tf.linalg.sqrtm(tf.linalg.inv(N)), tf.matmul(S, tf.linalg.sqrtm(tf.linalg.inv(N))))
R_hat0_4

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[ 1.72440427,  0.09455639, -0.01823052, ..., -0.02089473,
        -0.02959417, -0.02721758],
       [ 0.09455639,  0.94219113,  0.04274809, ...,  0.02704644,
         0.03518471,  0.04281842],
       [-0.01823052,  0.04274809,  1.43659411, ...,  0.14281326,
         0.15635399,  0.20954206],
       ...,
       [-0.02089473,  0.02704644,  0.14281326, ...,  1.18291506,
         0.21886288,  0.18530794],
       [-0.02959417,  0.03518471,  0.15635399, ...,  0.21886288,
         1.03485685,  0.21710614],
       [-0.02721758,  0.04281842,  0.20954206, ...,  0.18530794,
         0.21710614,  1.26227449]])>

## Expectation Maximization Algorithm

In [21]:
LOG_2PI = tf.math.log(2*tf.constant(np.pi, dtype=tf.float64))

@tf.function(experimental_relax_shapes=True)
def run_graph_em(mu, R, y_t, H_xt, H_xt_trans, H_yt, H_yt_trans):
    # for R estimation
    R_xt = H_xt @ R @ H_xt_trans
    R_yt = H_yt @ R @ H_yt_trans
    R_yt_det = tf.linalg.det(R_yt)
    R_yt_inv = tf.linalg.inv(R_yt)
    R_xtyt = H_xt @ R @ H_yt_trans

    mu_yt = tf.matmul(H_yt, mu)
    mu_xt = tf.matmul(H_xt, mu)

    X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt
    Z_t_hat = H_yt_trans @ y_t + H_xt_trans @ X_t_hat
    
    R_hat_sum_part = (Z_t_hat - mu) @ tf.transpose(Z_t_hat - mu) \
                        + H_xt_trans @ (R_xt - R_xtyt @ R_yt_inv @ tf.transpose(R_xtyt)) @ H_xt

    # for mu estimation
    Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
    Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t

    # for log likelihood calculation
    log_p_hat_part = -1/2*(tf.math.log(R_yt_det) + tf.transpose(y_t - mu_yt) @ R_yt_inv @ (y_t - mu_yt) + k*LOG_2PI) 
    
    return R_hat_sum_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part

In [22]:
def expectation_maximization(mu, R):
    Hyt_trans_Ryt_inv_Hyt_sum = 0
    Hyt_trans_Ryt_inv_yt_sum = 0
    R_hat_sum = 0
    log_p_hat = 0
    
    for t in tqdm(range(n)):
        y_t = t_y_dict[t]
        H_xt = t_Hx_dict[t]
        H_xt_trans = t_Hx_trans_dict[t]
        H_yt = t_Hy_dict[t]
        H_yt_trans = t_Hy_trans_dict[t]
        
        R_hat_sum_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part = \
            run_graph_em(mu, R, y_t, H_xt, H_xt_trans, H_yt, H_yt_trans)
        
        R_hat_sum += R_hat_sum_part
        Hyt_trans_Ryt_inv_Hyt_sum += Hyt_trans_Ryt_inv_Hyt_sum_part
        Hyt_trans_Ryt_inv_yt_sum += Hyt_trans_Ryt_inv_yt_sum_part
        log_p_hat += log_p_hat_part
        
    R_hat = R_hat_sum / n
    mu_hat = tf.matmul(tf.linalg.inv(Hyt_trans_Ryt_inv_Hyt_sum), Hyt_trans_Ryt_inv_yt_sum)    
    return mu_hat, R_hat, log_p_hat

In [23]:
delta = 0.0005
mu = mu_hat0
R = R_hat0_4
log_p = tf.constant(-np.inf, dtype=tf.float64)

for i in range(30):
    if i % 5 == 0:
        print(f'iteration: {i}')
    
    mu_hat, R_hat, log_p_hat = expectation_maximization(mu, R)
    convergence_criterion = log_p_hat/n - log_p/n < delta
    
    print('normalized log_p_hat:', (log_p_hat/n).numpy())
    print('normalized log_p:', (log_p/n).numpy())
    print('convergence gap:', (log_p_hat/n - log_p/n).numpy())
    
    if convergence_criterion:
        break
        
    # use new estimattions for next iteration
    mu = mu_hat
    R = R_hat
    log_p = log_p_hat

  0%|          | 0/137328 [00:00<?, ?it/s]

iteration: 0


100%|██████████| 137328/137328 [01:42<00:00, 1344.10it/s]
  0%|          | 159/137328 [00:00<01:26, 1587.85it/s]

normalized log_p_hat: [[-101.37606558]]
normalized log_p: -inf
convergence gap: [[inf]]


100%|██████████| 137328/137328 [01:27<00:00, 1561.18it/s]
  0%|          | 168/137328 [00:00<01:21, 1677.73it/s]

normalized log_p_hat: [[-101.07910104]]
normalized log_p: [[-101.37606558]]
convergence gap: [[0.29696454]]


100%|██████████| 137328/137328 [01:25<00:00, 1603.79it/s]
  0%|          | 170/137328 [00:00<01:20, 1694.99it/s]

normalized log_p_hat: [[-100.92736468]]
normalized log_p: [[-101.07910104]]
convergence gap: [[0.15173636]]


100%|██████████| 137328/137328 [01:24<00:00, 1632.86it/s]
  0%|          | 162/137328 [00:00<01:24, 1617.48it/s]

normalized log_p_hat: [[-100.83160767]]
normalized log_p: [[-100.92736468]]
convergence gap: [[0.09575701]]


100%|██████████| 137328/137328 [01:22<00:00, 1662.50it/s]
  0%|          | 161/137328 [00:00<01:25, 1602.33it/s]

normalized log_p_hat: [[-100.76501937]]
normalized log_p: [[-100.83160767]]
convergence gap: [[0.0665883]]
iteration: 5


100%|██████████| 137328/137328 [01:27<00:00, 1566.43it/s]
  0%|          | 159/137328 [00:00<01:26, 1583.13it/s]

normalized log_p_hat: [[-100.71663616]]
normalized log_p: [[-100.76501937]]
convergence gap: [[0.04838321]]


100%|██████████| 137328/137328 [01:26<00:00, 1586.35it/s]
  0%|          | 160/137328 [00:00<01:25, 1595.02it/s]

normalized log_p_hat: [[-100.68061666]]
normalized log_p: [[-100.71663616]]
convergence gap: [[0.03601951]]


100%|██████████| 137328/137328 [01:28<00:00, 1552.16it/s]
  0%|          | 157/137328 [00:00<01:27, 1569.01it/s]

normalized log_p_hat: [[-100.65335758]]
normalized log_p: [[-100.68061666]]
convergence gap: [[0.02725908]]


100%|██████████| 137328/137328 [01:32<00:00, 1478.74it/s]
  0%|          | 322/137328 [00:00<01:25, 1600.11it/s]

normalized log_p_hat: [[-100.63248001]]
normalized log_p: [[-100.65335758]]
convergence gap: [[0.02087757]]


100%|██████████| 137328/137328 [01:34<00:00, 1460.13it/s]
  0%|          | 138/137328 [00:00<01:39, 1373.23it/s]

normalized log_p_hat: [[-100.61634676]]
normalized log_p: [[-100.63248001]]
convergence gap: [[0.01613325]]
iteration: 10


100%|██████████| 137328/137328 [01:28<00:00, 1555.92it/s]
  0%|          | 173/137328 [00:00<01:19, 1729.07it/s]

normalized log_p_hat: [[-100.60379461]]
normalized log_p: [[-100.61634676]]
convergence gap: [[0.01255214]]


100%|██████████| 137328/137328 [01:21<00:00, 1675.64it/s]
  0%|          | 161/137328 [00:00<01:25, 1607.17it/s]

normalized log_p_hat: [[-100.59397571]]
normalized log_p: [[-100.60379461]]
convergence gap: [[0.0098189]]


100%|██████████| 137328/137328 [01:21<00:00, 1687.36it/s]
  0%|          | 171/137328 [00:00<01:20, 1700.80it/s]

normalized log_p_hat: [[-100.5862597]]
normalized log_p: [[-100.59397571]]
convergence gap: [[0.00771601]]


100%|██████████| 137328/137328 [01:21<00:00, 1677.54it/s]
  0%|          | 167/137328 [00:00<01:22, 1665.44it/s]

normalized log_p_hat: [[-100.58017111]]
normalized log_p: [[-100.5862597]]
convergence gap: [[0.00608859]]


100%|██████████| 137328/137328 [01:21<00:00, 1677.41it/s]
  0%|          | 171/137328 [00:00<01:20, 1701.16it/s]

normalized log_p_hat: [[-100.5753476]]
normalized log_p: [[-100.58017111]]
convergence gap: [[0.0048235]]
iteration: 15


100%|██████████| 137328/137328 [01:21<00:00, 1687.23it/s]
  0%|          | 327/137328 [00:00<01:26, 1575.87it/s]

normalized log_p_hat: [[-100.57151119]]
normalized log_p: [[-100.5753476]]
convergence gap: [[0.00383642]]


100%|██████████| 137328/137328 [01:21<00:00, 1676.57it/s]
  0%|          | 167/137328 [00:00<01:22, 1663.30it/s]

normalized log_p_hat: [[-100.56844755]]
normalized log_p: [[-100.57151119]]
convergence gap: [[0.00306364]]


100%|██████████| 137328/137328 [01:26<00:00, 1585.54it/s]
  0%|          | 169/137328 [00:00<01:21, 1685.11it/s]

normalized log_p_hat: [[-100.56599094]]
normalized log_p: [[-100.56844755]]
convergence gap: [[0.00245661]]


100%|██████████| 137328/137328 [01:23<00:00, 1635.94it/s]
  0%|          | 167/137328 [00:00<01:22, 1669.57it/s]

normalized log_p_hat: [[-100.56401278]]
normalized log_p: [[-100.56599094]]
convergence gap: [[0.00197815]]


100%|██████████| 137328/137328 [01:24<00:00, 1616.06it/s]
  0%|          | 166/137328 [00:00<01:22, 1653.57it/s]

normalized log_p_hat: [[-100.56241312]]
normalized log_p: [[-100.56401278]]
convergence gap: [[0.00159966]]
iteration: 20


100%|██████████| 137328/137328 [01:26<00:00, 1596.70it/s]
  0%|          | 159/137328 [00:00<01:26, 1587.54it/s]

normalized log_p_hat: [[-100.561114]]
normalized log_p: [[-100.56241312]]
convergence gap: [[0.00129912]]


100%|██████████| 137328/137328 [01:26<00:00, 1593.45it/s]
  0%|          | 157/137328 [00:00<01:27, 1560.74it/s]

normalized log_p_hat: [[-100.56005448]]
normalized log_p: [[-100.561114]]
convergence gap: [[0.00105952]]


100%|██████████| 137328/137328 [01:25<00:00, 1614.18it/s]
  0%|          | 162/137328 [00:00<01:24, 1613.85it/s]

normalized log_p_hat: [[-100.55918673]]
normalized log_p: [[-100.56005448]]
convergence gap: [[0.00086775]]


100%|██████████| 137328/137328 [01:25<00:00, 1610.18it/s]
  0%|          | 159/137328 [00:00<01:26, 1587.54it/s]

normalized log_p_hat: [[-100.55847312]]
normalized log_p: [[-100.55918673]]
convergence gap: [[0.00071361]]


100%|██████████| 137328/137328 [01:25<00:00, 1602.35it/s]
  0%|          | 161/137328 [00:00<01:25, 1602.04it/s]

normalized log_p_hat: [[-100.55788391]]
normalized log_p: [[-100.55847312]]
convergence gap: [[0.00058921]]
iteration: 25


100%|██████████| 137328/137328 [01:24<00:00, 1616.26it/s]

normalized log_p_hat: [[-100.5573955]]
normalized log_p: [[-100.55788391]]
convergence gap: [[0.00048841]]





In [24]:
# 26 iterations, ~39 min
np.save('results/em_mu.npy', mu_hat)
np.save('results/em_R.npy', R_hat)
np.save('results/em_log_p.npy', log_p_hat)

## McMichael’s Algorithm

In [25]:
@tf.function(experimental_relax_shapes=True)
def run_graph_mcmichael(mu, R, y_t, H_yt, H_yt_trans):
    # for R estimation
    R_yt = H_yt @ R @ H_yt_trans
    R_yt_det = tf.linalg.det(R_yt)
    R_yt_inv = tf.linalg.inv(R_yt)
    mu_yt = tf.matmul(H_yt, mu)
    log_p_gradient_part = H_yt_trans @ (R_yt_inv - R_yt_inv @ (y_t - mu_yt) @ tf.transpose(y_t - mu_yt) @ R_yt_inv) @ H_yt

    # for mu estimation
    Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
    Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t
    
    # for log likelihood calculation
    log_p_hat_part = -1/2*(tf.math.log(R_yt_det) + tf.transpose(y_t - mu_yt) @ R_yt_inv @ (y_t - mu_yt) + k*LOG_2PI)
    
    return log_p_gradient_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part

In [26]:
def mcmichael(mu, R):
    gamma = 0.00001
    Hyt_trans_Ryt_inv_Hyt_sum = 0
    Hyt_trans_Ryt_inv_yt_sum = 0
    log_p_gradient = 0
    log_p_hat = 0

    for t in tqdm(range(n)):
        y_t = t_y_dict[t]
        H_yt = t_Hy_dict[t]
        H_yt_trans = t_Hy_trans_dict[t]
        
        log_p_gradient_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part = \
            run_graph_mcmichael(mu, R, y_t, H_yt, H_yt_trans)
        
        log_p_gradient += log_p_gradient_part
        Hyt_trans_Ryt_inv_Hyt_sum += Hyt_trans_Ryt_inv_Hyt_sum_part
        Hyt_trans_Ryt_inv_yt_sum += Hyt_trans_Ryt_inv_yt_sum_part
        log_p_hat += log_p_hat_part
        
    R_hat = R + gamma*(R @ (-1/2*log_p_gradient) @ R)
    mu_hat = tf.matmul(tf.linalg.inv(Hyt_trans_Ryt_inv_Hyt_sum), Hyt_trans_Ryt_inv_yt_sum)
    return mu_hat, R_hat, log_p_hat

In [27]:
delta = 0.0005
mu = mu_hat0
R = R_hat0_4
log_p = tf.constant(-np.inf, dtype=tf.float64)

for i in range(40):
    if i % 5 == 0:
        print(f'iteration: {i}')
    
    mu_hat, R_hat, log_p_hat = mcmichael(mu, R)
    convergence_criterion = log_p_hat/n - log_p/n < delta
    
    print('normalized log_p_hat:', (log_p_hat/n).numpy())
    print('normalized log_p:', (log_p/n).numpy())
    print('convergence gap:', (log_p_hat/n - log_p/n).numpy())
    
    if convergence_criterion:
        break
        
    # use new estimattions for next iteration
    mu = mu_hat
    R = R_hat
    log_p = log_p_hat

  0%|          | 0/137328 [00:00<?, ?it/s]

iteration: 0


100%|██████████| 137328/137328 [01:00<00:00, 2262.36it/s]
  0%|          | 203/137328 [00:00<01:07, 2021.15it/s]

normalized log_p_hat: [[-101.37606558]]
normalized log_p: -inf
convergence gap: [[inf]]


100%|██████████| 137328/137328 [00:59<00:00, 2297.82it/s]
  0%|          | 209/137328 [00:00<01:05, 2082.63it/s]

normalized log_p_hat: [[-101.15033828]]
normalized log_p: [[-101.37606558]]
convergence gap: [[0.2257273]]


100%|██████████| 137328/137328 [01:01<00:00, 2216.47it/s]
  0%|          | 216/137328 [00:00<01:03, 2151.60it/s]

normalized log_p_hat: [[-101.0176644]]
normalized log_p: [[-101.15033828]]
convergence gap: [[0.13267387]]


100%|██████████| 137328/137328 [01:01<00:00, 2218.82it/s]
  0%|          | 479/137328 [00:00<00:57, 2366.49it/s]

normalized log_p_hat: [[-100.92620553]]
normalized log_p: [[-101.0176644]]
convergence gap: [[0.09145888]]


100%|██████████| 137328/137328 [01:01<00:00, 2215.60it/s]
  0%|          | 432/137328 [00:00<01:03, 2146.46it/s]

normalized log_p_hat: [[-100.85833264]]
normalized log_p: [[-100.92620553]]
convergence gap: [[0.06787289]]
iteration: 5


100%|██████████| 137328/137328 [01:02<00:00, 2205.16it/s]
  0%|          | 234/137328 [00:00<00:59, 2322.91it/s]

normalized log_p_hat: [[-100.80569404]]
normalized log_p: [[-100.85833264]]
convergence gap: [[0.0526386]]


100%|██████████| 137328/137328 [01:01<00:00, 2236.29it/s]
  0%|          | 444/137328 [00:00<01:03, 2165.48it/s]

normalized log_p_hat: [[-100.76380998]]
normalized log_p: [[-100.80569404]]
convergence gap: [[0.04188405]]


100%|██████████| 137328/137328 [01:02<00:00, 2202.84it/s]
  0%|          | 236/137328 [00:00<00:58, 2355.36it/s]

normalized log_p_hat: [[-100.72994344]]
normalized log_p: [[-100.76380998]]
convergence gap: [[0.03386655]]


100%|██████████| 137328/137328 [01:02<00:00, 2211.46it/s]
  0%|          | 235/137328 [00:00<00:58, 2343.23it/s]

normalized log_p_hat: [[-100.7022525]]
normalized log_p: [[-100.72994344]]
convergence gap: [[0.02769093]]


100%|██████████| 137328/137328 [01:02<00:00, 2213.90it/s]
  0%|          | 478/137328 [00:00<00:57, 2368.23it/s]

normalized log_p_hat: [[-100.67941999]]
normalized log_p: [[-100.7022525]]
convergence gap: [[0.02283252]]
iteration: 10


100%|██████████| 137328/137328 [01:03<00:00, 2175.11it/s]
  0%|          | 434/137328 [00:00<01:03, 2151.30it/s]

normalized log_p_hat: [[-100.66046803]]
normalized log_p: [[-100.67941999]]
convergence gap: [[0.01895195]]


100%|██████████| 137328/137328 [01:01<00:00, 2225.26it/s]
  0%|          | 216/137328 [00:00<01:03, 2150.64it/s]

normalized log_p_hat: [[-100.64465236]]
normalized log_p: [[-100.66046803]]
convergence gap: [[0.01581567]]


100%|██████████| 137328/137328 [01:01<00:00, 2234.19it/s]
  0%|          | 235/137328 [00:00<00:58, 2347.84it/s]

normalized log_p_hat: [[-100.63139579]]
normalized log_p: [[-100.64465236]]
convergence gap: [[0.01325658]]


100%|██████████| 137328/137328 [01:01<00:00, 2217.37it/s]
  0%|          | 218/137328 [00:00<01:03, 2172.07it/s]

normalized log_p_hat: [[-100.62024378]]
normalized log_p: [[-100.63139579]]
convergence gap: [[0.01115201]]


100%|██████████| 137328/137328 [01:01<00:00, 2239.62it/s]
  0%|          | 212/137328 [00:00<01:04, 2114.90it/s]

normalized log_p_hat: [[-100.6108337]]
normalized log_p: [[-100.62024378]]
convergence gap: [[0.00941008]]
iteration: 15


100%|██████████| 137328/137328 [00:57<00:00, 2393.63it/s]
  0%|          | 496/137328 [00:00<00:55, 2463.56it/s]

normalized log_p_hat: [[-100.60287292]]
normalized log_p: [[-100.6108337]]
convergence gap: [[0.00796078]]


100%|██████████| 137328/137328 [00:58<00:00, 2339.16it/s]
  0%|          | 232/137328 [00:00<00:59, 2312.12it/s]

normalized log_p_hat: [[-100.59612306]]
normalized log_p: [[-100.60287292]]
convergence gap: [[0.00674986]]


100%|██████████| 137328/137328 [00:57<00:00, 2369.12it/s]
  0%|          | 212/137328 [00:00<01:04, 2114.38it/s]

normalized log_p_hat: [[-100.59038839]]
normalized log_p: [[-100.59612306]]
convergence gap: [[0.00573467]]


100%|██████████| 137328/137328 [00:57<00:00, 2393.70it/s]
  0%|          | 480/137328 [00:00<00:57, 2388.15it/s]

normalized log_p_hat: [[-100.58550713]]
normalized log_p: [[-100.59038839]]
convergence gap: [[0.00488125]]


100%|██████████| 137328/137328 [00:57<00:00, 2389.34it/s]
  0%|          | 243/137328 [00:00<00:56, 2425.98it/s]

normalized log_p_hat: [[-100.58134493]]
normalized log_p: [[-100.58550713]]
convergence gap: [[0.0041622]]
iteration: 20


100%|██████████| 137328/137328 [00:57<00:00, 2383.90it/s]
  0%|          | 214/137328 [00:00<01:04, 2131.92it/s]

normalized log_p_hat: [[-100.57778972]]
normalized log_p: [[-100.58134493]]
convergence gap: [[0.00355521]]


100%|██████████| 137328/137328 [00:57<00:00, 2383.01it/s]
  0%|          | 481/137328 [00:00<00:57, 2378.84it/s]

normalized log_p_hat: [[-100.57474776]]
normalized log_p: [[-100.57778972]]
convergence gap: [[0.00304196]]


100%|██████████| 137328/137328 [00:58<00:00, 2361.97it/s]
  0%|          | 494/137328 [00:00<00:55, 2444.15it/s]

normalized log_p_hat: [[-100.57214044]]
normalized log_p: [[-100.57474776]]
convergence gap: [[0.00260732]]


100%|██████████| 137328/137328 [00:57<00:00, 2368.75it/s]
  0%|          | 244/137328 [00:00<00:56, 2434.96it/s]

normalized log_p_hat: [[-100.56990174]]
normalized log_p: [[-100.57214044]]
convergence gap: [[0.0022387]]


100%|██████████| 137328/137328 [00:58<00:00, 2366.04it/s]
  0%|          | 480/137328 [00:00<00:57, 2376.13it/s]

normalized log_p_hat: [[-100.5679761]]
normalized log_p: [[-100.56990174]]
convergence gap: [[0.00192564]]
iteration: 25


100%|██████████| 137328/137328 [00:57<00:00, 2384.67it/s]
  0%|          | 245/137328 [00:00<00:56, 2442.01it/s]

normalized log_p_hat: [[-100.56631673]]
normalized log_p: [[-100.5679761]]
convergence gap: [[0.00165937]]


100%|██████████| 137328/137328 [00:59<00:00, 2324.70it/s]
  0%|          | 216/137328 [00:00<01:03, 2152.23it/s]

normalized log_p_hat: [[-100.56488416]]
normalized log_p: [[-100.56631673]]
convergence gap: [[0.00143256]]


100%|██████████| 137328/137328 [01:01<00:00, 2240.57it/s]
  0%|          | 242/137328 [00:00<00:56, 2419.97it/s]

normalized log_p_hat: [[-100.5636451]]
normalized log_p: [[-100.56488416]]
convergence gap: [[0.00123906]]


100%|██████████| 137328/137328 [01:00<00:00, 2269.26it/s]
  0%|          | 201/137328 [00:00<01:08, 2002.11it/s]

normalized log_p_hat: [[-100.56257138]]
normalized log_p: [[-100.5636451]]
convergence gap: [[0.00107372]]


100%|██████████| 137328/137328 [01:09<00:00, 1977.95it/s]
  0%|          | 373/137328 [00:00<01:11, 1911.03it/s]

normalized log_p_hat: [[-100.56163917]]
normalized log_p: [[-100.56257138]]
convergence gap: [[0.00093221]]
iteration: 30


100%|██████████| 137328/137328 [01:01<00:00, 2218.31it/s]
  0%|          | 211/137328 [00:00<01:05, 2103.39it/s]

normalized log_p_hat: [[-100.56082829]]
normalized log_p: [[-100.56163917]]
convergence gap: [[0.00081088]]


100%|██████████| 137328/137328 [01:01<00:00, 2232.80it/s]
  0%|          | 482/137328 [00:00<00:57, 2389.81it/s]

normalized log_p_hat: [[-100.56012163]]
normalized log_p: [[-100.56082829]]
convergence gap: [[0.00070667]]


100%|██████████| 137328/137328 [01:01<00:00, 2241.42it/s]
  0%|          | 214/137328 [00:00<01:04, 2129.67it/s]

normalized log_p_hat: [[-100.55950462]]
normalized log_p: [[-100.56012163]]
convergence gap: [[0.00061701]]


100%|██████████| 137328/137328 [01:01<00:00, 2239.37it/s]
  0%|          | 236/137328 [00:00<00:58, 2357.83it/s]

normalized log_p_hat: [[-100.5589649]]
normalized log_p: [[-100.55950462]]
convergence gap: [[0.00053972]]


100%|██████████| 137328/137328 [01:00<00:00, 2253.36it/s]

normalized log_p_hat: [[-100.55849192]]
normalized log_p: [[-100.5589649]]
convergence gap: [[0.00047298]]





In [28]:
# 35 iterations, ~35 min
np.save('results/mcmichael_mu.npy', mu_hat)
np.save('results/mcmichael_R.npy', R_hat)
np.save('results/mcmichael_log_p.npy', log_p_hat)

## Evaluation

In [29]:
def evaluate(mu, R):
    square_error = 0
    l = 0
    for t in tqdm(range(n)):
        movie_ids_t = t_movie_ids_labels_dict[t]
        labels_t = t_labels_dict[t]

        # calculate X_t_hat
        y_t = t_y_dict[t]
        H_xt = t_Hx_dict[t]
        H_xt_trans = t_Hx_trans_dict[t]
        H_yt = t_Hy_dict[t]
        H_yt_trans = t_Hy_trans_dict[t]
        
        R_xt = H_xt @ R @ H_xt_trans
        R_yt = H_yt @ R @ H_yt_trans
        R_yt_inv = tf.linalg.inv(R_yt)
        R_xtyt = H_xt @ R @ H_yt_trans
        
        mu_yt = tf.matmul(H_yt, mu)
        mu_xt = tf.matmul(H_xt, mu)

        X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt
        
        # clip ratings
        predictions_t = tf.matmul(H_xt_trans, X_t_hat).numpy()[movie_ids_t-1]
        predictions_t = np.clip(predictions_t, 1, 5)
        
        # accumulate square_error and l
        square_error += tf.matmul(tf.transpose(labels_t - predictions_t), labels_t - predictions_t)
        l += len(labels_t)
    return np.sqrt(square_error/l)

In [30]:
em_mu = np.load('results/em_mu.npy')
em_R = np.load('results/em_R.npy')
rmse = evaluate(em_mu, em_R)
rmse

100%|██████████| 137328/137328 [02:02<00:00, 1121.03it/s]


array([[1.07507623]])

In [31]:
mcmichael_mu = np.load('results/mcmichael_mu.npy')
mcmichael_R = np.load('results/mcmichael_R.npy')
rmse = evaluate(mcmichael_mu, mcmichael_R)
rmse

100%|██████████| 137328/137328 [01:58<00:00, 1159.83it/s]


array([[1.07510565]])