# Product Recommendation
Reference: https://ieeexplore.ieee.org/document/5430993

In [49]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

In [50]:
tf.__version__

'2.2.0'

## Data Preprocessing

In [51]:
Y_data = pd.read_csv('data/Y.csv', header=None, names=['Rating','Movie','User'], dtype=int) # training data
P_data = pd.read_csv('data/P.csv', header=None, names=['Rating','Movie','User'], dtype=int) # test data ('probe-set' mentioned in paper)

In [52]:
display(Y_data.head())
display(P_data.head())

Unnamed: 0,Rating,Movie,User
0,5,2,1
1,4,7,1
2,4,8,1
3,4,11,1
4,4,12,1


Unnamed: 0,Rating,Movie,User
0,3,6,1
1,5,96,1
2,3,1,2
3,3,33,3
4,5,42,4


In [53]:
Y_data.shape, P_data.shape

((3399874, 3), (189699, 3))

In [54]:
print(Y_data['Rating'].unique().max(), Y_data['Movie'].unique().max(), Y_data['User'].unique().max())
print(P_data['Rating'].unique().max(), P_data['Movie'].unique().max(), P_data['User'].unique().max())

5 100 137328
5 100 137328


In [55]:
k, n = Y_data['Movie'].unique().max(), Y_data['User'].unique().max()
k, n

(100, 137328)

In [56]:
indices = np.reshape(Y_data[['Movie', 'User']].values-1, (-1, 2))
print(indices.shape)
indices

(3399874, 2)


array([[     1,      0],
       [     6,      0],
       [     7,      0],
       ...,
       [    97, 137327],
       [    98, 137327],
       [    99, 137327]])

In [57]:
Z_sparse = tf.SparseTensor(indices=indices, values=Y_data['Rating'].values, dense_shape=[k, n])
Z_sparse = tf.cast(Z_sparse, tf.float64)

In [10]:
# %%time
# t = 0
# Z_t = tf.sparse.slice(Z_sparse, [0, t], [100, 1])

CPU times: user 19.2 ms, sys: 1.05 ms, total: 20.3 ms
Wall time: 18.8 ms


In [58]:
# use dense matrices for faster linear transformations since all matrices can fit in memory
Z = tf.sparse.to_dense(Z_sparse, validate_indices=False)
Z

<tf.Tensor: shape=(100, 137328), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [5., 0., 0., ..., 0., 0., 3.],
       [0., 0., 0., ..., 3., 0., 0.],
       ...,
       [5., 0., 0., ..., 4., 0., 4.],
       [4., 0., 3., ..., 0., 0., 4.],
       [3., 4., 0., ..., 4., 5., 4.]])>

In [60]:
# memoization
t_Z_dict = {}
t_y_dict = {}
t_x_dict = {}
t_Hy_dict = {}
t_Hx_dict = {}
t_Hy_trans_dict = {}
t_Hx_trans_dict = {}
t_movie_ids_labels_dict = {}
t_labels_dict = {}

for t in tqdm(range(n)):
    movie_ids = Y_data['Movie'][Y_data['User']==t+1].values
    H_yt = tf.constant(np.identity(k)[movie_ids-1], dtype=tf.float64)
    H_xt = tf.constant(np.delete(np.identity(k), movie_ids-1, 0), dtype=tf.float64)
    Z_t = tf.expand_dims(Z[:, t], axis=1) # alternative: Z_t = tf.sparse.slice(Z_sparse, [0, t], [100, 1]) 
    y_t = tf.matmul(H_yt, Z_t)
    x_t = tf.matmul(H_xt, Z_t)
    
    # store the variables for fast future reference
    t_Hy_dict[t] = H_yt
    t_Hx_dict[t] = H_xt
    t_Hx_trans_dict[t] = tf.transpose(H_xt)
    t_Hy_trans_dict[t] = tf.transpose(H_yt)
    
    t_x_dict[t] = x_t
    t_y_dict[t] = y_t
    t_Z_dict[t] = Z_t
    
    t_movie_ids_labels_dict[t] = P_data['Movie'][P_data['User']==t+1].values
    t_labels_dict[t] = tf.expand_dims(P_data['Rating'][P_data['User']==t+1].values, axis=1)

100%|██████████| 137328/137328 [13:45<00:00, 166.34it/s]


## Initialization
$\mu$ has 1 type available <br />
R has 4 types available

In [61]:
# initial estimate of mu
N = 0
H_yty_t = 0

for t in tqdm(range(n)):
    N += tf.matmul(t_Hy_trans_dict[t], t_Hy_dict[t])
    H_yty_t += tf.matmul(t_Hy_trans_dict[t], t_y_dict[t])

100%|██████████| 137328/137328 [00:15<00:00, 8605.84it/s] 


In [62]:
# The ith diagonal element of N equals the total number of ratings of the ith product.
N

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[20017.,     0.,     0., ...,     0.,     0.,     0.],
       [    0., 23917.,     0., ...,     0.,     0.,     0.],
       [    0.,     0., 31634., ...,     0.,     0.,     0.],
       ...,
       [    0.,     0.,     0., ..., 60896.,     0.,     0.],
       [    0.,     0.,     0., ...,     0., 61521.,     0.],
       [    0.,     0.,     0., ...,     0.,     0., 64506.]])>

In [63]:
print(H_yty_t.shape)
mu_hat0 = tf.matmul(tf.linalg.inv(N), H_yty_t)
mu_hat0.shape

(100, 1)


TensorShape([100, 1])

In [64]:
# initial estimates of R (4 types available)
R_hat0_1 = tf.constant(np.identity(k), dtype=tf.float64)
R_hat0_1

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>

In [69]:
S = 0
for t in tqdm(range(n)):
    Hyt = t_Hy_dict[t]
    yt = t_y_dict[t]
    Hytmu_hat0 = tf.matmul(Hyt, mu_hat0)
    S += tf.matmul(tf.transpose(Hyt), tf.matmul(yt - Hytmu_hat0, tf.matmul(tf.transpose(yt - Hytmu_hat0), Hyt)))

100%|██████████| 137328/137328 [00:31<00:00, 4384.91it/s]


In [70]:
# diag_S is the diagonal matrix consisting of the diagonal elements of S
diag_S = tf.linalg.diag(tf.linalg.tensor_diag_part(S))
R_hat0_2 = tf.matmul(tf.linalg.inv(N), diag_S)
R_hat0_2

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[1.72440427, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.94219113, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.43659411, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.18291506, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.03485685,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.26227449]])>

In [71]:
# R_hat0_3 is not a good initializer when rating variances are far from one
R_hat0_3 = tf.matmul(tf.linalg.sqrtm(tf.linalg.inv(diag_S)), tf.matmul(S, tf.linalg.sqrtm(tf.linalg.inv(diag_S))))
R_hat0_3

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[ 1.        ,  0.07418256, -0.01158277, ..., -0.01462987,
        -0.02215371, -0.01844816],
       [ 0.07418256,  1.        ,  0.03674347, ...,  0.0256191 ,
         0.03563234,  0.03926307],
       [-0.01158277,  0.03674347,  1.        , ...,  0.10955311,
         0.12823359,  0.15560634],
       ...,
       [-0.01462987,  0.0256191 ,  0.10955311, ...,  1.        ,
         0.19781317,  0.15164928],
       [-0.02215371,  0.03563234,  0.12823359, ...,  0.19781317,
         1.        ,  0.18995689],
       [-0.01844816,  0.03926307,  0.15560634, ...,  0.15164928,
         0.18995689,  1.        ]])>

In [72]:
R_hat0_4 = tf.matmul(tf.linalg.sqrtm(tf.linalg.inv(N)), tf.matmul(S, tf.linalg.sqrtm(tf.linalg.inv(N))))
R_hat0_4

<tf.Tensor: shape=(100, 100), dtype=float64, numpy=
array([[ 1.72440427,  0.09455639, -0.01823052, ..., -0.02089473,
        -0.02959417, -0.02721758],
       [ 0.09455639,  0.94219113,  0.04274809, ...,  0.02704644,
         0.03518471,  0.04281842],
       [-0.01823052,  0.04274809,  1.43659411, ...,  0.14281326,
         0.15635399,  0.20954206],
       ...,
       [-0.02089473,  0.02704644,  0.14281326, ...,  1.18291506,
         0.21886288,  0.18530794],
       [-0.02959417,  0.03518471,  0.15635399, ...,  0.21886288,
         1.03485685,  0.21710614],
       [-0.02721758,  0.04281842,  0.20954206, ...,  0.18530794,
         0.21710614,  1.26227449]])>

## Expectation Maximization Algorithm

In [113]:
LOG_2PI = tf.math.log(2*tf.constant(np.pi, dtype=tf.float64))

@tf.function(experimental_relax_shapes=True)
def run_graph_em(mu, R, y_t, H_xt, H_xt_trans, H_yt, H_yt_trans):
    # for R estimation
    R_xt = H_xt @ R @ H_xt_trans
    R_yt = H_yt @ R @ H_yt_trans
    R_yt_det = tf.linalg.det(R_yt)
    R_yt_inv = tf.linalg.inv(R_yt)
    R_xtyt = H_xt @ R @ H_yt_trans

    mu_yt = tf.matmul(H_yt, mu)
    mu_xt = tf.matmul(H_xt, mu)

    X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt
    Z_t_hat = H_yt_trans @ y_t + H_xt_trans @ X_t_hat
    
    R_hat_sum_part = (Z_t_hat - mu) @ tf.transpose(Z_t_hat - mu) \
                        + H_xt_trans @ (R_xt - R_xtyt @ R_yt_inv @ tf.transpose(R_xtyt)) @ H_xt

    # for mu estimation
    Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
    Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t

    # for log likelihood calculation
    log_p_hat_part = -1/2*(tf.math.log(R_yt_det) + tf.transpose(y_t - mu_yt) @ R_yt_inv @ (y_t - mu_yt) + k*LOG_2PI) 
    
    return R_hat_sum_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part

In [104]:
def expectation_maximization(mu, R):
    Hyt_trans_Ryt_inv_Hyt_sum = 0
    Hyt_trans_Ryt_inv_yt_sum = 0
    R_hat_sum = 0
    log_p_hat = 0
    
    for t in tqdm(range(n)):
        y_t = t_y_dict[t]
        H_xt = t_Hx_dict[t]
        H_xt_trans = t_Hx_trans_dict[t]
        H_yt = t_Hy_dict[t]
        H_yt_trans = t_Hy_trans_dict[t]
        
        R_hat_sum_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part = \
            run_graph_em(mu, R, y_t, H_xt, H_xt_trans, H_yt, H_yt_trans)
        
        R_hat_sum += R_hat_sum_part
        Hyt_trans_Ryt_inv_Hyt_sum += Hyt_trans_Ryt_inv_Hyt_sum_part
        Hyt_trans_Ryt_inv_yt_sum += Hyt_trans_Ryt_inv_yt_sum_part
        log_p_hat += log_p_hat_part
        
    R_hat = R_hat_sum / n
    mu_hat = tf.matmul(tf.linalg.inv(Hyt_trans_Ryt_inv_Hyt_sum), Hyt_trans_Ryt_inv_yt_sum)    
    return mu_hat, R_hat, log_p_hat

In [106]:
delta = 0.0005
mu = mu_hat0
R = R_hat0_4
log_p = tf.constant(-np.inf, dtype=tf.float64)

for i in range(30):
    if i % 5 == 0:
        print(f'iteration: {i}')
    
    mu_hat, R_hat, log_p_hat = expectation_maximization(mu, R)
    convergence_criterion = log_p_hat/n - log_p/n < delta
    
    print('normalized log_p_hat:', (log_p_hat/n).numpy())
    print('normalized log_p:', (log_p/n).numpy())
    print('convergence gap:', (log_p_hat/n - log_p/n).numpy())
    
    if convergence_criterion:
        break
        
    # use new estimattions for next iteration
    mu = mu_hat
    R = R_hat
    log_p = log_p_hat

  0%|          | 254/137328 [00:00<01:52, 1216.96it/s]

iteration: 0


100%|██████████| 137328/137328 [01:26<00:00, 1588.54it/s]
  0%|          | 133/137328 [00:00<01:43, 1322.67it/s]

normalized log_p_hat: [[-101.37606558]]
normalized log_p: -inf
convergence gap: [[inf]]


100%|██████████| 137328/137328 [01:29<00:00, 1539.10it/s]
  0%|          | 132/137328 [00:00<01:44, 1311.52it/s]

normalized log_p_hat: [[-101.07910104]]
normalized log_p: [[-101.37606558]]
convergence gap: [[0.29696454]]


100%|██████████| 137328/137328 [01:29<00:00, 1535.26it/s]
  0%|          | 109/137328 [00:00<02:05, 1089.23it/s]

normalized log_p_hat: [[-100.92736468]]
normalized log_p: [[-101.07910104]]
convergence gap: [[0.15173636]]


100%|██████████| 137328/137328 [01:28<00:00, 1554.14it/s]
  0%|          | 156/137328 [00:00<01:28, 1553.32it/s]

normalized log_p_hat: [[-100.83160767]]
normalized log_p: [[-100.92736468]]
convergence gap: [[0.09575701]]


100%|██████████| 137328/137328 [01:28<00:00, 1556.88it/s]
  0%|          | 132/137328 [00:00<01:44, 1315.08it/s]

normalized log_p_hat: [[-100.76501937]]
normalized log_p: [[-100.83160767]]
convergence gap: [[0.0665883]]
iteration: 5


100%|██████████| 137328/137328 [01:31<00:00, 1494.36it/s]
  0%|          | 157/137328 [00:00<01:27, 1562.38it/s]

normalized log_p_hat: [[-100.71663616]]
normalized log_p: [[-100.76501937]]
convergence gap: [[0.04838321]]


100%|██████████| 137328/137328 [01:30<00:00, 1514.79it/s]
  0%|          | 169/137328 [00:00<01:21, 1688.48it/s]

normalized log_p_hat: [[-100.68061666]]
normalized log_p: [[-100.71663616]]
convergence gap: [[0.03601951]]


100%|██████████| 137328/137328 [01:22<00:00, 1663.21it/s]
  0%|          | 167/137328 [00:00<01:22, 1668.97it/s]

normalized log_p_hat: [[-100.65335758]]
normalized log_p: [[-100.68061666]]
convergence gap: [[0.02725908]]


100%|██████████| 137328/137328 [01:22<00:00, 1669.91it/s]
  0%|          | 167/137328 [00:00<01:22, 1661.91it/s]

normalized log_p_hat: [[-100.63248001]]
normalized log_p: [[-100.65335758]]
convergence gap: [[0.02087757]]


100%|██████████| 137328/137328 [01:23<00:00, 1649.45it/s]
  0%|          | 162/137328 [00:00<01:25, 1612.08it/s]

normalized log_p_hat: [[-100.61634676]]
normalized log_p: [[-100.63248001]]
convergence gap: [[0.01613325]]
iteration: 10


100%|██████████| 137328/137328 [01:26<00:00, 1591.01it/s]
  0%|          | 139/137328 [00:00<01:39, 1384.42it/s]

normalized log_p_hat: [[-100.60379461]]
normalized log_p: [[-100.61634676]]
convergence gap: [[0.01255214]]


100%|██████████| 137328/137328 [01:27<00:00, 1562.72it/s]
  0%|          | 142/137328 [00:00<01:36, 1419.03it/s]

normalized log_p_hat: [[-100.59397571]]
normalized log_p: [[-100.60379461]]
convergence gap: [[0.0098189]]


100%|██████████| 137328/137328 [01:28<00:00, 1546.74it/s]
  0%|          | 67/137328 [00:00<03:25, 668.44it/s]

normalized log_p_hat: [[-100.5862597]]
normalized log_p: [[-100.59397571]]
convergence gap: [[0.00771601]]


100%|██████████| 137328/137328 [01:34<00:00, 1458.94it/s]
  0%|          | 159/137328 [00:00<01:26, 1589.05it/s]

normalized log_p_hat: [[-100.58017111]]
normalized log_p: [[-100.5862597]]
convergence gap: [[0.00608859]]


100%|██████████| 137328/137328 [01:30<00:00, 1511.51it/s]
  0%|          | 160/137328 [00:00<01:25, 1595.50it/s]

normalized log_p_hat: [[-100.5753476]]
normalized log_p: [[-100.58017111]]
convergence gap: [[0.0048235]]
iteration: 15


100%|██████████| 137328/137328 [01:32<00:00, 1480.77it/s]
  0%|          | 156/137328 [00:00<01:28, 1556.55it/s]

normalized log_p_hat: [[-100.57151119]]
normalized log_p: [[-100.5753476]]
convergence gap: [[0.00383642]]


100%|██████████| 137328/137328 [01:31<00:00, 1501.09it/s]
  0%|          | 159/137328 [00:00<01:26, 1584.23it/s]

normalized log_p_hat: [[-100.56844755]]
normalized log_p: [[-100.57151119]]
convergence gap: [[0.00306364]]


100%|██████████| 137328/137328 [01:33<00:00, 1472.71it/s]
  0%|          | 150/137328 [00:00<01:31, 1499.72it/s]

normalized log_p_hat: [[-100.56599094]]
normalized log_p: [[-100.56844755]]
convergence gap: [[0.00245661]]


100%|██████████| 137328/137328 [01:26<00:00, 1581.36it/s]
  0%|          | 142/137328 [00:00<01:36, 1415.65it/s]

normalized log_p_hat: [[-100.56401278]]
normalized log_p: [[-100.56599094]]
convergence gap: [[0.00197815]]


100%|██████████| 137328/137328 [01:32<00:00, 1486.95it/s]
  0%|          | 144/137328 [00:00<01:35, 1433.59it/s]

normalized log_p_hat: [[-100.56241312]]
normalized log_p: [[-100.56401278]]
convergence gap: [[0.00159966]]
iteration: 20


100%|██████████| 137328/137328 [01:28<00:00, 1552.10it/s]
  0%|          | 317/137328 [00:00<01:27, 1572.15it/s]

normalized log_p_hat: [[-100.561114]]
normalized log_p: [[-100.56241312]]
convergence gap: [[0.00129912]]


100%|██████████| 137328/137328 [01:29<00:00, 1536.28it/s]
  0%|          | 159/137328 [00:00<01:26, 1582.91it/s]

normalized log_p_hat: [[-100.56005448]]
normalized log_p: [[-100.561114]]
convergence gap: [[0.00105952]]


100%|██████████| 137328/137328 [01:30<00:00, 1517.85it/s]
  0%|          | 153/137328 [00:00<01:29, 1526.22it/s]

normalized log_p_hat: [[-100.55918673]]
normalized log_p: [[-100.56005448]]
convergence gap: [[0.00086775]]


100%|██████████| 137328/137328 [01:31<00:00, 1505.32it/s]
  0%|          | 155/137328 [00:00<01:28, 1544.04it/s]

normalized log_p_hat: [[-100.55847312]]
normalized log_p: [[-100.55918673]]
convergence gap: [[0.00071361]]


100%|██████████| 137328/137328 [01:31<00:00, 1498.94it/s]
  0%|          | 110/137328 [00:00<02:05, 1097.32it/s]

normalized log_p_hat: [[-100.55788391]]
normalized log_p: [[-100.55847312]]
convergence gap: [[0.00058921]]
iteration: 25


100%|██████████| 137328/137328 [01:30<00:00, 1514.13it/s]

normalized log_p_hat: [[-100.5573955]]
normalized log_p: [[-100.55788391]]
convergence gap: [[0.00048841]]





In [107]:
# 26 iterations, ~39 min
np.save('results/em_mu.npy', mu_hat)
np.save('results/em_R.npy', R_hat)
np.save('results/em_log_p.npy', log_p_hat)

## McMichael’s Algorithm

In [114]:
@tf.function(experimental_relax_shapes=True)
def run_graph_mcmichael(mu, R, y_t, H_yt, H_yt_trans):
    # for R estimation
    R_yt = H_yt @ R @ H_yt_trans
    R_yt_det = tf.linalg.det(R_yt)
    R_yt_inv = tf.linalg.inv(R_yt)
    mu_yt = tf.matmul(H_yt, mu)
    log_p_gradient_part = H_yt_trans @ (R_yt_inv - R_yt_inv @ (y_t - mu_yt) @ tf.transpose(y_t - mu_yt) @ R_yt_inv) @ H_yt

    # for mu estimation
    Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
    Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t
    
    # for log likelihood calculation
    log_p_hat_part = -1/2*(tf.math.log(R_yt_det) + tf.transpose(y_t - mu_yt) @ R_yt_inv @ (y_t - mu_yt) + k*LOG_2PI)
    
    return log_p_gradient_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part

In [115]:
def mcmichael(mu, R):
    gamma = 0.00001
    Hyt_trans_Ryt_inv_Hyt_sum = 0
    Hyt_trans_Ryt_inv_yt_sum = 0
    log_p_gradient = 0
    log_p_hat = 0

    for t in tqdm(range(n)):
        y_t = t_y_dict[t]
        H_yt = t_Hy_dict[t]
        H_yt_trans = t_Hy_trans_dict[t]
        
        log_p_gradient_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part = \
            run_graph_mcmichael(mu, R, y_t, H_yt, H_yt_trans)
        
        log_p_gradient += log_p_gradient_part
        Hyt_trans_Ryt_inv_Hyt_sum += Hyt_trans_Ryt_inv_Hyt_sum_part
        Hyt_trans_Ryt_inv_yt_sum += Hyt_trans_Ryt_inv_yt_sum_part
        log_p_hat += log_p_hat_part
        
    R_hat = R + gamma*(R @ (-1/2*log_p_gradient) @ R)
    mu_hat = tf.matmul(tf.linalg.inv(Hyt_trans_Ryt_inv_Hyt_sum), Hyt_trans_Ryt_inv_yt_sum)
    return mu_hat, R_hat, log_p_hat

In [119]:
delta = 0.0005
mu = mu_hat0
R = R_hat0_4
log_p = tf.constant(-np.inf, dtype=tf.float64)

for i in range(40):
    if i % 5 == 0:
        print(f'iteration: {i}')
    
    mu_hat, R_hat, log_p_hat = mcmichael(mu, R)
    convergence_criterion = log_p_hat/n - log_p/n < delta
    
    print('normalized log_p_hat:', (log_p_hat/n).numpy())
    print('normalized log_p:', (log_p/n).numpy())
    print('convergence gap:', (log_p_hat/n - log_p/n).numpy())
    
    if convergence_criterion:
        break
        
    # use new estimattions for next iteration
    mu = mu_hat
    R = R_hat
    log_p = log_p_hat

  0%|          | 165/137328 [00:00<01:23, 1649.57it/s]

iteration: 0


100%|██████████| 137328/137328 [01:02<00:00, 2210.07it/s]
  0%|          | 193/137328 [00:00<01:11, 1927.05it/s]

normalized log_p_hat: [[-101.37606558]]
normalized log_p: -inf
convergence gap: [[inf]]


100%|██████████| 137328/137328 [01:02<00:00, 2186.89it/s]
  0%|          | 196/137328 [00:00<01:10, 1947.64it/s]

normalized log_p_hat: [[-101.15033828]]
normalized log_p: [[-101.37606558]]
convergence gap: [[0.2257273]]


100%|██████████| 137328/137328 [01:03<00:00, 2178.72it/s]
  0%|          | 219/137328 [00:00<01:02, 2186.05it/s]

normalized log_p_hat: [[-101.0176644]]
normalized log_p: [[-101.15033828]]
convergence gap: [[0.13267387]]


100%|██████████| 137328/137328 [01:01<00:00, 2239.03it/s]
  0%|          | 205/137328 [00:00<01:07, 2044.42it/s]

normalized log_p_hat: [[-100.92620553]]
normalized log_p: [[-101.0176644]]
convergence gap: [[0.09145888]]


100%|██████████| 137328/137328 [01:02<00:00, 2183.10it/s]
  0%|          | 196/137328 [00:00<01:09, 1959.81it/s]

normalized log_p_hat: [[-100.85833264]]
normalized log_p: [[-100.92620553]]
convergence gap: [[0.06787289]]
iteration: 5


100%|██████████| 137328/137328 [01:00<00:00, 2281.33it/s]
  0%|          | 214/137328 [00:00<01:04, 2133.54it/s]

normalized log_p_hat: [[-100.80569404]]
normalized log_p: [[-100.85833264]]
convergence gap: [[0.0526386]]


100%|██████████| 137328/137328 [01:01<00:00, 2246.61it/s]
  0%|          | 197/137328 [00:00<01:09, 1964.22it/s]

normalized log_p_hat: [[-100.76380998]]
normalized log_p: [[-100.80569404]]
convergence gap: [[0.04188405]]


100%|██████████| 137328/137328 [01:00<00:00, 2264.31it/s]
  0%|          | 204/137328 [00:00<01:07, 2039.98it/s]

normalized log_p_hat: [[-100.72994344]]
normalized log_p: [[-100.76380998]]
convergence gap: [[0.03386655]]


100%|██████████| 137328/137328 [01:00<00:00, 2270.03it/s]
  0%|          | 216/137328 [00:00<01:03, 2151.57it/s]

normalized log_p_hat: [[-100.7022525]]
normalized log_p: [[-100.72994344]]
convergence gap: [[0.02769093]]


100%|██████████| 137328/137328 [01:00<00:00, 2276.62it/s]
  0%|          | 199/137328 [00:00<01:08, 1988.94it/s]

normalized log_p_hat: [[-100.67941999]]
normalized log_p: [[-100.7022525]]
convergence gap: [[0.02283252]]
iteration: 10


100%|██████████| 137328/137328 [01:00<00:00, 2270.50it/s]
  0%|          | 204/137328 [00:00<01:07, 2037.45it/s]

normalized log_p_hat: [[-100.66046803]]
normalized log_p: [[-100.67941999]]
convergence gap: [[0.01895195]]


100%|██████████| 137328/137328 [01:03<00:00, 2164.01it/s]
  0%|          | 210/137328 [00:00<01:05, 2099.52it/s]

normalized log_p_hat: [[-100.64465236]]
normalized log_p: [[-100.66046803]]
convergence gap: [[0.01581567]]


100%|██████████| 137328/137328 [00:59<00:00, 2303.04it/s]
  0%|          | 237/137328 [00:00<00:57, 2368.87it/s]

normalized log_p_hat: [[-100.63139579]]
normalized log_p: [[-100.64465236]]
convergence gap: [[0.01325658]]


100%|██████████| 137328/137328 [01:00<00:00, 2268.23it/s]
  0%|          | 191/137328 [00:00<01:12, 1903.77it/s]

normalized log_p_hat: [[-100.62024378]]
normalized log_p: [[-100.63139579]]
convergence gap: [[0.01115201]]


100%|██████████| 137328/137328 [01:01<00:00, 2250.48it/s]
  0%|          | 230/137328 [00:00<00:59, 2292.73it/s]

normalized log_p_hat: [[-100.6108337]]
normalized log_p: [[-100.62024378]]
convergence gap: [[0.00941008]]
iteration: 15


100%|██████████| 137328/137328 [00:58<00:00, 2329.22it/s]
  0%|          | 237/137328 [00:00<00:58, 2362.37it/s]

normalized log_p_hat: [[-100.60287292]]
normalized log_p: [[-100.6108337]]
convergence gap: [[0.00796078]]


100%|██████████| 137328/137328 [00:59<00:00, 2312.22it/s]
  0%|          | 196/137328 [00:00<01:10, 1956.11it/s]

normalized log_p_hat: [[-100.59612306]]
normalized log_p: [[-100.60287292]]
convergence gap: [[0.00674986]]


100%|██████████| 137328/137328 [01:02<00:00, 2213.59it/s]
  0%|          | 208/137328 [00:00<01:06, 2075.45it/s]

normalized log_p_hat: [[-100.59038839]]
normalized log_p: [[-100.59612306]]
convergence gap: [[0.00573467]]


100%|██████████| 137328/137328 [01:01<00:00, 2239.23it/s]
  0%|          | 238/137328 [00:00<00:57, 2377.65it/s]

normalized log_p_hat: [[-100.58550713]]
normalized log_p: [[-100.59038839]]
convergence gap: [[0.00488125]]


100%|██████████| 137328/137328 [00:59<00:00, 2313.85it/s]
  0%|          | 237/137328 [00:00<00:57, 2367.87it/s]

normalized log_p_hat: [[-100.58134493]]
normalized log_p: [[-100.58550713]]
convergence gap: [[0.0041622]]
iteration: 20


100%|██████████| 137328/137328 [00:58<00:00, 2342.53it/s]
  0%|          | 237/137328 [00:00<00:57, 2366.24it/s]

normalized log_p_hat: [[-100.57778972]]
normalized log_p: [[-100.58134493]]
convergence gap: [[0.00355521]]


100%|██████████| 137328/137328 [01:00<00:00, 2272.12it/s]
  0%|          | 204/137328 [00:00<01:07, 2031.32it/s]

normalized log_p_hat: [[-100.57474776]]
normalized log_p: [[-100.57778972]]
convergence gap: [[0.00304196]]


100%|██████████| 137328/137328 [01:01<00:00, 2229.80it/s]
  0%|          | 234/137328 [00:00<00:58, 2338.95it/s]

normalized log_p_hat: [[-100.57214044]]
normalized log_p: [[-100.57474776]]
convergence gap: [[0.00260732]]


100%|██████████| 137328/137328 [01:02<00:00, 2192.80it/s]
  0%|          | 236/137328 [00:00<00:58, 2358.02it/s]

normalized log_p_hat: [[-100.56990174]]
normalized log_p: [[-100.57214044]]
convergence gap: [[0.0022387]]


100%|██████████| 137328/137328 [00:59<00:00, 2306.11it/s]
  0%|          | 239/137328 [00:00<00:57, 2383.94it/s]

normalized log_p_hat: [[-100.5679761]]
normalized log_p: [[-100.56990174]]
convergence gap: [[0.00192564]]
iteration: 25


100%|██████████| 137328/137328 [01:00<00:00, 2272.78it/s]
  0%|          | 237/137328 [00:00<00:57, 2367.25it/s]

normalized log_p_hat: [[-100.56631673]]
normalized log_p: [[-100.5679761]]
convergence gap: [[0.00165937]]


100%|██████████| 137328/137328 [00:59<00:00, 2315.30it/s]
  0%|          | 224/137328 [00:00<01:01, 2237.79it/s]

normalized log_p_hat: [[-100.56488416]]
normalized log_p: [[-100.56631673]]
convergence gap: [[0.00143256]]


100%|██████████| 137328/137328 [00:58<00:00, 2347.74it/s]
  0%|          | 238/137328 [00:00<00:57, 2379.74it/s]

normalized log_p_hat: [[-100.5636451]]
normalized log_p: [[-100.56488416]]
convergence gap: [[0.00123906]]


100%|██████████| 137328/137328 [00:58<00:00, 2354.25it/s]
  0%|          | 240/137328 [00:00<00:57, 2390.43it/s]

normalized log_p_hat: [[-100.56257138]]
normalized log_p: [[-100.5636451]]
convergence gap: [[0.00107372]]


100%|██████████| 137328/137328 [00:59<00:00, 2289.45it/s]
  0%|          | 227/137328 [00:00<01:00, 2257.52it/s]

normalized log_p_hat: [[-100.56163917]]
normalized log_p: [[-100.56257138]]
convergence gap: [[0.00093221]]
iteration: 30


100%|██████████| 137328/137328 [01:02<00:00, 2207.86it/s]
  0%|          | 214/137328 [00:00<01:04, 2136.50it/s]

normalized log_p_hat: [[-100.56082829]]
normalized log_p: [[-100.56163917]]
convergence gap: [[0.00081088]]


100%|██████████| 137328/137328 [01:03<00:00, 2163.17it/s]
  0%|          | 214/137328 [00:00<01:04, 2131.84it/s]

normalized log_p_hat: [[-100.56012163]]
normalized log_p: [[-100.56082829]]
convergence gap: [[0.00070667]]


100%|██████████| 137328/137328 [01:02<00:00, 2194.76it/s]
  0%|          | 212/137328 [00:00<01:04, 2118.20it/s]

normalized log_p_hat: [[-100.55950462]]
normalized log_p: [[-100.56012163]]
convergence gap: [[0.00061701]]


100%|██████████| 137328/137328 [01:02<00:00, 2214.82it/s]
  0%|          | 214/137328 [00:00<01:04, 2134.16it/s]

normalized log_p_hat: [[-100.5589649]]
normalized log_p: [[-100.55950462]]
convergence gap: [[0.00053972]]


100%|██████████| 137328/137328 [01:01<00:00, 2224.25it/s]

normalized log_p_hat: [[-100.55849192]]
normalized log_p: [[-100.5589649]]
convergence gap: [[0.00047298]]





In [121]:
# 35 iterations, ~35 min
np.save('results/mcmichael_mu.npy', mu_hat)
np.save('results/mcmichael_R.npy', R_hat)
np.save('results/mcmichael_log_p.npy', log_p_hat)

## Evaluation

In [108]:
def evaluate(mu, R):
    square_error = 0
    l = 0
    for t in tqdm(range(n)):
        movie_ids_t = t_movie_ids_labels_dict[t]
        labels_t = t_labels_dict[t]

        # calculate X_t_hat
        y_t = t_y_dict[t]
        H_xt = t_Hx_dict[t]
        H_xt_trans = t_Hx_trans_dict[t]
        H_yt = t_Hy_dict[t]
        H_yt_trans = t_Hy_trans_dict[t]
        
        R_xt = H_xt @ R @ H_xt_trans
        R_yt = H_yt @ R @ H_yt_trans
        R_yt_inv = tf.linalg.inv(R_yt)
        R_xtyt = H_xt @ R @ H_yt_trans
        
        mu_yt = tf.matmul(H_yt, mu)
        mu_xt = tf.matmul(H_xt, mu)

        X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt
        
        # clip ratings
        predictions_t = tf.matmul(H_xt_trans, X_t_hat).numpy()[movie_ids_t-1]
        predictions_t = np.clip(predictions_t, 1, 5)
        
        # accumulate square_error and l
        square_error += tf.matmul(tf.transpose(labels_t - predictions_t), labels_t - predictions_t)
        l += len(labels_t)
    return np.sqrt(square_error/l)

In [109]:
em_mu = np.load('results/em_mu.npy')
em_R = np.load('results/em_R.npy')
rmse = evaluate(em_mu, em_R)
rmse

100%|██████████| 137328/137328 [02:02<00:00, 1125.11it/s]


array([[1.07507623]])

In [122]:
mcmichael_mu = np.load('results/mcmichael_mu.npy')
mcmichael_R = np.load('results/mcmichael_R.npy')
rmse = evaluate(mcmichael_mu, mcmichael_R)
rmse

100%|██████████| 137328/137328 [01:56<00:00, 1178.34it/s]


array([[1.07510565]])