# Product Recommendation
Reference: [https://ieeexplore.ieee.org/document/5430993](https://www2.seas.gwu.edu/~simhaweb/champalg/cf/papers/wroberts.pdf)

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.ops.linalg.sparse.sparse_csr_matrix_ops import *
from tensorflow.raw_ops import SparseMatrixAdd, SparseMatrixMatMul, SparseMatrixSparseMatMul, SparseMatrixZeros
from tqdm import tqdm

In [2]:
print(tf.config.experimental.list_physical_devices('GPU'))
tf.__version__

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


'2.3.0'

In [3]:
!nvidia-smi

Tue Oct 27 04:08:56 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P8    14W /  70W |     10MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Data Preprocessing

In [4]:
Y_data = pd.read_csv('Y.csv', header=None, names=['Rating','Movie','User'], dtype=np.int32) # training data
P_data = pd.read_csv('P.csv', header=None, names=['Rating','Movie','User'], dtype=np.int32) # test data ('probe-set' mentioned in paper)

# Y_data_full = pd.read_csv('Y_full.csv', header=None, names=['Rating','Movie','User'], dtype=np.int32) # training data
# P_data_full = pd.read_csv('P_full.csv', header=None, names=['Rating','Movie','User'], dtype=np.int32) # test data ('probe-set' mentioned in paper)

In [5]:
display(Y_data.head())
display(P_data.head())

Unnamed: 0,Rating,Movie,User
0,5,2,1
1,4,7,1
2,4,8,1
3,4,11,1
4,4,12,1


Unnamed: 0,Rating,Movie,User
0,3,6,1
1,5,96,1
2,3,1,2
3,3,33,3
4,5,42,4


In [6]:
Y_data.shape, P_data.shape

((3399874, 3), (189699, 3))

In [7]:
print(Y_data['Rating'].unique().max(), Y_data['Movie'].unique().max(), Y_data['User'].unique().max())
print(P_data['Rating'].unique().max(), P_data['Movie'].unique().max(), P_data['User'].unique().max())

5 100 137328
5 100 137328


In [8]:
k, n = Y_data['Movie'].unique().max(), Y_data['User'].unique().max()
k, n

(100, 137328)

In [9]:
def generate_indices_pair_list(data):
    user_id = 1
    indices_list = list()
    for index, row in tqdm(data.iterrows(), total=data.shape[0]):
        if row['User'] != user_id:
            user_id = row['User']
            indices_list.append(index - 1)

    indices_pair_list = list()
    for index_ending in indices_list:
        if index_ending == indices_list[0]:
            indices_pair_list.append((0, index_ending))
        else:
            index_beginning = indices_pair_list[-1][1] + 1
            indices_pair_list.append((index_beginning, index_ending))
    return indices_pair_list

In [10]:
indices_pair_list_Y_data = generate_indices_pair_list(Y_data)
indices_pair_list_P_data = generate_indices_pair_list(P_data)
len(indices_pair_list_Y_data) == len(indices_pair_list_P_data)

100%|██████████| 3399874/3399874 [03:35<00:00, 15785.70it/s]
100%|██████████| 189699/189699 [00:12<00:00, 15308.93it/s]


True

In [11]:
data_preprocessed = list()
for index_pair_Y_data, index_pair_P_data in tqdm(zip(indices_pair_list_Y_data, indices_pair_list_P_data), total=len(indices_pair_list_Y_data)):
    Y_data_t = Y_data.loc[index_pair_Y_data[0]:index_pair_Y_data[1], :]
    
    movie_ids_t_indices = (Y_data_t['Movie'].values - 1).astype(np.int64)
    movie_ids_t_indices_deleted = np.setdiff1d(np.arange(k, dtype=np.int64), movie_ids_t_indices)
    
    H_yt_st_indices = np.vstack((np.arange(movie_ids_t_indices.shape[0]), movie_ids_t_indices)).T
    H_xt_st_indices = np.vstack((np.arange(movie_ids_t_indices_deleted.shape[0]), movie_ids_t_indices_deleted)).T
    z_t_st_indices = np.vstack((movie_ids_t_indices, np.zeros(movie_ids_t_indices.shape[0], dtype=np.int64))).T
        
    H_yt_st = tf.SparseTensor(indices=H_yt_st_indices, values=np.ones(H_yt_st_indices.shape[0], dtype=np.float32), dense_shape=[H_yt_st_indices.shape[0], k])
    H_xt_st = tf.SparseTensor(indices=H_xt_st_indices, values=np.ones(H_xt_st_indices.shape[0], dtype=np.float32), dense_shape=[H_xt_st_indices.shape[0], k])
    z_t_st = tf.SparseTensor(indices=z_t_st_indices, values=Y_data_t['Rating'].values.astype(np.float32), dense_shape=[k, 1])

    H_yt_sm = sparse_tensor_to_csr_sparse_matrix(indices=H_yt_st.indices, values=H_yt_st.values, dense_shape=H_yt_st.dense_shape)
    H_xt_sm = sparse_tensor_to_csr_sparse_matrix(indices=H_xt_st.indices, values=H_xt_st.values, dense_shape=H_xt_st.dense_shape)
    z_t_sm = sparse_tensor_to_csr_sparse_matrix(indices=z_t_st.indices, values=z_t_st.values, dense_shape=z_t_st.dense_shape)
    
    y_t = csr_sparse_matrix_to_dense(SparseMatrixSparseMatMul(a=H_yt_sm, b=z_t_sm, type=tf.float32), tf.float32)
    k_t = tf.constant(H_yt_st_indices.shape[0], dtype=tf.float32)

    P_data_t = P_data.loc[index_pair_P_data[0]:index_pair_P_data[1], :]
    movie_ids_t_P_data = P_data_t['Movie'].values
    ratings_t_P_data = tf.expand_dims(P_data_t['Rating'].values.astype(np.float32), axis=1)
      
    data_preprocessed.append((H_yt_sm, H_xt_sm, y_t, k_t, movie_ids_t_P_data, ratings_t_P_data))

100%|██████████| 137327/137327 [04:46<00:00, 478.72it/s]


## Initialization

$\mu$ has 1 type available

$N = \sum_{t=1}^{n}H_{y_t}'H_{y_t}$

$\hat{\mu}^0 = N^{-1}\sum_{t-1}^{n}H_{y_t}'y_{t}$

In [14]:
# initial estimate of mu
N_sm = SparseMatrixZeros(dense_shape=(k, k), type=tf.float32)
H_yty_t = 0

for (H_yt_sm, H_xt_sm, y_t, k_t, movie_ids_t_P_data, ratings_t_P_data) in tqdm(data_preprocessed):
    N_sm = SparseMatrixAdd(a=N_sm, b=SparseMatrixSparseMatMul(a=H_yt_sm, b=H_yt_sm, type=tf.float32, transpose_a=True), alpha=1.0, beta=1.0)
    H_yty_t += SparseMatrixMatMul(a=H_yt_sm, b=y_t, transpose_a=True)

100%|██████████| 137327/137327 [01:27<00:00, 1564.25it/s]


In [15]:
# The ith diagonal element of N equals the total number of ratings of the ith product.
N_inv = tf.linalg.inv(csr_sparse_matrix_to_dense(N_sm, tf.float32))
mu_hat0 = tf.matmul(N_inv, H_yty_t)
tf.transpose(mu_hat0)

<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
array([[3.4526653, 3.5767686, 3.2878866, 3.9047875, 3.7903547, 3.4441562,
        3.190684 , 4.5283504, 3.8201292, 3.6159503, 3.4038272, 3.8372512,
        4.076039 , 4.228367 , 3.3539274, 4.0645275, 3.7211962, 3.4870086,
        4.1638894, 3.4097998, 3.86926  , 3.435835 , 3.2032225, 4.084879 ,
        3.2320046, 3.886682 , 4.331895 , 4.383559 , 4.316363 , 3.8659174,
        4.339757 , 3.8914747, 3.7002807, 3.3624778, 4.3289886, 4.0670323,
        4.56922  , 3.771041 , 3.6858559, 3.8453238, 4.345388 , 3.9099529,
        3.3994992, 3.6078188, 3.9626696, 4.143861 , 3.4072049, 3.7040153,
        4.0034695, 4.6428022, 3.216206 , 3.7723858, 4.265651 , 4.4537544,
        3.8384895, 3.793742 , 3.76288  , 3.8869822, 3.8004174, 4.346952 ,
        3.8046887, 3.8462136, 3.6412156, 3.2722168, 3.4232938, 3.7163155,
        3.2069893, 4.4541044, 4.265392 , 3.8610888, 4.483009 , 4.3621464,
        3.5388138, 4.1171074, 3.8946686, 3.3607738, 4.179405 ,

R has 4 types available

$R_{1} = I$

$R_{2} = N^{-1}diag(S)$

$R_{3} = diag(S)^{-1/2}Sdiag(S)^{-1/2}$

$R_{4} = N^{-1/2}SN^{-1/2}$

where $S = \sum_{t=1}^{n}H_{y_{t}}'(y_t - H_{y_{t}}\hat{\mu}^0)(y_t - H_{y_{t}}\hat{\mu}^0)'H_{y_{t}}$

In [16]:
# initial estimates of R (4 types available)
R_hat0_1 = tf.eye(k, dtype=tf.float32)
R_hat0_1

<tf.Tensor: shape=(100, 100), dtype=float32, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)>

In [17]:
S = 0
for (H_yt_sm, H_xt_sm, y_t, k_t, movie_ids_t_P_data, ratings_t_P_data) in tqdm(data_preprocessed):
    Hytmu_hat0 = SparseMatrixMatMul(a=H_yt_sm, b=mu_hat0)
    intermediate_result = SparseMatrixMatMul(a=H_yt_sm, b=y_t - Hytmu_hat0, transpose_a=True)
    S += tf.matmul(intermediate_result, intermediate_result, transpose_b=True)

100%|██████████| 137327/137327 [00:26<00:00, 5130.41it/s]


In [18]:
# diag_S is the diagonal matrix consisting of the diagonal elements of S
diag_S = tf.linalg.diag(tf.linalg.tensor_diag_part(S))
R_hat0_2 = tf.matmul(N_inv, diag_S)
R_hat0_2

<tf.Tensor: shape=(100, 100), dtype=float32, numpy=
array([[1.724364  , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.94218737, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.4365153 , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.1832287 , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.0349729 ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.2620498 ]], dtype=float32)>

In [19]:
# R_hat0_3 is not a good initializer when rating variances are far from one
diag_S_inv = tf.linalg.inv(diag_S)
R_hat0_3 = tf.linalg.sqrtm(diag_S_inv) @ S @ tf.linalg.sqrtm(diag_S_inv)
R_hat0_3

<tf.Tensor: shape=(100, 100), dtype=float32, numpy=
array([[ 1.        ,  0.07418733, -0.01158332, ..., -0.01462825,
        -0.02215315, -0.0184503 ],
       [ 0.07418733,  0.99999994,  0.03674517, ...,  0.02561875,
         0.03563373,  0.0392758 ],
       [-0.01158332,  0.03674516,  0.9999999 , ...,  0.1095439 ,
         0.12823555,  0.15562423],
       ...,
       [-0.01462825,  0.02561875,  0.1095439 , ...,  1.0000001 ,
         0.19780138,  0.15164192],
       [-0.02215315,  0.03563373,  0.12823555, ...,  0.1978014 ,
         1.0000001 ,  0.18996929],
       [-0.0184503 ,  0.0392758 ,  0.15562423, ...,  0.15164192,
         0.18996929,  1.0000001 ]], dtype=float32)>

In [20]:
# R_hat0_4 = tf.matmul(tf.linalg.sqrtm(N_inv), tf.matmul(S, tf.linalg.sqrtm(N_inv)))
R_hat0_4 = tf.linalg.sqrtm(N_inv) @ S @ tf.linalg.sqrtm(N_inv)
R_hat0_4

<tf.Tensor: shape=(100, 100), dtype=float32, numpy=
array([[ 1.724364  ,  0.09456117, -0.01823068, ..., -0.02089494,
        -0.02959474, -0.02721799],
       [ 0.09456117,  0.94218737,  0.04274881, ...,  0.0270496 ,
         0.03518798,  0.0428284 ],
       [-0.01823068,  0.04274881,  1.4365153 , ...,  0.14281628,
         0.15636086,  0.20954177],
       ...,
       [-0.02089494,  0.0270496 ,  0.14281628, ...,  1.1832289 ,
         0.21889113,  0.18530701],
       [-0.02959474,  0.03518798,  0.15636086, ...,  0.21889113,
         1.0349729 ,  0.21711312],
       [-0.02721799,  0.04282841,  0.20954175, ...,  0.18530701,
         0.21711312,  1.2620498 ]], dtype=float32)>

## McMichael’s Algorithm

1. $\hat{R}^{i+1} = \hat{R}^{i} + \gamma \hat{R}^{i} \bigg(\frac{d}{dR} \log{p}(y^n; \mu, R)|_{R = \hat{R}^i} \bigg) \hat{R}^{i}$ <br />
where $\frac{d}{dR} \log{p}(y^n; \mu, R)|_{R = \hat{R}^i} = -\frac{1}{2} \sum_{t=1}^{n} H_{y_t}' \Big(\big(R_{y_t}^{i}\big)^{-1} - \big(R_{y_t}^{i}\big)^{-1} (y_t - \mu_{y_t}) (y_t - \mu_{y_t})' \big(R_{y_t}^{i}\big)^{-1}\Big) H_{y_t}$

2. $\hat{\mu} = \big(\sum_{t=1}^{n}H_{y_t}'R_{y_t}^{-1}H_{y_t}\big)^{-1} \big(\sum_{t=1}^{n}H_{y_t}'R_{y_t}^{-1}y_t\big)$

In [21]:
@tf.function(experimental_relax_shapes=True)
def run_graph_mcmichael(mu, R, y_t, H_yt_sm, k_t):
    # for R estimation
    # R_yt = H_yt @ R @ H_yt_trans
    intermediate_result = SparseMatrixMatMul(a=H_yt_sm, b=R)
    R_yt = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result, transpose_b=True, transpose_output=True)

    R_yt_det = tf.linalg.det(R_yt)
    R_yt_inv = tf.linalg.inv(R_yt)
    # mu_yt = tf.matmul(H_yt, mu)
    mu_yt = SparseMatrixMatMul(a=H_yt_sm, b=mu)
    yt_minus_mu_yt = y_t - mu_yt

    # log_p_gradient_part = H_yt_trans @ (R_yt_inv - R_yt_inv @ (y_t - mu_yt) @ tf.transpose(y_t - mu_yt) @ R_yt_inv) @ H_yt
    intermediate_result_1 = R_yt_inv @ yt_minus_mu_yt      
    intermediate_result_2 = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv - tf.matmul(intermediate_result_1, intermediate_result_1, transpose_b=True), transpose_a=True)
    log_p_gradient_part = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result_2, transpose_a=True, transpose_b=True, transpose_output=True)

    # for mu estimation
    # Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
    intermediate_result = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv, transpose_a=True, transpose_b=True, transpose_output=True)
    Hyt_trans_Ryt_inv_Hyt_sum_part = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result, transpose_a=True)
    # Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t
    Hyt_trans_Ryt_inv_yt_sum_part = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv @ y_t, transpose_a=True)
    
    # for log likelihood calculation
    log_p_hat_part = -1/2*(tf.math.log(R_yt_det) + tf.transpose(yt_minus_mu_yt) @ R_yt_inv @ (yt_minus_mu_yt) + k_t*LOG_2PI)
    
    return log_p_gradient_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part

In [24]:
def mcmichael(mu, R):
    gamma = 0.00001
    Hyt_trans_Ryt_inv_Hyt_sum = 0
    Hyt_trans_Ryt_inv_yt_sum = 0
    log_p_gradient = 0
    log_p_hat = 0

    for (H_yt_sm, H_xt_sm, y_t, k_t, movie_ids_t_P_data, ratings_t_P_data) in tqdm(data_preprocessed):
        # log_p_gradient_part, Hyt_trans_Ryt_inv_Hyt_sum_part, Hyt_trans_Ryt_inv_yt_sum_part, log_p_hat_part = run_graph_mcmichael(mu, R, y_t, H_yt_sm, k_t)

        # for R estimation
        # R_yt = H_yt @ R @ H_yt_trans
        intermediate_result = SparseMatrixMatMul(a=H_yt_sm, b=R)
        R_yt = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result, transpose_b=True, transpose_output=True)

        R_yt_det = tf.linalg.det(R_yt)
        R_yt_inv = tf.linalg.inv(R_yt)
        # mu_yt = tf.matmul(H_yt, mu)
        mu_yt = SparseMatrixMatMul(a=H_yt_sm, b=mu)
        yt_minus_mu_yt = y_t - mu_yt

        # log_p_gradient_part = H_yt_trans @ (R_yt_inv - R_yt_inv @ (y_t - mu_yt) @ tf.transpose(y_t - mu_yt) @ R_yt_inv) @ H_yt
        intermediate_result_1 = R_yt_inv @ yt_minus_mu_yt      
        intermediate_result_2 = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv - tf.matmul(intermediate_result_1, intermediate_result_1, transpose_b=True), transpose_a=True)
        log_p_gradient_part = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result_2, transpose_a=True, transpose_b=True, transpose_output=True)

        # for mu estimation
        # Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
        intermediate_result = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv, transpose_a=True, transpose_b=True, transpose_output=True)
        Hyt_trans_Ryt_inv_Hyt_sum_part = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result, transpose_a=True)
        # Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t
        Hyt_trans_Ryt_inv_yt_sum_part = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv @ y_t, transpose_a=True)
        
        # for log likelihood calculation
        log_p_hat_part = -1/2*(tf.math.log(R_yt_det) + tf.transpose(yt_minus_mu_yt) @ R_yt_inv @ (yt_minus_mu_yt) + k_t*LOG_2PI)
        
        log_p_gradient += log_p_gradient_part
        Hyt_trans_Ryt_inv_Hyt_sum += Hyt_trans_Ryt_inv_Hyt_sum_part
        Hyt_trans_Ryt_inv_yt_sum += Hyt_trans_Ryt_inv_yt_sum_part
        log_p_hat += log_p_hat_part
        
    R_hat = R + gamma*(R @ (-1/2*log_p_gradient) @ R)
    mu_hat = tf.matmul(tf.linalg.inv(Hyt_trans_Ryt_inv_Hyt_sum), Hyt_trans_Ryt_inv_yt_sum)
    return mu_hat, R_hat, log_p_hat

In [None]:
delta = 0.0005
mu = mu_hat0
R = R_hat0_4
log_p = tf.constant(-np.inf, dtype=tf.float32)
LOG_2PI = tf.math.log(2*tf.constant(np.pi, dtype=tf.float32))

for i in range(40):
    if i % 5 == 0:
        print(f'iteration: {i}')
    
    mu_hat, R_hat, log_p_hat = mcmichael(mu, R)
    convergence_criterion = log_p_hat/n - log_p/n < delta
    
    print(f'normalized log_p_hat: {(log_p_hat/n).numpy().flatten()[0]:.5}')
    print(f'normalized log_p:     {(log_p/n).numpy().flatten()[0]:.5}')
    print(f'convergence gap:      {(log_p_hat/n - log_p/n).numpy().flatten()[0]:.5}')
    
    if convergence_criterion:
        break
        
    # use new estimattions for next iteration
    mu = mu_hat
    R = R_hat
    log_p = log_p_hat

  0%|          | 38/137327 [00:00<06:12, 368.54it/s]

iteration: 0


100%|██████████| 137327/137327 [04:43<00:00, 483.62it/s]
  0%|          | 42/137327 [00:00<05:29, 416.44it/s]

normalized log_p_hat: -32.232
normalized log_p:     -inf
convergence gap:      inf


100%|██████████| 137327/137327 [04:42<00:00, 486.89it/s]
  0%|          | 50/137327 [00:00<04:34, 499.45it/s]

normalized log_p_hat: -32.007
normalized log_p:     -32.232
convergence gap:      0.22573


 92%|█████████▏| 126111/137327 [04:18<00:23, 484.05it/s]

In [None]:
np.save('results/mcmichael_mu.npy', mu_hat)
np.save('results/mcmichael_R.npy', R_hat)
np.save('results/mcmichael_log_p.npy', log_p_hat)

## Evaluation

$\epsilon^2 = \frac{\sum_{t=1}^{n}\big(x_t - \hat{X_t}\big)'\big(x_t - \hat{X_t}\big)}{\sum_{t=1}^{n} l_t}$

In [None]:
@tf.function(experimental_relax_shapes=True)
def run_graph_square_error(mu, R, movie_ids_t_P_data, ratings_t_P_data, y_t, H_xt_sm, H_yt_sm):
    # calculate X_t_hat
    # R_xt = H_xt @ R @ H_xt_trans
    # R_yt = H_yt @ R @ H_yt_trans
    # R_yt_inv = tf.linalg.inv(R_yt)
    # R_xtyt = H_xt @ R @ H_yt_trans

    # mu_yt = tf.matmul(H_yt, mu)
    # mu_xt = tf.matmul(H_xt, mu)

    intermediate_result = SparseMatrixMatMul(a=H_xt_sm, b=R)
    R_xt = SparseMatrixMatMul(a=H_xt_sm, b=intermediate_result, transpose_b=True, transpose_output=True)
    
    intermediate_result = SparseMatrixMatMul(a=H_yt_sm, b=R)
    R_yt = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result, transpose_b=True, transpose_output=True)
    R_yt_inv = tf.linalg.inv(R_yt)

    intermediate_result = SparseMatrixMatMul(a=H_xt_sm, b=R)
    R_xtyt = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result, transpose_b=True, transpose_output=True)

    mu_yt = SparseMatrixMatMul(a=H_yt_sm, b=mu)
    mu_xt = SparseMatrixMatMul(a=H_xt_sm, b=mu)

    X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt
    
    # clip ratings
    # predictions_t = tf.gather(tf.matmul(H_xt_trans, X_t_hat), indices=movie_ids_t-1)
    predictions_t = tf.gather(SparseMatrixMatMul(a=H_xt_sm, b=X_t_hat, transpose_a=True), indices=movie_ids_t_P_data-1)
    predictions_t = tf.clip_by_value(predictions_t, 1, 5)
    
    return tf.matmul(tf.transpose(ratings_t_P_data - predictions_t), ratings_t_P_data - predictions_t)

In [None]:
def evaluate(mu, R):
    square_error = 0
    l = 0
    for (H_yt_sm, H_xt_sm, y_t, k_t, movie_ids_t_P_data, ratings_t_P_data) in tqdm(data_preprocessed):
        square_error += run_graph_square_error(mu, R, movie_ids_t_P_data, ratings_t_P_data, y_t, H_xt_sm, H_yt_sm)
        l += len(ratings_t_P_data)
    return np.sqrt(square_error/l)

In [None]:
mcmichael_mu = np.load('results/mcmichael_mu.npy')
mcmichael_R = np.load('results/mcmichael_R.npy')
rmse = evaluate(mcmichael_mu, mcmichael_R)
rmse