# Product Recommendation Training and Evaluation
Reference: [IEEE Xplore document 5430993](https://ieeexplore.ieee.org/document/5430993) ([PDF copy](https://www2.seas.gwu.edu/~simhaweb/champalg/cf/papers/wroberts.pdf))

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.ops.linalg.sparse.sparse_csr_matrix_ops import *
from tensorflow.raw_ops import SparseMatrixAdd, SparseMatrixMatMul, SparseMatrixSparseMatMul, SparseMatrixZeros
from tqdm import tqdm

In [2]:
# List the GPUs TensorFlow can see, then display the TensorFlow version as the cell output.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print(gpu_devices)
tf.__version__

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]


'2.3.1'

In [3]:
# Record the GPU hardware/driver state of the machine this notebook ran on.
!nvidia-smi

Sun Nov 29 01:05:15 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:00:1B.0 Off |                    0 |
| N/A   44C    P0    38W / 300W |      3MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:00:1C.0 Off |                    0 |
| N/A   47C    P0    37W / 300W |      3MiB / 16160MiB |      0%      Default |
|       

In [4]:
# Record the CPU configuration of the machine this notebook ran on.
!lscpu

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                32
On-line CPU(s) list:   0-31
Thread(s) per core:    2
Core(s) per socket:    16
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 79
Model name:            Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
Stepping:              1
CPU MHz:               2665.895
CPU max MHz:           3000.0000
CPU min MHz:           1200.0000
BogoMIPS:              4600.00
Hypervisor vendor:     Xen
Virtualization type:   full
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              46080K
NUMA node0 CPU(s):     0-31
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq ssse3 

In [5]:
# Problem dimensions.
# k: presumably the number of distinct movies (size of mu / R) — TODO confirm against the preprocessing notebook.
k = 17770
# n: number of records in the preprocessed dataset; used as the progress-bar total when iterating it
# and as the normaliser of the log-likelihood.
n = 480189

In [6]:
def parse_fn(data_element_serialized):
    """Decode one serialized `tf.train.Example` into a dict of tensors.

    Args:
        data_element_serialized: scalar string tensor holding one serialized example.

    Returns:
        A dict with the keys 'H_yt_st', 'H_xt_st', 'y_t', 'k_t',
        'movie_ids_t_P_data' and 'ratings_t_P_data'. The two 'H_*' entries are
        float32 `tf.SparseTensor`s; 'y_t', 'k_t' and 'ratings_t_P_data' are
        column vectors (a trailing axis of size 1 is appended);
        'movie_ids_t_P_data' is returned exactly as parsed (1-D float32).
    """
    sparse_keys = ('H_yt_st', 'H_xt_st')
    float_keys = ('y_t', 'k_t', 'movie_ids_t_P_data', 'ratings_t_P_data')

    # Every feature is a variable-length sequence of scalars; the sparse matrices
    # are stored as serialized strings, everything else as float32.
    schema = {
        name: tf.io.FixedLenSequenceFeature([], tf.string, allow_missing=True)
        for name in sparse_keys
    }
    schema.update({
        name: tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True)
        for name in float_keys
    })

    parsed = tf.io.parse_single_example(serialized=data_element_serialized, features=schema)

    for name in sparse_keys:
        # tf.io.deserialize_many_sparse() needs a [batch_size, 3] input, so a batch
        # axis of size 1 is added here and dropped again by the reshape below.
        batched = tf.io.deserialize_many_sparse(tf.expand_dims(parsed[name], axis=0), dtype=tf.float32)
        parsed[name] = tf.sparse.reshape(batched, tf.shape(batched)[1:])

    # Turn the 1-D sequences into column vectors; 'movie_ids_t_P_data' stays 1-D.
    for name in ('y_t', 'k_t', 'ratings_t_P_data'):
        parsed[name] = tf.expand_dims(parsed[name], axis=1)

    return parsed

In [7]:
# Lazily stream the preprocessed records from disk and decode each one with parse_fn.
tfrecord_paths = ['data/data_preprocessed.tfrecord']
data_preprocessed = tf.data.TFRecordDataset(tfrecord_paths).map(parse_fn)

In [8]:
%%time
mu_hat0 = tf.constant(np.load('mu_hat0.npy'), tf.float32)
R_hat0_4 = tf.constant(np.load('R_hat0_4.npy'), tf.float32)

CPU times: user 390 ms, sys: 1.06 s, total: 1.45 s
Wall time: 1.45 s


## McMichael’s Algorithm

1. $\hat{R}^{i+1} = \hat{R}^{i} + \gamma \hat{R}^{i} \bigg(\frac{d}{dR} \log{p}(y^n; \mu, R)|_{R = \hat{R}^i} \bigg) \hat{R}^{i}$ <br />
where $\frac{d}{dR} \log{p}(y^n; \mu, R)|_{R = \hat{R}^i} = -\frac{1}{2} \sum_{t=1}^{n} H_{y_t}' \Big(\big(R_{y_t}^{i}\big)^{-1} - \big(R_{y_t}^{i}\big)^{-1} (y_t - \mu_{y_t}) (y_t - \mu_{y_t})' \big(R_{y_t}^{i}\big)^{-1}\Big) H_{y_t}$

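2. $\hat{\mu} = \big(\sum_{t=1}^{n}H_{y_t}'R_{y_t}^{-1}H_{y_t}\big)^{-1} \big(\sum_{t=1}^{n}H_{y_t}'R_{y_t}^{-1}y_t\big)$

Here $R_{y_t} = H_{y_t} R H_{y_t}'$ and $\mu_{y_t} = H_{y_t} \mu$ are the covariance and mean restricted to the entries selected by $H_{y_t}$, and $\gamma$ is the step size.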

In [9]:
def mcmichael(mu, R, gamma=0.00001):
    """Run one pass of McMichael's algorithm over `data_preprocessed`.

    Accumulates, over every record of the dataset, the Gaussian log-likelihood,
    its gradient with respect to R, and the two sums needed for the closed-form
    mean update, then returns the updated estimates.

    Args:
        mu: current mean estimate (presumably a dense (k, 1) float32 tensor — TODO confirm).
        R: current covariance estimate (presumably a dense (k, k) float32 tensor — TODO confirm).
        gamma: step size of the gradient update on R.

    Returns:
        (mu_hat, R_hat, log_p_hat): the updated mean, the updated covariance and
        the log-likelihood of the data evaluated at the *input* (mu, R).
    """
    # Computed here so the function does not rely on a constant defined in a later cell.
    log_2pi = tf.math.log(2*tf.constant(np.pi, dtype=tf.float32))
    Hyt_trans_Ryt_inv_Hyt_sum = 0
    Hyt_trans_Ryt_inv_yt_sum = 0
    log_p_gradient = 0
    log_p_hat = 0

    for features_parsed in tqdm(data_preprocessed, total=n):
        H_yt_st, y_t, k_t = features_parsed['H_yt_st'], features_parsed['y_t'], features_parsed['k_t']

        with tf.device('/GPU:0'):
            H_yt_sm = sparse_tensor_to_csr_sparse_matrix(indices=H_yt_st.indices, values=H_yt_st.values, dense_shape=H_yt_st.dense_shape)
            # for R estimation
            # R_yt = H_yt @ R @ H_yt_trans
            R_yt = SparseMatrixMatMul(a=H_yt_sm, b=SparseMatrixMatMul(a=H_yt_sm, b=R), transpose_b=True, transpose_output=True)
            R_yt_inv = tf.linalg.inv(R_yt)
            yt_minus_mu_yt = y_t - SparseMatrixMatMul(a=H_yt_sm, b=mu)

            # for log likelihood calculation
            # log(det(R_yt)) is evaluated through slogdet: the plain determinant
            # overflows/underflows in float32 for larger matrices. Adding log(sign)
            # keeps the NaN / -inf result for a non-positive determinant.
            det_sign, log_abs_det = tf.linalg.slogdet(R_yt)
            log_det_R_yt = tf.math.log(det_sign) + log_abs_det
            log_p_hat_part = -1/2*(log_det_R_yt + tf.transpose(yt_minus_mu_yt) @ R_yt_inv @ (yt_minus_mu_yt) + k_t*log_2pi)
            log_p_hat += log_p_hat_part

        with tf.device('/GPU:1'):
            # for R estimation
            # log_p_gradient_part = H_yt_trans @ (R_yt_inv - R_yt_inv @ (y_t - mu_yt) @ tf.transpose(y_t - mu_yt) @ R_yt_inv) @ H_yt
            intermediate_result = R_yt_inv @ yt_minus_mu_yt
            intermediate_result_2 = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv - tf.matmul(intermediate_result, intermediate_result, transpose_b=True), transpose_a=True)
            log_p_gradient_part = SparseMatrixMatMul(a=H_yt_sm, b=intermediate_result_2, transpose_a=True, transpose_b=True, transpose_output=True)
            log_p_gradient += log_p_gradient_part

        with tf.device('/GPU:2'):
            # for mu estimation
            # Hyt_trans_Ryt_inv_Hyt_sum_part = H_yt_trans @ R_yt_inv @ H_yt
            Hyt_trans_Ryt_inv_Hyt_sum_part = SparseMatrixMatMul(a=H_yt_sm, b=SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv, transpose_a=True, transpose_b=True, transpose_output=True), transpose_a=True)
            # Hyt_trans_Ryt_inv_yt_sum_part = H_yt_trans @ R_yt_inv @ y_t
            Hyt_trans_Ryt_inv_yt_sum_part = SparseMatrixMatMul(a=H_yt_sm, b=R_yt_inv @ y_t, transpose_a=True)
            Hyt_trans_Ryt_inv_Hyt_sum += Hyt_trans_Ryt_inv_Hyt_sum_part

        with tf.device('/GPU:3'):
            Hyt_trans_Ryt_inv_yt_sum += Hyt_trans_Ryt_inv_yt_sum_part

    # The -1/2 factor of the gradient is applied once here instead of per record.
    R_hat = R + gamma*(R @ (-1/2*log_p_gradient) @ R)
    mu_hat = tf.matmul(tf.linalg.inv(Hyt_trans_Ryt_inv_Hyt_sum), Hyt_trans_Ryt_inv_yt_sum)
    return mu_hat, R_hat, log_p_hat

In [None]:
# Stop once the per-record log-likelihood improves by less than delta
# (0.0005 is a stricter alternative that was tried).
delta = 0.2
mu, R = mu_hat0, R_hat0_4
log_p = tf.constant(-np.inf, dtype=tf.float32)
LOG_2PI = tf.math.log(2*tf.constant(np.pi, dtype=tf.float32))

for i in range(40):
    if i % 5 == 0:
        print(f'iteration: {i}')

    mu_hat, R_hat, log_p_hat = mcmichael(mu, R)

    # Normalise by the number of records and compute the improvement once.
    normalized_log_p_hat = log_p_hat/n
    normalized_log_p = log_p/n
    convergence_gap = normalized_log_p_hat - normalized_log_p
    convergence_criterion = convergence_gap < delta

    print(f'normalized log_p_hat: {normalized_log_p_hat.numpy().flatten()[0]:.5}')
    print(f'normalized log_p:     {normalized_log_p.numpy().flatten()[0]:.5}')
    print(f'convergence gap:      {convergence_gap.numpy().flatten()[0]:.5}')

    if convergence_criterion:
        break

    # use the new estimates for the next iteration
    mu, R, log_p = mu_hat, R_hat, log_p_hat

  0%|          | 0/480189 [00:00<?, ?it/s]

iteration: 0


 89%|████████▉ | 427476/480189 [3:29:56<23:54, 36.74it/s]  

In [None]:
# Persist the final estimates of the training loop.
# Create the output directory first: np.save does not create missing directories,
# and failing here would discard a multi-hour training run.
tf.io.gfile.makedirs('results')
np.save('results/mcmichael_mu.npy', mu_hat)
np.save('results/mcmichael_R.npy', R_hat)
np.save('results/mcmichael_log_p.npy', log_p_hat)

## Evaluation

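$\epsilon^2 = \frac{\sum_{t=1}^{n}\big(x_t - \hat{X}_t\big)'\big(x_t - \hat{X}_t\big)}{\sum_{t=1}^{n} l_t}$

where $\hat{X}_t = R_{x_t y_t} R_{y_t}^{-1} (y_t - \mu_{y_t}) + \mu_{x_t}$ is the prediction of the held-out ratings $x_t$ (clipped to $[1, 5]$) and $l_t$ is the number of held-out ratings in record $t$. The reported value is $\epsilon$, i.e. the RMSE.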

In [None]:
@tf.function(experimental_relax_shapes=True)
def run_graph_square_error(mu, R, movie_ids_t_P_data, ratings_t_P_data, y_t, H_xt_sm, H_yt_sm):
    """Return the summed squared prediction error for one record.

    Predicts x_t from the observed ratings y_t with
    X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt, clips the predictions
    to [1, 5] and compares them with the held-out ratings.

    Args:
        mu: mean estimate (dense tensor).
        R: covariance estimate (dense tensor).
        movie_ids_t_P_data: 1-based movie ids of the held-out ratings; may be a
            float tensor (as produced by parse_fn) or an integer tensor.
        ratings_t_P_data: held-out ratings as a column vector.
        y_t: observed ratings as a column vector.
        H_xt_sm: CSR sparse matrix selecting the entries to predict.
        H_yt_sm: CSR sparse matrix selecting the observed entries.

    Returns:
        A (1, 1) tensor holding the sum of squared errors for this record.
    """
    # H_xt @ R and H_yt @ R are each needed once; computing them up front avoids
    # repeating the sparse-dense product.
    H_xt_R = SparseMatrixMatMul(a=H_xt_sm, b=R)
    H_yt_R = SparseMatrixMatMul(a=H_yt_sm, b=R)

    # R_yt = H_yt @ R @ H_yt_trans
    R_yt = SparseMatrixMatMul(a=H_yt_sm, b=H_yt_R, transpose_b=True, transpose_output=True)
    R_yt_inv = tf.linalg.inv(R_yt)

    # R_xtyt = H_xt @ R @ H_yt_trans
    R_xtyt = SparseMatrixMatMul(a=H_yt_sm, b=H_xt_R, transpose_b=True, transpose_output=True)

    mu_yt = SparseMatrixMatMul(a=H_yt_sm, b=mu)
    mu_xt = SparseMatrixMatMul(a=H_xt_sm, b=mu)

    X_t_hat = R_xtyt @ R_yt_inv @ (y_t - mu_yt) + mu_xt

    # tf.gather only accepts integer indices, while parse_fn yields the ids as
    # float32; ids are 1-based, hence the -1.
    movie_indices = tf.cast(movie_ids_t_P_data, tf.int32) - 1

    # predictions_t = tf.gather(tf.matmul(H_xt_trans, X_t_hat), indices=movie_ids_t-1)
    predictions_t = tf.gather(SparseMatrixMatMul(a=H_xt_sm, b=X_t_hat, transpose_a=True), indices=movie_indices)
    # clip ratings
    predictions_t = tf.clip_by_value(predictions_t, 1, 5)

    residual = ratings_t_P_data - predictions_t
    return tf.matmul(residual, residual, transpose_a=True)

In [None]:
def evaluate(mu, R):
    """Compute the RMSE of the model (mu, R) over `data_preprocessed`.

    Args:
        mu: mean estimate (numpy array or tensor).
        R: covariance estimate (numpy array or tensor).

    Returns:
        The root of the summed squared error divided by the total number of
        held-out ratings, as returned by `np.sqrt` on a (1, 1) tensor.
    """
    # Convert once so the (large) arrays are not re-converted for every record.
    mu = tf.cast(mu, tf.float32)
    R = tf.cast(R, tf.float32)

    square_error = 0
    l = 0
    # Each dataset element is the dict produced by parse_fn, so fields are read by key.
    for features_parsed in tqdm(data_preprocessed):
        movie_ids_t_P_data = features_parsed['movie_ids_t_P_data']
        ratings_t_P_data = features_parsed['ratings_t_P_data']
        if len(ratings_t_P_data) == 0:
            # nothing held out for this record
            continue

        H_yt_st, H_xt_st = features_parsed['H_yt_st'], features_parsed['H_xt_st']
        # SparseMatrixMatMul operates on CSR sparse matrices, not on tf.SparseTensor.
        H_yt_sm = sparse_tensor_to_csr_sparse_matrix(indices=H_yt_st.indices, values=H_yt_st.values, dense_shape=H_yt_st.dense_shape)
        H_xt_sm = sparse_tensor_to_csr_sparse_matrix(indices=H_xt_st.indices, values=H_xt_st.values, dense_shape=H_xt_st.dense_shape)

        square_error += run_graph_square_error(mu, R, movie_ids_t_P_data, ratings_t_P_data, features_parsed['y_t'], H_xt_sm, H_yt_sm)
        l += len(ratings_t_P_data)
    return np.sqrt(square_error/l)

In [None]:
# Reload the estimates saved after training and score them; the RMSE is the cell output.
mcmichael_mu, mcmichael_R = np.load('results/mcmichael_mu.npy'), np.load('results/mcmichael_R.npy')
rmse = evaluate(mu=mcmichael_mu, R=mcmichael_R)
rmse