# Training a Recommendation System ⚙️

In [2]:
# Development convenience: load IPython's autoreload extension and, in mode 2,
# re-import modules before each cell runs so edits to project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2
from pathlib import Path
import os
# Switch the working directory to the folder one level above the notebook
# (presumably the project root, so the `rsdb` imports below resolve - confirm).
#
# The starting directory is remembered the first time this cell runs in a
# kernel session. Deriving the target from Path.cwd() on every execution would
# make the cell non-idempotent: each re-run would climb one more level up.
_NOTEBOOK_DIR = globals().get("_NOTEBOOK_DIR", Path.cwd())
current_dir = _NOTEBOOK_DIR.parent
os.chdir(current_dir)
print(f"Current working directory is now: {Path.cwd()}")

import numpy as np
import tensorflow as tf
import warnings
# Silence every Python warning for the rest of the session to keep cell output
# short. NOTE(review): this also hides deprecation and numerical warnings from
# the libraries used below - consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")

from rsdb.preprocess.data_preprocessing import get_clean_review_data
from rsdb.features.featuring import featuring_engineering

# Locations of the Google Local (California) review file and the matching
# business-metadata file; both live under the same repository folder.
_DATA_REPO = "https://datarepo.eng.ucsd.edu/mcauley_group/gdrive/googlelocal"
url = f"{_DATA_REPO}/review-California_10.json.gz"
meta_url = f"{_DATA_REPO}/meta-California.json.gz"

Current working directory is now: /Users/kevinb/Desktop/cse158/RSDB


In [None]:
# Step 1: build the cleaned review table from the review and metadata URLs.
cleaned_df = get_clean_review_data(url, meta_url)

# Step 2: run the cleaned table through the project's feature-engineering step.
featured_df = featuring_engineering(cleaned_df)

In [4]:
from rsdb.models.tdlf.temporal_dynamic_v import TemporalDynamicVariants
from rsdb.models.fpmc.fpmc_v import FPMCVariants
from rsdb.train import tdlf_df_to_tf_dataset, fpmc_df_to_tf_dataset

Let's take a look at the feature-engineered data frame.

In [None]:
# Show only the first rows: a bounded preview keeps the saved notebook small
# and avoids embedding more reviewer data in the output than necessary.
featured_df.head()

# Prepare Features

In [None]:
# Item id, user id and rating columns of the *full* frame; this is what gets
# passed to the model constructors further down.
data_query = featured_df[["gmap_id", "reviewer_id", "rating"]]

# Random 80/20 split. The fixed random_state keeps the partition identical
# across runs.
train_df = featured_df.sample(frac=0.8, random_state=42)
test_df = featured_df.drop(train_df.index)

# `drop` removes rows by index *label*. If the index contained duplicate
# labels, rows that were never sampled would be discarded too and the test set
# would silently shrink - fail loudly instead.
assert len(train_df) + len(test_df) == len(featured_df), (
    "train/test split lost rows - featured_df index is not unique"
)

featured_df.info()

# Temporal Dynamic Latent Factor Model With Neural Correlative Variants (TDLF-V)

This model makes many assumptions. Throughout, item $i$ denotes a business and user $u$ denotes a reviewer.


$$
\hat{r}_{u,i,t} = \mu + \beta_i + \beta_i(t) + \beta_u + \alpha_u \cdot \text{dev}_u(t) + f(\gamma_u, \gamma_i)
$$

$$
\beta_i(t) = \beta_{i,\text{bin}}(t) + \beta_{i,\text{period}}(t)
$$


**Static User/Item Bias**:
- Static bias for item $ i $:
  $$
  \beta_i = \text{Embedding}(\text{gmap\_id})
  $$
- Static bias for user $ u $:
  $$
  \beta_u = \text{Embedding}(\text{reviewer\_id})
  $$


**Temporal User/Item Bias**:
- Temporal bias for item $ i $ based on time bins:
  $$
  \beta_{i,\text{bin}}(t) = \text{Embedding}(\text{time\_bin})
  $$
- Temporal deviation for user $ u $:
  $$
  \text{dev}_u(t) = \text{sgn}(t - \bar{t}_u) \cdot |t - \bar{t}_u|^{0.4}
  $$
  - $ t $: Timestamp of the rating.
  - $ \bar{t}_u $: Mean timestamp of user $ u $'s ratings.
  - $ \text{sgn}(x) $: Sign function, returning $ -1 $ if $ x < 0 $, and $ 1 $ otherwise.
- Scaled user deviation:
  $$
  \alpha_u \cdot \text{dev}_u(t)
  $$
  - $ \alpha_u $: Trainable scaling factor for user $ u $.

**Latent Interaction**:
- User embedding:
  $$
  \gamma_u = \text{Embedding}(\text{reviewer\_id})
  $$
- Item embedding:
  $$
  \gamma_i = \text{Embedding}(\text{gmap\_id})
  $$
- Interaction between user and item embeddings is the following where $ \text{NN} $ is a dense neural network:
  $$
  f(\gamma_u, \gamma_i) = \text{NN}([\gamma_u, \gamma_i])
  $$

**Final Prediction**

$$
\gamma_{u,k}(t) = \gamma_{u,k} + \alpha_{u,k} \cdot \text{dev}_u(t) + \gamma_{u,k,t}
$$

$$
\hat{r}_{u,i,t} = \mu + \beta_i + \beta_i(t) + \beta_u + \alpha_u \cdot \text{dev}_u(t) + f(\gamma_{u,k}(t), \gamma_{i,k})
$$

**Optimization**:

$$
\arg \min_{\alpha, \beta, \gamma} \sum_{u,i} \left(\mu + \beta_i + \beta_i(t) + \beta_u + \alpha_u \cdot \text{dev}_u(t) + f(\gamma_{u,k}(t), \gamma_{i,k}) - R_{u,i} \right)^2 + \lambda \left[ \sum_u \beta_u^2 + \sum_i \beta_i^2 + \sum_i \left\| \gamma_i \right\|_2^2 + \sum_u \left\| \gamma_u \right\|_2^2 \right]
$$

**Variants**:

$$
\hat{r}_{u,i,t} = 
\underbrace{\mu}_{\text{Global bias}} + 
\underbrace{\beta_i}_{\text{Static item bias}} + 
\underbrace{\beta_i(t)}_{\text{Dynamic item bias (time-dependent)}} + 
\underbrace{\beta_u}_{\text{Static user bias}} + 
\underbrace{\alpha_u \cdot \text{dev}_u(t)}_{\text{User temporal deviation bias}} + 
\underbrace{f(\gamma_{u,k}(t), \gamma_{i,k})}_{\text{Interaction score}} + 
\underbrace{\mathbf{F}_{\text{item}} \cdot \mathbf{W}_{\text{item}}}_{\text{Item-specific feature effect}}
$$

**Variants Optimization**:

$$
\arg \min_{\alpha, \beta, \gamma, \mathbf{W}} \sum_{u,i} 
\left(
\mu + \beta_i + \beta_i(t) + \beta_u + \alpha_u \cdot \text{dev}_u(t) + f(\gamma_{u,k}(t), \gamma_{i,k}) + \mathbf{F}_{\text{item}} \cdot \mathbf{W}_{\text{item}} - R_{u,i}
\right)^2 
+ 
\lambda \left( \sum_u \beta_u^2 + \sum_i \beta_i^2 + \sum_u \|\gamma_u\|_2^2 + \sum_i \|\gamma_i\|_2^2 + \sum \|\mathbf{W}\|_2^2 \right).
$$


In [46]:
# Convert both splits for the TDLF model and group them into batches of 4096.
# Only the training stream is shuffled (buffer of 1024 elements); the test
# stream keeps its order.
train_data = (
    tdlf_df_to_tf_dataset(train_df)
    .shuffle(1024)
    .batch(4096)
)
test_data = (
    tdlf_df_to_tf_dataset(test_df)
    .batch(4096)
)

In [None]:
# Hyper-parameters - deliberately small, for demonstration purposes only.
embedding_dim = 30
dense_units = 30
l2_reg = 1e-3
time_bins = 20
model = TemporalDynamicVariants(l2_reg, dense_units, embedding_dim, data_query, time_bins)

# Stop once validation RMSE has gone 10 epochs without improving by at least
# 0.001, then roll the model back to its best weights.
# NOTE(review): `test_data` doubles as the validation set here, so the
# checkpoint is selected on the same data that is later reported as
# "Test RMSE" - that figure is therefore optimistic. Consider carving a
# separate validation split out of the training data.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_root_mean_squared_error",
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)

# Learning rate starts at 1e-2 and decays by a factor of 0.8 per 1000
# optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=1000,
    decay_rate=0.8,
)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule))

# Keep the History object, as the FPMC training cell does: the per-epoch
# metrics stay available for inspection and the cell no longer ends by echoing
# the object's repr.
history = model.fit(
    train_data,
    epochs=500,
    validation_data=test_data,
    callbacks=[early_stopping],
)

In [None]:
# RMSE as reported by Keras on the held-out split.
test_metrics = model.evaluate(test_data, return_dict=True)
print(f"Test RMSE: {test_metrics['root_mean_squared_error']}")

# Recompute the RMSE by hand as a cross-check on the Keras metric.
# Each batch is flattened to 1-D before it is stored: if the model emits
# (batch, 1) while the labels are (batch,), the subtraction and comparison
# below would otherwise broadcast to an (N, N) matrix and report wrong numbers.
predictions = []
actual_ratings = []
for batch in test_data:
    predicted_ratings = np.ravel(model(batch).numpy())
    actual_ratings.append(np.ravel(batch["rating"].numpy()))
    predictions.append(predicted_ratings)

predictions = np.concatenate(predictions)
actual_ratings = np.concatenate(actual_ratings)
assert predictions.shape == actual_ratings.shape, (
    "expected exactly one prediction per rating"
)

rmse = np.sqrt(np.mean((predictions - actual_ratings) ** 2))
print(f"RECHECK RMSE: {rmse}")

# Rounded accuracy is only meaningful when every true rating is a whole number.
if np.all(actual_ratings == actual_ratings.round()):
    correct = np.mean(predictions.round() == actual_ratings)
    print(f"Rounded Accuracy: {correct}")
else:
    print("Actual ratings are not integers, skipping rounded accuracy calculation.")

# Factorized Personalized Markov Chain Variants (FPMC-V)

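This model makes fewer assumptions. It assumes only that the next item depends on the user and on the item interacted with immediately before it: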

$$
p(i_{t+1} \mid i_t, i_{t-1}, \dots, i_1, u) = p(i_{t+1} \mid i_t, u)
$$

In Factorized Personalized Markov Chain (FPMC), we do this calculation by:

$$
f(i \mid u, j) = \underbrace{\gamma_{ui} \cdot \gamma_{iu}}_{f(i \mid u)} + \underbrace{\gamma_{ij} \cdot \gamma_{ji}}_{f(i \mid j)} + \underbrace{\gamma_{uj} \cdot \gamma_{ju}}_{f(u, j)}.
$$

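The last term, $f(u, j)$, does not depend on the candidate next item $i$, so it does not change how items are ranked and can be dropped: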

$$
f(i \mid u, j) = 
\underbrace{\gamma_{ui} \cdot \gamma_{iu}}_{\text{user's compatibility with the next item}} + 
\underbrace{\gamma_{ij} \cdot \gamma_{ji}}_{\text{next item's compatibility with the previous item}}
$$

For our variants:

$$
f(i \mid u, j, \mathbf{F}) = 
\underbrace{\gamma_{ui} \cdot \gamma_{iu}}_{\text{user's compatibility with the next item}} + 
\underbrace{\gamma_{ij} \cdot \gamma_{ji}}_{\text{next item's compatibility with the previous item}} + 
\underbrace{\beta_u + \beta_i}_{\text{user and next-item biases}} + 
\underbrace{\mathbf{w}^\top \mathbf{F}_{\text{cat}}}_{\text{categorical feature embeddings}} + 
\underbrace{\mathbf{v}^\top \mathbf{F}_{\text{num}}}_{\text{numerical feature embeddings}} + 
\underbrace{b_g}_{\text{global bias}}
$$


Where
- $\gamma_{ui}, \gamma_{iu}, \gamma_{ij}, \gamma_{ji}: \text{Embedding vectors capturing user-item and item-item interactions.}$
- $\beta_u, \beta_i: \text{Bias terms for the user and the next item.}$
- $\mathbf{F}_{\text{cat}}: \text{Categorical feature embeddings.}$
- $\mathbf{F}_{\text{num}}: \text{Dense representations of numerical features (e.g., from a dense layer).}$
- $\mathbf{w}, \mathbf{v}: \text{Learnable weights for categorical and numerical features, respectively.}$
- $b_g: \text{Global bias.}$


In [58]:
# Convert both splits for the FPMC model and group them into batches of 4096.
# Only the training stream is shuffled (buffer of 1024 elements); the test
# stream keeps its order.
train_data = (
    fpmc_df_to_tf_dataset(train_df)
    .shuffle(1024)
    .batch(4096)
)
test_data = (
    fpmc_df_to_tf_dataset(test_df)
    .batch(4096)
)

In [None]:
# Hyper-parameters - small values chosen for demonstration purposes.
l2_reg = 1e-3
embedding_dim = 30
lr = 1e-3

model = FPMCVariants(
    l2_reg=l2_reg,
    embedding_dim=embedding_dim,
    data_query=data_query,
)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr))

# Halt training after 10 epochs without a validation-RMSE gain of at least
# 0.001, and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    min_delta=0.001,
    patience=10,
    monitor="val_root_mean_squared_error",
)

# Train for at most 500 epochs, keeping the per-epoch metrics in `history`.
history = model.fit(
    train_data,
    callbacks=[early_stopping],
    epochs=500,
    validation_data=test_data,
)

In [None]:
# RMSE as reported by Keras on the held-out split.
test_metrics = model.evaluate(test_data, return_dict=True)
print(f"Test RMSE: {test_metrics['root_mean_squared_error']}")

# Recompute the RMSE by hand as a cross-check on the Keras metric.
# Each batch is flattened to 1-D before it is stored: if the model emits
# (batch, 1) while the labels are (batch,), the subtraction and comparison
# below would otherwise broadcast to an (N, N) matrix and report wrong numbers.
predictions = []
actual_ratings = []
for batch in test_data:
    predicted_ratings = np.ravel(model(batch).numpy())
    actual_ratings.append(np.ravel(batch["rating"].numpy()))
    predictions.append(predicted_ratings)

predictions = np.concatenate(predictions)
actual_ratings = np.concatenate(actual_ratings)
assert predictions.shape == actual_ratings.shape, (
    "expected exactly one prediction per rating"
)

rmse = np.sqrt(np.mean((predictions - actual_ratings) ** 2))
print(f"RECHECK RMSE: {rmse}")

# Rounded accuracy is only meaningful when every true rating is a whole number.
if np.all(actual_ratings == actual_ratings.round()):
    correct = np.mean(predictions.round() == actual_ratings)
    print(f"Rounded Accuracy: {correct}")
else:
    print("Actual ratings are not integers, skipping rounded accuracy calculation.")