# Running Models

In [None]:
import gzip
from collections import defaultdict
import math
import numpy as np
import string
import random
import string

import tensorflow as tf
import tensorflow_recommenders as tfrs
import pandas as pd
import keras_tuner as kt

from latent_factor_model import LatentFactorModel

import warnings
warnings.filterwarnings("ignore")

***
# Description
***

## Latent Factor Models + Neural Collaborative Filtering

$$
\arg \min_{\alpha, \beta, \gamma} \sum_{u,i} \left( \alpha + \beta_u + \beta_i + \gamma_u \cdot \gamma_i - R_{u,i} \right)^2 + \lambda \left[ \sum_u \beta_u^2 + \sum_i \beta_i^2 + \sum_i \left\| \gamma_i \right\|_2^2 + \sum_u \left\| \gamma_u \right\|_2^2 \right]
$$

Single terms:
- **Global bias, $ \alpha $**: the overall average rating across all users and items.
- **User bias, $ \beta_u $**: captures the tendency of user $ u $ to rate items higher or lower than the global average.
- **Item bias, $ \beta_i $**: captures the inherent popularity or quality of item $ i $, i.e. its tendency to be rated higher or lower than the global average.
- **User and item latent factors, $ \gamma_u $ and $ \gamma_i $**: capture the latent preferences of user $ u $ and the latent characteristics of item $ i $, respectively.

Combinations of terms:
- **Prediction error**: The expression $ \left( \alpha + \beta_u + \beta_i + \gamma_u \cdot \gamma_i - R_{u,i} \right)^2 $ measures the squared difference between the predicted rating $ (\alpha + \beta_u + \beta_i + \gamma_u \cdot \gamma_i) $ and the actual rating $ R_{u,i} $ for user $ u $ and item $ i $.
- **Regularization term**: The term $ \lambda \left[ \sum_u \beta_u^2 + \sum_i \beta_i^2 + \sum_i \left\| \gamma_i \right\|_2^2 + \sum_u \left\| \gamma_u \right\|_2^2 \right] $ penalizes large values of the biases and latent factors to prevent overfitting. Here:
  - $ \sum_u \beta_u^2 $ and $ \sum_i \beta_i^2 $ apply regularization to the user and item biases, respectively.
  - $ \sum_i \left\| \gamma_i \right\|_2^2 $ and $ \sum_u \left\| \gamma_u \right\|_2^2 $ apply regularization to the latent factors of items and users, respectively.
- **Regularization coefficient, $ \lambda $**: This parameter controls the strength of the regularization, balancing the fit to the data with the complexity of the model.


In [None]:
# --- Model definition -------------------------------------------------------
# `data_querry` is left as an Ellipsis placeholder here.
# NOTE(review): presumably replaced with the real query data before running — confirm.
data_querry = ...
embedding_dim = dense_units = 32
l2_reg = 0.0201
model = LatentFactorModel(l2_reg, dense_units, embedding_dim, data_querry)

# --- Optimizer --------------------------------------------------------------
# Adam driven by an exponentially decaying learning rate: starts at 1e-4 and
# is scaled by 0.9 per 10,000 optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=10000,
    decay_rate=0.9,
)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule))

# --- Early stopping ---------------------------------------------------------
# Watch the validation RMSE; give up after 10 epochs without an improvement of
# at least 0.001 and roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_root_mean_squared_error",
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# --- Data pipelines ---------------------------------------------------------
# Batch first, then cache, so the already-batched tensors are reused on every
# epoch instead of being rebuilt.
cached_train, cached_test = (
    split.batch(4096).cache() for split in (train_data, test_data)
)

# --- Training ---------------------------------------------------------------
# NOTE(review): the test split is passed as validation data, so early stopping
# picks its best weights using the same data that is evaluated afterwards.
model.fit(
    cached_train,
    validation_data=cached_test,
    epochs=200,
    callbacks=[early_stopping],
)

Evaluation

In [None]:
# Report the RMSE as computed by Keras' own metric.
test_metrics = model.evaluate(cached_test, return_dict=True)
print(f"Test RMSE: {test_metrics['root_mean_squared_error']}")

# Recompute the RMSE by hand as a sanity check on the metric above.
#
# Every batch is flattened to 1-D before it is collected. Without this, a
# model output shaped (batch, 1) paired with labels shaped (batch,) would make
# `predictions - actual_ratings` broadcast to an (N, N) matrix and silently
# report a wrong RMSE / accuracy instead of an element-wise comparison.
predictions = []
actual_ratings = []
for batch in cached_test:
    predictions.append(model(batch).numpy().reshape(-1))
    actual_ratings.append(batch["rating"].numpy().reshape(-1))

# np.concatenate rejects an empty list, so fall back to empty arrays when the
# dataset yielded no batches.
predictions = np.concatenate(predictions) if predictions else np.empty(0)
actual_ratings = np.concatenate(actual_ratings) if actual_ratings else np.empty(0)

# Fail loudly rather than letting NumPy broadcast mismatched arrays.
if predictions.shape != actual_ratings.shape:
    raise ValueError(
        f"Prediction/label count mismatch: {predictions.shape} vs {actual_ratings.shape}"
    )

rmse = np.sqrt(np.mean((predictions - actual_ratings) ** 2))
print(f"RECHECK RMSE: {rmse}")

# Rounded accuracy is only meaningful when every true rating is a whole number.
if np.all(actual_ratings == actual_ratings.round()):
    correct = np.mean(predictions.round() == actual_ratings)
    print(f"Rounded Accuracy: {correct}")
else:
    print("Actual ratings are not integers, skipping rounded accuracy calculation.")