In [5]:
import sys
sys.path.append("..")

import tensorflow as tf
import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds

from src.DCN import MovielensModel

# Checkpoint whose weights are restored below (relative path).
CHECKPOINT_PATH = '../models/DNN-7-96_32-64_32-emb_0.25-cross_emb/best/cp-174_loss-14645_acc100-0.3109_rmse-0.9815.ckpt'

# NOTE(review): the checkpoint directory name mentions `emb_0.25` while the
# first constructor argument here is 0.5 -- confirm these describe different
# settings and that the architecture matches the saved weights.
model = MovielensModel(0.5, [96, 32], [64, 32])
adagrad = tf.keras.optimizers.Adagrad(0.1)
model.compile(optimizer=adagrad)

# Kept as the last expression so the returned load-status object is displayed.
model.load_weights(CHECKPOINT_PATH)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f3297b28210>

In [147]:
def _select_rating_features(x):
    """Keep only the rating fields used in this notebook.

    `user_gender` and `bucketized_user_age` are passed through `int()`; every
    other field is forwarded unchanged.
    """
    # presumably int() on a dataset element relies on AutoGraph turning it
    # into a tensor cast inside `Dataset.map` -- confirm before refactoring.
    return {
        "movie_title": x["movie_title"],
        "user_id": x["user_id"],
        "user_rating": x["user_rating"],
        "user_gender": int(x["user_gender"]),
        "user_zip_code": x["user_zip_code"],
        "user_occupation_text": x["user_occupation_text"],
        "bucketized_user_age": int(x["bucketized_user_age"]),
        "timestamp": x["timestamp"],
    }


# Ratings: select features, group into batches of 4096, then cache the batches.
ratings = (
    tfds.load("movielens/100k-ratings", split="train")
    .map(_select_rating_features)
    .batch(4096)
    .cache()
)

# Movies: reduce every record to just its title.
movies = tfds.load("movielens/100k-movies", split="train").map(
    lambda movie: movie["movie_title"]
)

In [148]:
# Evaluate the restored model on all of `ratings`; `return_dict=True` yields a
# name -> value mapping, which is displayed as the cell output.
evaluation_metrics = model.evaluate(ratings, return_dict=True)
evaluation_metrics



{'root_mean_squared_error': 0.8618263006210327,
 'factorized_top_k/top_1_categorical_accuracy': 0.009060000069439411,
 'factorized_top_k/top_5_categorical_accuracy': 0.04741000011563301,
 'factorized_top_k/top_10_categorical_accuracy': 0.08121000230312347,
 'factorized_top_k/top_50_categorical_accuracy': 0.2769399881362915,
 'factorized_top_k/top_100_categorical_accuracy': 0.44159001111984253,
 'loss': 5506.12841796875,
 'regularization_loss': 0,
 'total_loss': 5506.12841796875}

In [149]:
# Brute-force top-k layer that embeds incoming queries with `model.query_model`.
index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)

# Pair every batch of 100 titles with the embeddings `model.candidate_model`
# produces for that same batch, then hand the zipped pairs to the index.
title_batches = movies.batch(100)
title_embeddings = title_batches.map(model.candidate_model)
index.index_from_dataset(tf.data.Dataset.zip((title_batches, title_embeddings)))

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7f31bc229510>

In [150]:
def get_user_data(user_id):
    """Collect one user's query features and the titles they have already rated.

    Reads the notebook-level `ratings` dataset.

    Args:
        user_id: Identifier of the user. It is converted with `str` before
            being compared against the `user_id` field of each rating row.

    Returns:
        A `(user_data, movies_seen)` tuple:

        * `user_data` maps each feature name to a length-1 NumPy array built
          from the first rating row found for the user.
        * `movies_seen` is a `(1, n)` NumPy array holding the unique titles
          across all of the user's rating rows.

    Raises:
        IndexError: If `ratings` contains no row for `user_id`.
    """
    user_id = str(user_id)

    # Materialize the user's rows once. Each pass has to unbatch and filter
    # the whole dataset, so reuse this list for both the features and titles.
    user_rows = list(ratings.unbatch().filter(lambda x: x["user_id"] == user_id))
    if not user_rows:
        # Same exception type an empty `list(...)[0]` lookup raises, so existing
        # handlers keep working, but with an explanatory message.
        raise IndexError(f"no ratings found for user_id {user_id!r}")

    first_row = user_rows[0]
    feature_names = (
        "user_id",
        "user_gender",
        "user_zip_code",
        "user_occupation_text",
        "bucketized_user_age",
        "timestamp",
    )
    # Wrap every scalar in a length-1 array so the features form a batch of one.
    user_data = {
        name: np.array([first_row[name].numpy()]) for name in feature_names
    }

    # dtype=object keeps the titles as Python bytes objects rather than a
    # fixed-width bytes array.
    rated_titles = np.array(
        [row["movie_title"].numpy() for row in user_rows], dtype=object
    )
    movies_seen = np.unique(rated_titles)[np.newaxis, ...]

    return user_data, movies_seen

In [151]:
import numpy as np

# Example query: recommendations for one user, leaving out already-rated titles.
EXAMPLE_USER_ID = 15
NUM_RECOMMENDATIONS = 15

user_data, movies_seen = get_user_data(EXAMPLE_USER_ID)
# Calling `index(user_data, k=...)` directly would skip the exclusion step.
scores, titles = index.query_with_exclusions(
    user_data, movies_seen, k=NUM_RECOMMENDATIONS
)





In [152]:
# Display the titles and scores returned by `index.query_with_exclusions`
# (the cell output is a 2-tuple, titles first).
titles, scores

(<tf.Tensor: shape=(1, 15), dtype=string, numpy=
 array([[b'Truth or Consequences, N.M. (1997)',
         b'To Gillian on Her 37th Birthday (1996)',
         b'Assignment, The (1997)',
         b'Last Klezmer: Leopold Kozlowski, His Life and Music, The (1995)',
         b'Evening Star, The (1996)', b'In Love and War (1996)',
         b'Associate, The (1996)', b'A Chef in Love (1996)',
         b'Journey of August King, The (1995)',
         b'Zeus and Roxanne (1997)', b'Infinity (1996)',
         b'Flipper (1996)', b'Jane Eyre (1996)',
         b"I'm Not Rappaport (1996)", b'Swept from the Sea (1997)']],
       dtype=object)>,
 <tf.Tensor: shape=(1, 15), dtype=float32, numpy=
 array([[5.318854 , 5.0321703, 4.946436 , 4.8859577, 4.8824406, 4.8518567,
         4.818259 , 4.8145103, 4.7733364, 4.7049975, 4.629643 , 4.6278663,
         4.4265757, 4.3398347, 4.3255377]], dtype=float32)>)