<a href="https://colab.research.google.com/github/ElhamHosseini73/Learn-git/blob/main/tensorflow_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow==2.15.0 tensorflow-recommenders

In [2]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds
from typing import Dict, Text
import numpy as np

In [None]:
# Load the MovieLens dataset
ratings = tfds.load('movielens/100k-ratings', split="train")

In [4]:
# Preprocess the data to extract only the user_id and movie_id
def preprocess_data(data):
    return data.map(lambda x: {
        "user_id": x["user_id"],
        "movie_id": x["movie_id"],
    })

preprocessed_data = preprocess_data(ratings)
train = preprocessed_data.take(80_000)  # Training split
test = preprocessed_data.skip(80_000).take(20_000)  # Testing split


In [5]:
# Extract unique user_ids and movie_ids into a list
user_ids = np.unique([x['user_id'].numpy() for x in train])
movie_ids = np.unique([x['movie_id'].numpy() for x in train])

In [6]:
# User embedding model
user_model = tf.keras.Sequential([
    tf.keras.layers.StringLookup(vocabulary=user_ids, mask_token=None),
    tf.keras.layers.Embedding(input_dim=len(user_ids) + 1, output_dim=32)
])

# Movie embedding model
movie_model = tf.keras.Sequential([
    tf.keras.layers.StringLookup(vocabulary=movie_ids, mask_token=None),
    tf.keras.layers.Embedding(input_dim=len(movie_ids) + 1, output_dim=32)
])

In [7]:
class MovielandsModel(tfrs.Model):
    def __init__(self, user_model, movie_model, movie_ids):
        super().__init__()
        self.user_model = user_model
        self.movie_model = movie_model

        # Convert movie_ids (numpy array) to a TensorFlow Dataset
        movie_dataset = tf.data.Dataset.from_tensor_slices(movie_ids)

        # Create a FactorizedTopK layer with candidates as batched movie embeddings
        self.task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
            candidates=movie_dataset.batch(128).map(lambda x: movie_model(x))
        ))

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        # Get user and movie embeddings
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_id"])

        # Return the loss from the task (retrieval)
        return self.task(user_embeddings, movie_embeddings)

# Initialize the model
model = MovielandsModel(user_model=user_model, movie_model=movie_model, movie_ids=movie_ids)


In [10]:
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [None]:
model.fit(train.batch(4096), epochs=3)

In [None]:
model.evaluate(test.batch(4096), return_dict=True)

In [15]:
# Convert movie_ids (which is a numpy array) into a TensorFlow dataset
movie_dataset = tf.data.Dataset.from_tensor_slices(movie_ids)

# Build the retrieval index using the BruteForce layer
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Index the movie embeddings
index.index_from_dataset(
    movie_dataset.batch(100).map(lambda movie_id: (movie_id, model.movie_model(movie_id)))
)

# Get top 5 movie recommendations for a specific user
_, movies = index(tf.constant(["user_1"]), k=5)

# Print the recommendations
print(f"Top 5 movie recommendations for user_1: {movies.numpy()}")


Top 5 movie recommendations for user_1: [[b'273' b'544' b'129' b'952' b'293']]


In [None]:
index.save("movieland_retrieval_model")


In [9]:
# Inspect the dataset
for example in train.take(1):
    print(example)

{'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'138'>, 'movie_id': <tf.Tensor: shape=(), dtype=string, numpy=b'357'>}
