In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow_datasets as tfds

# Load the MovieLens dataset
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Preprocess the dataset to ensure IDs are integers and zero-indexed
ratings = ratings.map(lambda x: {
    'movie_id': tf.cast(tf.strings.to_number(x['movie_id']), tf.int32) - 1,  # zero-index movie_id
    'user_id': tf.cast(tf.strings.to_number(x['user_id']), tf.int32) - 1,    # zero-index user_id
    'user_rating': x['user_rating']
})

# Find the actual number of unique users and movies using reduce
unique_movie_ids = ratings.map(lambda x: x['movie_id']).apply(tf.data.experimental.unique())
unique_user_ids = ratings.map(lambda x: x['user_id']).apply(tf.data.experimental.unique())

# Convert dataset to numpy arrays to calculate the size
def get_unique_count(dataset):
    count = len(list(dataset.as_numpy_iterator()))
    return count

num_movies = get_unique_count(unique_movie_ids)
num_users = get_unique_count(unique_user_ids)

print(f"Number of unique movies: {num_movies}")
print(f"Number of unique users: {num_users}")

# Create embedding model
class MovieLensModel(tf.keras.Model):
    def __init__(self, num_users, num_movies, embedding_dim):
        super().__init__()
        # Embedding layers
        self.user_embedding = tf.keras.layers.Embedding(num_users, embedding_dim)
        self.movie_embedding = tf.keras.layers.Embedding(num_movies, embedding_dim)
        # Compute dot product between user and movie embeddings
        self.dot = tf.keras.layers.Dot(axes=1)

    def call(self, inputs):
        user_id, movie_id = inputs
        user_vec = self.user_embedding(user_id)
        movie_vec = self.movie_embedding(movie_id)
        return self.dot([user_vec, movie_vec])

# Use actual unique values for num_users and num_movies
embedding_dim = 50  # Size of the embedding vectors

model = MovieLensModel(num_users, num_movies, embedding_dim)

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Prepare training data (user_ids, movie_ids, ratings)
def prepare_data(ratings):
    user_ids = []
    movie_ids = []
    ratings_ = []

    for rating in ratings:
        user_ids.append(int(rating['user_id'].numpy()))
        movie_ids.append(int(rating['movie_id'].numpy()))
        ratings_.append(float(rating['user_rating'].numpy()))

    return np.array(user_ids), np.array(movie_ids), np.array(ratings_)

# Split data into training and validation sets
train_ratings = ratings.take(80000)
test_ratings = ratings.skip(80000)

# Prepare the data
train_user_ids, train_movie_ids, train_ratings = prepare_data(train_ratings)
test_user_ids, test_movie_ids, test_ratings = prepare_data(test_ratings)

# Train the model
model.fit([train_user_ids, train_movie_ids], train_ratings, epochs=5,
          validation_data=([test_user_ids, test_movie_ids], test_ratings))

# Predict ratings for new user-movie pairs
predicted_ratings = model.predict([test_user_ids, test_movie_ids])

# Recommend top movies for a user
def recommend_movies(user_id, num_recommendations=10):
    # Predict ratings for all movies for a specific user
    movie_ids = np.arange(num_movies)
    predicted_ratings = model.predict([np.full(movie_ids.shape, user_id), movie_ids])

    # Get top movie IDs
    top_movie_ids = np.argsort(predicted_ratings, axis=0)[-num_recommendations:]
    return top_movie_ids

# Example: Recommend top 10 movies for user 42
recommendations = recommend_movies(42, 10)
print(f"Recommended movie IDs for user 42: {recommendations}")


Number of unique movies: 1682
Number of unique users: 943
Epoch 1/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 11.2776 - val_loss: 1.4139
Epoch 2/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 1.1754 - val_loss: 0.9937
Epoch 3/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 0.9142 - val_loss: 0.9371
Epoch 4/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.8466 - val_loss: 0.9131
Epoch 5/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.7817 - val_loss: 0.8966
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 960us/step
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Recommended movie IDs for user 42: [[ 27]
 [301]
 [407]
 [ 21]
 [ 63]
 [173]
 [312]
 [271]
 [ 49]
 [317]]


In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow_datasets as tfds

# Load the MovieLens dataset
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Preprocess the dataset to ensure IDs are integers and zero-indexed
ratings = ratings.map(lambda x: {
    'movie_id': tf.cast(tf.strings.to_number(x['movie_id']), tf.int32) - 1,  # zero-index movie_id
    'user_id': tf.cast(tf.strings.to_number(x['user_id']), tf.int32) - 1,    # zero-index user_id
    'user_rating': x['user_rating']
})

# Get movie titles and movie IDs into a dictionary
movie_titles = {}
for movie in movies:
    movie_id = int(movie['movie_id'].numpy()) - 1  # zero-index movie_id
    movie_title = movie['movie_title'].numpy().decode('utf-8')
    movie_titles[movie_id] = movie_title

# Find the actual number of unique users and movies using reduce
unique_movie_ids = ratings.map(lambda x: x['movie_id']).apply(tf.data.experimental.unique())
unique_user_ids = ratings.map(lambda x: x['user_id']).apply(tf.data.experimental.unique())

# Convert dataset to numpy arrays to calculate the size
def get_unique_count(dataset):
    count = len(list(dataset.as_numpy_iterator()))
    return count

num_movies = get_unique_count(unique_movie_ids)
num_users = get_unique_count(unique_user_ids)

print(f"Number of unique movies: {num_movies}")
print(f"Number of unique users: {num_users}")

# Create embedding model
class MovieLensModel(tf.keras.Model):
    def __init__(self, num_users, num_movies, embedding_dim):
        super().__init__()
        # Embedding layers
        self.user_embedding = tf.keras.layers.Embedding(num_users, embedding_dim)
        self.movie_embedding = tf.keras.layers.Embedding(num_movies, embedding_dim)
        # Compute dot product between user and movie embeddings
        self.dot = tf.keras.layers.Dot(axes=1)

    def call(self, inputs):
        user_id, movie_id = inputs
        user_vec = self.user_embedding(user_id)
        movie_vec = self.movie_embedding(movie_id)
        return self.dot([user_vec, movie_vec])

# Use actual unique values for num_users and num_movies
embedding_dim = 50  # Size of the embedding vectors

model = MovieLensModel(num_users, num_movies, embedding_dim)

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Prepare training data (user_ids, movie_ids, ratings)
def prepare_data(ratings):
    user_ids = []
    movie_ids = []
    ratings_ = []

    for rating in ratings:
        user_ids.append(int(rating['user_id'].numpy()))
        movie_ids.append(int(rating['movie_id'].numpy()))
        ratings_.append(float(rating['user_rating'].numpy()))

    return np.array(user_ids), np.array(movie_ids), np.array(ratings_)

# Split data into training and validation sets
train_ratings = ratings.take(80000)
test_ratings = ratings.skip(80000)

# Prepare the data
train_user_ids, train_movie_ids, train_ratings = prepare_data(train_ratings)
test_user_ids, test_movie_ids, test_ratings = prepare_data(test_ratings)

# Train the model
model.fit([train_user_ids, train_movie_ids], train_ratings, epochs=25,
          validation_data=([test_user_ids, test_movie_ids], test_ratings))

# Predict ratings for new user-movie pairs
predicted_ratings = model.predict([test_user_ids, test_movie_ids])

# Recommend top movies for a user
def recommend_movies(user_id, num_recommendations=10):
    # Predict ratings for all movies for a specific user
    movie_ids = np.arange(num_movies)
    predicted_ratings = model.predict([np.full(movie_ids.shape, user_id), movie_ids])

    # Get top movie IDs
    top_movie_ids = np.argsort(predicted_ratings.flatten())[-num_recommendations:]
    return top_movie_ids

# Example: Recommend top 10 movies for user 42
user_id = 42  # For zero-indexed user ID
recommendations = recommend_movies(user_id, 10)

# Convert movie IDs to titles
recommended_movie_titles = [movie_titles[movie_id] for movie_id in recommendations]

print(f"Recommended movies for user {user_id + 1}: {recommended_movie_titles}")


Instructions for updating:
Use `tf.data.Dataset.unique(...)


Instructions for updating:
Use `tf.data.Dataset.unique(...)


Number of unique movies: 1682
Number of unique users: 943
Epoch 1/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 875us/step - loss: 11.2264 - val_loss: 1.4318
Epoch 2/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 877us/step - loss: 1.1702 - val_loss: 0.9919
Epoch 3/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 856us/step - loss: 0.8945 - val_loss: 0.9267
Epoch 4/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 848us/step - loss: 0.8189 - val_loss: 0.9016
Epoch 5/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 844us/step - loss: 0.7667 - val_loss: 0.8859
Epoch 6/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 838us/step - loss: 0.7138 - val_loss: 0.8830
Epoch 7/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 840us/step - loss: 0.6521 - val_loss: 0.8797
Epoch 8/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow_datasets as tfds

# Load the MovieLens dataset
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Preprocess the dataset to ensure IDs are integers and zero-indexed
ratings = ratings.map(lambda x: {
    'movie_id': tf.cast(tf.strings.to_number(x['movie_id']), tf.int32) - 1,
    'user_id': tf.cast(tf.strings.to_number(x['user_id']), tf.int32) - 1,
    'user_rating': x['user_rating']
})

# Get movie titles into a dictionary
movie_titles = {}
for movie in movies:
    movie_id = int(movie['movie_id'].numpy()) - 1
    movie_title = movie['movie_title'].numpy().decode('utf-8')
    movie_titles[movie_id] = movie_title

# Find the actual number of unique users and movies
num_movies = ratings.map(lambda x: x['movie_id']).apply(tf.data.experimental.unique()).reduce(0, lambda x, _: x + 1)
num_users = ratings.map(lambda x: x['user_id']).apply(tf.data.experimental.unique()).reduce(0, lambda x, _: x + 1)

print(f"Number of unique movies: {num_movies.numpy()}")
print(f"Number of unique users: {num_users.numpy()}")

# Create embedding model
class MovieLensModel(tf.keras.Model):
    def __init__(self, num_users, num_movies, embedding_dim):
        super().__init__()
        self.user_embedding = tf.keras.layers.Embedding(num_users, embedding_dim, embeddings_regularizer=tf.keras.regularizers.l2(0.01))
        self.movie_embedding = tf.keras.layers.Embedding(num_movies, embedding_dim, embeddings_regularizer=tf.keras.regularizers.l2(0.01))
        self.dot = tf.keras.layers.Dot(axes=1)

    def call(self, inputs):
        user_id, movie_id = inputs
        user_vec = self.user_embedding(user_id)
        movie_vec = self.movie_embedding(movie_id)
        return self.dot([user_vec, movie_vec])

# Use a larger embedding dimension
embedding_dim = 100

model = MovieLensModel(num_users.numpy(), num_movies.numpy(), embedding_dim)

# Compile the model with a different loss function
model.compile(optimizer='adam', loss='mae')

# Prepare training data
def prepare_data(ratings):
    user_ids, movie_ids, ratings_ = [], [], []
    for rating in ratings:
        user_ids.append(int(rating['user_id'].numpy()))
        movie_ids.append(int(rating['movie_id'].numpy()))
        ratings_.append(float(rating['user_rating'].numpy()))
    return np.array(user_ids), np.array(movie_ids), np.array(ratings_)

# Split data into training and validation sets
train_ratings = ratings.take(80000)
test_ratings = ratings.skip(80000)

train_user_ids, train_movie_ids, train_ratings = prepare_data(train_ratings)
test_user_ids, test_movie_ids, test_ratings = prepare_data(test_ratings)

# Implement EarlyStopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
model.fit([train_user_ids, train_movie_ids], train_ratings, epochs=25,
          validation_data=([test_user_ids, test_movie_ids], test_ratings),
          callbacks=[early_stopping])

# Recommend top movies for a user
def recommend_movies(user_id, num_recommendations=10):
    movie_ids = np.arange(num_movies.numpy())
    predicted_ratings = model.predict([np.full(movie_ids.shape, user_id), movie_ids])
    top_movie_ids = np.argsort(predicted_ratings.flatten())[-num_recommendations:]
    return top_movie_ids

# Example: Recommend top 10 movies for user 42
user_id = 42
recommendations = recommend_movies(user_id, 10)

# Convert movie IDs to titles
recommended_movie_titles = [movie_titles[movie_id] for movie_id in recommendations]

print(f"Recommended movies for user {user_id + 1}: {recommended_movie_titles}")


Number of unique movies: 1682
Number of unique users: 943
Epoch 1/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.5999 - val_loss: 3.5260
Epoch 2/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.5286 - val_loss: 3.5260
Epoch 3/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.5311 - val_loss: 3.5260
Epoch 4/25
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.5343 - val_loss: 3.5260
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Recommended movies for user 43: ['Low Down Dirty Shame, A (1994)', 'Starship Troopers (1997)', 'Believers, The (1987)', 'Striptease (1996)', 'Cabin Boy (1994)', 'Gold Diggers: The Secret of Bear Mountain (1995)', 'Crossing Guard, The (1995)', 'Crows and Sparrows (1949)', "Pharaoh's Army (1995)", 'Convent, The (Convento, O) (1995)']
