In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Multiply, Dropout
from tensorflow.keras.optimizers import Adam

# Fetch the "train" split of the MovieLens 100k ratings dataset through TFDS.
ratings = tfds.load(name="movielens/100k-ratings", split="train")

Downloading and preparing dataset 4.70 MiB (download: 4.70 MiB, generated: 32.41 MiB, total: 37.10 MiB) to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/100000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/movielens/100k-ratings/incomplete.OYMJCF_0.1.1/movielens-train.tfrecord*..…

Dataset movielens downloaded and prepared to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1. Subsequent calls will reuse this data.


In [None]:
def _select_rating_fields(example):
    """Return only the movie title, user id and rating of one raw example.

    The raw "user_rating" field is exposed under the shorter key "rating".
    """
    return {
        "movie_title": example["movie_title"],
        "user_id": example["user_id"],
        "rating": example["user_rating"],
    }


# Drop every feature except the three used by the rest of the notebook.
ratings = ratings.map(_select_rating_fields)

In [None]:
# Collect the distinct user ids and movie titles that occur in the ratings
user_ids = np.unique(
    list(ratings.map(lambda row: row["user_id"]).as_numpy_iterator())
)
movie_titles = np.unique(
    list(ratings.map(lambda row: row["movie_title"]).as_numpy_iterator())
)

# Assign each distinct user id / movie title its position in the sorted
# unique array; these integers serve as the embedding indices
user_to_int = dict(zip(user_ids, range(len(user_ids))))
movie_to_int = dict(zip(movie_titles, range(len(movie_titles))))

In [None]:
# Vocabulary sizes: one entry per distinct user / movie in the lookup tables
num_users, num_movies = len(user_to_int), len(movie_to_int)

In [None]:
# Hyper-parameters of the two towers
GMF_EMBEDDING_DIM = 10
MLP_EMBEDDING_DIM = 32
DROPOUT_RATE = 0.2


def _embed(id_input, vocab_size, dim, name):
    """Embed a single-id input and flatten the result to one vector per example.

    Args:
        id_input: Keras tensor holding one integer index per example.
        vocab_size: number of rows in the embedding table.
        dim: size of each embedding vector.
        name: name given to the Embedding layer.

    Returns:
        The flattened embedding tensor.

    `input_length` is intentionally not passed to `Embedding`: the argument is
    deprecated in current Keras and the sequence length is already fixed by the
    `Input(shape=(1,))` layers feeding this helper.
    """
    embedded = Embedding(vocab_size, dim, name=name)(id_input)
    return Flatten()(embedded)


user_input = Input(shape=(1,), name='user_input', dtype='int32')
movie_input = Input(shape=(1,), name='movie_input', dtype='int32')

# GMF part: element-wise product of the user and movie embeddings
gmf_user_embedding = _embed(user_input, num_users, GMF_EMBEDDING_DIM, 'gmf_user_embedding')
gmf_movie_embedding = _embed(movie_input, num_movies, GMF_EMBEDDING_DIM, 'gmf_movie_embedding')
gmf_vector = Multiply()([gmf_user_embedding, gmf_movie_embedding])

# MLP part: separate embeddings, concatenated and passed through a dense stack
mlp_user_embedding = _embed(user_input, num_users, MLP_EMBEDDING_DIM, 'mlp_user_embedding')
mlp_movie_embedding = _embed(movie_input, num_movies, MLP_EMBEDDING_DIM, 'mlp_movie_embedding')

mlp_vector = Concatenate()([mlp_user_embedding, mlp_movie_embedding])
mlp_vector = Dense(64, activation='relu')(mlp_vector)
mlp_vector = Dropout(DROPOUT_RATE)(mlp_vector)
mlp_vector = Dense(32, activation='relu')(mlp_vector)
mlp_vector = Dropout(DROPOUT_RATE)(mlp_vector)
mlp_vector = Dense(16, activation='relu')(mlp_vector)

# Combine GMF and MLP parts; a single linear unit produces the rating estimate
combined_vector = Concatenate()([gmf_vector, mlp_vector])
outputs = Dense(1, activation='linear')(combined_vector)

In [None]:
# Wrap the two inputs and the rating output into a trainable model,
# optimised with Adam on the mean squared error of the predicted rating
model = Model(
    inputs=[user_input, movie_input],
    outputs=outputs,
)
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mean_squared_error',
)

model.summary()

In [None]:
# Encode every rating as an integer row: (user index, movie index, rating)
rows = []
for example in ratings.as_numpy_iterator():
    user_idx = user_to_int[example["user_id"]]
    movie_idx = movie_to_int[example["movie_title"]]
    rows.append((user_idx, movie_idx, example["rating"]))

# All three columns are stored as int32 in a single 2-D array
dataset = np.asarray(rows, dtype='int32')

In [None]:
# Split the data into training and validation rows.
SPLIT_SEED = 42        # fixed seed so the split is identical on every run
TRAIN_FRACTION = 0.8   # share of rows used for training

# Shuffle via a seeded permutation of row indices instead of shuffling
# `dataset` in place: the split is then reproducible, and re-running this cell
# always yields the same train/validation rows because `dataset` is untouched.
shuffled_order = np.random.default_rng(SPLIT_SEED).permutation(len(dataset))
train_size = int(TRAIN_FRACTION * len(dataset))
train_data = dataset[shuffled_order[:train_size]]
val_data = dataset[shuffled_order[train_size:]]

# Every row must land in exactly one of the two splits
assert len(train_data) + len(val_data) == len(dataset)

In [None]:
# Inspect the training split; each row is (user index, movie index, rating)
train_data

array([[ 114,  414,    4],
       [ 573,  675,    1],
       [ 838,   44,    3],
       ...,
       [ 729,  369,    2],
       [ 682, 1637,    3],
       [ 265,  628,    4]], dtype=int32)

In [None]:
# Unpack the three columns (user index, movie index, rating) of each split;
# iterating over the transpose yields one 1-D column array at a time
train_user_data, train_movie_data, train_ratings = train_data.T
val_user_data, val_movie_data, val_ratings = val_data.T

In [None]:
EPOCHS = 10       # You can change the number of epochs
BATCH_SIZE = 32   # And the batch size

# Inputs are passed in the same order as the model's inputs: users, then movies
train_inputs = [train_user_data, train_movie_data]
val_inputs = [val_user_data, val_movie_data]

history = model.fit(
    train_inputs,
    train_ratings,
    validation_data=(val_inputs, val_ratings),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

In [None]:
# Example (user, movie) pairs to score. The model consumes embedding indices
# (the integer values of user_to_int / movie_to_int), not raw MovieLens ids.
# Dedicated names are used so the `user_ids` array of unique raw ids built
# earlier in the notebook is not overwritten by these example values.
example_user_idx = np.array([1, 2, 3])
example_movie_idx = np.array([10, 20, 30])

# Fail early with a clear message instead of handing out-of-range indices to
# the embedding layers.
assert 0 <= example_user_idx.min() and example_user_idx.max() < num_users, \
    "user index outside the embedding table"
assert 0 <= example_movie_idx.min() and example_movie_idx.max() < num_movies, \
    "movie index outside the embedding table"

# Predicting the ratings: one row per (user, movie) pair
predicted_ratings = model.predict([example_user_idx, example_movie_idx])

# Output the predictions
print(predicted_ratings)