This notebook uses TensorFlow and the RNEM model on the MovieLens dataset to recommend movies to a user based on their past ratings.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

Load the MovieLens dataset

In [None]:
# Fetch the MovieLens 100k ratings and materialise them as a pandas DataFrame.
dataset = tfds.load(name="movielens/100k-ratings", split="train")
df = tfds.as_dataframe(dataset)

# Positional 80/20 split of the rows, in the order tfds returned them.
train_size = int(0.8 * len(df))

# .copy() gives the training split its own data, so later column assignments
# do not act on a slice of `df`.
train_df = df.iloc[:train_size].copy()
# reset_index() relabels the test rows 0..n-1 so that index labels can double
# as row positions when per-row feature arrays are filled from this frame.
test_df = df.iloc[train_size:].reset_index(drop=True)

print("Train set shape:", train_df.shape)
print("Test set shape:", test_df.shape)

Map movie and user IDs to contiguous integers

In [None]:
# Raw ids in order of first appearance; the position in each array becomes the
# contiguous integer id used to index the entity vectors.
unique_movies = df["movie_id"].unique()
unique_users = df["user_id"].unique()
movie_id_map = {raw_id: idx for idx, raw_id in enumerate(unique_movies)}
user_id_map = {raw_id: idx for idx, raw_id in enumerate(unique_users)}

# assign() returns new frames instead of writing columns into slices of `df`,
# which avoids pandas' SettingWithCopyWarning.
# Note: the maps are applied to whatever the id columns currently hold, so
# re-running this cell without re-running the load cell would remap ids that
# were already mapped.
train_df = train_df.assign(
    movie_id=train_df["movie_id"].map(movie_id_map),
    user_id=train_df["user_id"].map(user_id_map),
)
test_df = test_df.assign(
    movie_id=test_df["movie_id"].map(movie_id_map),
    user_id=test_df["user_id"].map(user_id_map),
)

The dataset was already split into training (80%) and testing (20%) sets when it was loaded above.

Compute the number of entities and relations

In [None]:
# Sizes derived from the full ratings frame and the id maps built above.
num_movies = len(unique_movies)
num_users = len(unique_users)
num_ratings = len(df)
num_relations = 2
# Users and movies share one entity axis: users occupy columns [0, num_users),
# movies occupy [num_users, num_users + num_movies).
# (Fixed: the original referenced the undefined name `num_user`.)
num_entities = num_movies + num_users

Define the RNEM model architecture

In [None]:

def RNEM(num_relations, num_entities, hidden_size):
    """Build the RNEM scoring graph using the TF1 graph API.

    Relation and entity indicator vectors are each encoded by a ReLU dense
    layer of width ``hidden_size``. The score of a (relation row, entity row)
    pair is the dot product of the two encodings.

    Note: ``tf.placeholder``, ``tf.layers`` and ``tf.train`` are TensorFlow 1
    names; under TensorFlow 2 they are only available via ``tf.compat.v1``.

    Args:
        num_relations: Width of each relation indicator row.
        num_entities: Width of each entity indicator row.
        hidden_size: Number of units in both encoder layers.

    Returns:
        Tuple ``(relations, entities, probabilities, ground_truth, loss,
        optimizer)``:
          * relations: float32 placeholder, shape [None, num_relations].
          * entities: float32 placeholder, shape [None, num_entities].
          * probabilities: sigmoid of the pairwise scores; entry [i, j] pairs
            relation row i with entity row j.
          * ground_truth: float32 placeholder, shape [None, num_relations].
          * loss: scalar mean sigmoid cross-entropy over aligned rows; needs
            relations, entities and ground_truth fed with equal row counts.
          * optimizer: Adam (learning rate 0.01) op minimizing ``loss``.
    """
    # Placeholders for the input indicator matrices
    relations = tf.placeholder(tf.float32, [None, num_relations])
    entities = tf.placeholder(tf.float32, [None, num_entities])

    # Fully connected encoders for relations and entities
    relations_encoding = tf.layers.dense(relations, hidden_size, activation=tf.nn.relu)
    entities_encoding = tf.layers.dense(entities, hidden_size, activation=tf.nn.relu)

    # Pairwise scores: one row per relation row, one column per entity row
    logits = tf.matmul(relations_encoding, entities_encoding, transpose_b=True)

    # Sigmoid turns the pairwise scores into probabilities
    probabilities = tf.nn.sigmoid(logits)

    # Placeholder for the ground truth data
    ground_truth = tf.placeholder(tf.float32, [None, num_relations])

    # The pairwise matrix cannot be compared elementwise with ground_truth
    # ([batch, num_relations]), so the loss is taken per example instead.
    # Example i's own score is logits[i, i], computed directly as a row-wise
    # dot product so the full matrix is not needed for training.
    example_logits = tf.reduce_sum(relations_encoding * entities_encoding, axis=1)
    # The label of example i is the ground-truth entry of the relation flagged
    # in that row (this assumes one-hot relation rows).
    example_labels = tf.reduce_sum(ground_truth * relations, axis=1)

    # Binary cross-entropy loss over the aligned (relation, entity) rows
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=example_labels, logits=example_logits)
    )

    # Optimizer op that minimizes the loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)

    # Return the model components
    return relations, entities, probabilities, ground_truth, loss, optimizer


Instantiate the RNEM model

In [None]:

# Width shared by the relation encoder and the entity encoder inside RNEM.
hidden_size = 50

# Build the graph once and unpack its placeholders, outputs and training op.
relations, entities, probabilities, ground_truth, loss, optimizer = RNEM(
    num_relations=num_relations,
    num_entities=num_entities,
    hidden_size=hidden_size,
)

Initialize a TensorFlow session and run the optimizer

In [None]:
# Training settings.
num_steps = 100
batch_size = 1000
# Ratings at or above this value are treated as positive labels, because
# sigmoid cross-entropy expects labels in [0, 1].
# Assumes the 1-5 MovieLens rating scale.
like_threshold = 4

# The session is deliberately not wrapped in a `with` block: later cells
# reuse `sess` for prediction, so it must stay open after training.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

batch_rows = np.arange(batch_size)
for step in range(num_steps):
    # Sample a batch of training rows (with replacement)
    indices = np.random.choice(len(train_df), batch_size)
    batch = train_df.iloc[indices]

    # Every example uses relation 0
    batch_relations = np.zeros((batch_size, num_relations))
    batch_relations[:, 0] = 1

    # Two-hot entity rows: the user's column plus the movie's column, with
    # movies offset by num_users on the shared entity axis
    batch_entities = np.zeros((batch_size, num_entities))
    batch_entities[batch_rows, batch["user_id"].to_numpy().astype(int)] = 1
    batch_entities[batch_rows, num_users + batch["movie_id"].to_numpy().astype(int)] = 1

    # Binary label stored in the column of the active relation
    batch_ground_truth = np.zeros((batch_size, num_relations))
    batch_ground_truth[:, 0] = batch["rating"].to_numpy() >= like_threshold

    # Apply one optimizer update per sampled batch
    feed_dict = {relations: batch_relations, entities: batch_entities, ground_truth: batch_ground_truth}
    sess.run(optimizer, feed_dict=feed_dict)

           
           

Run the optimizer on the batch

In [None]:
# Feed the most recently built batch and apply a single optimizer update.
# NOTE(review): this runs once, outside the training loop; make sure `sess`
# is still open when this cell executes.
feed_dict = {
    relations: batch_relations,
    entities: batch_entities,
    ground_truth: batch_ground_truth,
}
sess.run(optimizer, feed_dict=feed_dict)

Compute the model predictions on the test set

In [None]:
# Build the test inputs with positional row numbers. The original used the
# iterrows() index label as the row position, which is out of range whenever
# test_df keeps the labels it had in the full frame.
num_test = len(test_df)
test_rows = np.arange(num_test)

# Every test example uses relation 0
test_relations = np.zeros((num_test, num_relations))
test_relations[:, 0] = 1

# Two-hot entity rows: user column plus movie column (offset by num_users)
test_entities = np.zeros((num_test, num_entities))
test_entities[test_rows, test_df["user_id"].to_numpy().astype(int)] = 1
test_entities[test_rows, num_users + test_df["movie_id"].to_numpy().astype(int)] = 1

# `probabilities` pairs every relation row with every entity row, so the
# result has shape (num_test, num_test). The score of test example i is the
# diagonal entry [i, i].
test_probabilities = sess.run(probabilities, feed_dict={relations: test_relations, entities: test_entities})

Compute the probability for each movie and select the top recommended movies for a given user

In [None]:

    # Contiguous user index (a value of user_id_map), not a raw dataset id.
    user_id = 0

    # One candidate row per movie, each pairing this user with that movie:
    # the user's column is set in every row and the movie block is an
    # identity matrix. The original built user-only and movie-only rows, so
    # the movie scores did not depend on the chosen user.
    candidate_entities = np.zeros((num_movies, num_entities))
    candidate_entities[:, user_id] = 1
    candidate_entities[:, num_users:] = np.eye(num_movies)

    # A single relation row (relation 0) is scored against every candidate,
    # giving a (1, num_movies) matrix of probabilities.
    user_relations = np.zeros((1, num_relations))
    user_relations[0, 0] = 1
    feed_dict = {relations: user_relations, entities: candidate_entities}
    user_probabilities = sess.run(probabilities, feed_dict=feed_dict)[0]

    # Ten highest-scoring contiguous movie indices, best first
    top_indices = np.argsort(user_probabilities)[::-1][:10]

    # movie_id_map goes raw id -> contiguous index, so invert it before
    # looking up titles in `df` (indexing it with a contiguous index was a
    # KeyError).
    raw_movie_id_by_index = {idx: raw_id for raw_id, idx in movie_id_map.items()}
    title_by_raw_id = df.drop_duplicates("movie_id").set_index("movie_id")["movie_title"]
    top_movies = []
    for index in top_indices:
        title = title_by_raw_id.loc[raw_movie_id_by_index[index]]
        # Decode defensively in case titles are stored as bytes
        top_movies.append(title.decode("utf-8") if isinstance(title, bytes) else title)
    

In [None]:
# Print the ranked recommendations, numbered from 1.
# (The loop was indented under an unindented print, an IndentationError.)
print("Top recommended movies for user %d:" % user_id)
for rank, movie in enumerate(top_movies, start=1):
    print("%d. %s" % (rank, movie))

Compute the Area Under Curve (AUC) metric

In [None]:
from sklearn.metrics import roc_auc_score

# ROC AUC with 1-D scores needs binary targets, so ratings are binarized:
# values at or above the threshold are positives.
# Assumes the 1-5 MovieLens rating scale.
like_threshold = 4
test_ground_truth = (np.array(test_df["rating"]) >= like_threshold).astype(int)

# test_probabilities pairs every test relation row with every test entity
# row; the score of test example i is the diagonal entry [i, i].
test_scores = np.diagonal(np.asarray(test_probabilities))

auc_score = roc_auc_score(test_ground_truth, test_scores)
print("AUC score:", auc_score)