In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.model_selection import train_test_split
!pip install recommenders
from recommenders.evaluation.python_evaluation import ndcg_at_k

**Preprocessing**

In [None]:
# Load the raw ratings table from the working directory.
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")

In [None]:
# Shorten the id column names; every later cell refers to "user" / "movie".
column_aliases = {"userId": "user", "movieId": "movie"}
ratings = ratings.rename(columns=column_aliases)

In [None]:
# Split the ratings into a training set and a held-out test set.
#
# NOTE(review): the original cell only created two empty frames and nothing
# visible in this notebook ever filled them, so the CSV export wrote empty
# files and training had no rows to fit on. A random 20% hold-out is an
# assumption taken from the "_20" suffix of the exported file names --
# confirm the intended split strategy (e.g. per-user or time-based).
TEST_FRACTION = 0.2
SPLIT_SEED = 42  # fixed so the split is reproducible across kernel restarts

split_columns = ["user", "movie", "rating", "timestamp"]
train_data, test_data = train_test_split(
    ratings[split_columns],
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Give both frames a fresh 0..n-1 index. This also detaches them from
# `ratings`, so adding columns later does not warn about writing to a slice.
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [None]:
# Map raw ids to contiguous 1-based indices. Index 0 is left unused, which is
# why the embedding tables further down are sized with `+ 1`.
movie_index_map = {
    movie: idx for idx, movie in enumerate(ratings["movie"].unique(), start=1)
}
user_index_map = {
    user: idx for idx, user in enumerate(ratings["user"].unique(), start=1)
}

# The training and evaluation cells read "user_index" / "movie_index", but no
# other cell creates those columns, so attach them here. `assign` builds new
# frames, which keeps this cell safe to re-run.
train_data = train_data.assign(
    user_index=train_data["user"].map(user_index_map),
    movie_index=train_data["movie"].map(movie_index_map),
)
test_data = test_data.assign(
    user_index=test_data["user"].map(user_index_map),
    movie_index=test_data["movie"].map(movie_index_map),
)

In [None]:
# Persist both splits next to the notebook, without the pandas row index.
train_data.to_csv(path_or_buf="train_20.csv", index=False)
test_data.to_csv(path_or_buf="test_20.csv", index=False)

In [None]:
# Number of distinct movies / users in the full ratings table; these size the
# embedding tables of the two towers below.
no_movies = ratings["movie"].unique().size
no_users = ratings["user"].unique().size

In [None]:
# User tower: a single user index -> 100-d embedding -> 512 -> 256 ReLU units.
user_input = tf.keras.Input(shape=(1,))

# `no_users + 1` rows because user indices are 1-based (row 0 stays unused).
# The deprecated `input_length` argument is omitted: the Input above already
# fixes the sequence length to 1, so the output shape is fully known.
u_embedding_layer = tf.keras.layers.Embedding(no_users + 1, 100)(user_input)
# Collapse the (1, 100) lookup result into a flat 100-d vector.
u_embedding_layer = tf.keras.layers.Flatten()(u_embedding_layer)

user_dense1 = tf.keras.layers.Dense(512, activation="relu")(u_embedding_layer)
user_dense2 = tf.keras.layers.Dense(256, activation="relu")(user_dense1)

In [None]:
# Item tower: a single movie index -> 100-d embedding -> 512 -> 256 ReLU units.
item_input = tf.keras.Input(shape=(1,))

# `no_movies + 1` rows because movie indices are 1-based (row 0 stays unused).
# The deprecated `input_length` argument is omitted: the Input above already
# fixes the sequence length to 1, so the output shape is fully known.
i_embedding_layer = tf.keras.layers.Embedding(no_movies + 1, 100)(item_input)
# Collapse the (1, 100) lookup result into a flat 100-d vector.
i_embedding_layer = tf.keras.layers.Flatten()(i_embedding_layer)

item_dense1 = tf.keras.layers.Dense(512, activation="relu")(i_embedding_layer)
item_dense2 = tf.keras.layers.Dense(256, activation="relu")(item_dense1)

In [None]:
# Join the two 256-d tower outputs side by side along the feature axis.
concat = tf.keras.layers.Concatenate(axis=1)([user_dense2, item_dense2])

# Interaction MLP: 256 -> 128 -> 64 ReLU units, then one linear output unit
# that produces the predicted rating.
inter_dense1 = tf.keras.layers.Dense(units=256, activation="relu")(concat)
inter_dense2 = tf.keras.layers.Dense(units=128, activation="relu")(inter_dense1)
inter_dense3 = tf.keras.layers.Dense(units=64, activation="relu")(inter_dense2)
output_layer = tf.keras.layers.Dense(units=1, activation="linear")(inter_dense3)

# Two-input regression model trained with mean squared error.
model = tf.keras.Model(
    inputs=[user_input, item_input],
    outputs=output_layer,
)
model.compile(loss="mse", optimizer="adam")

In [None]:
# Fit the network on the training split.
train_users = train_data["user_index"].to_numpy()
train_movies = train_data["movie_index"].to_numpy()
train_labels = train_data["rating"].to_numpy()

# 10% of the training rows are set aside by Keras for validation.
model.fit(
    x=[train_users, train_movies],
    y=train_labels,
    batch_size=128,
    epochs=10,
    shuffle=True,
    validation_split=0.1,
)

In [None]:
# Score the held-out ratings and report MAE and RMSE.
m = tf.keras.metrics.RootMeanSquaredError()
test_users = test_data["user_index"].values
test_movies = test_data["movie_index"].values
test_ratings = test_data["rating"].values

# The output layer has a single unit, so predictions come back as (n, 1);
# take column 0 to get one value per test row.
pred_ratings = model.predict([test_users, test_movies])
pred_ratings = np.asarray(pred_ratings)[:, 0]
# Stored on the frame because the ranking metrics below read this column.
test_data["predictions"] = pred_ratings

mae = tf.keras.metrics.mean_absolute_error(test_ratings, pred_ratings)
print(f"Mean Absolute Error: {np.mean(mae)}")

# Keras metrics expect (y_true, y_pred). The original call passed them the
# other way round, which happens to give the same RMSE but is the wrong order.
m.update_state(test_ratings, pred_ratings)
print(f"Root Mean Square Error: {m.result().numpy()}")

In [None]:
# Persist the trained model to disk.
model.save(filepath="new_ncf_model.h5")

In [None]:
# Per-user precision / recall over the 10 highest-predicted test items,
# averaged over all users that appear in the training data.
RELEVANCE_THRESHOLD = 4.0  # a rating or prediction at or above this is "relevant"
TOP_K = 10

train_user_ids = train_data["user_index"].unique()
n_train_users = len(train_user_ids)

# Keep only test rows of users seen in training, then group once. The original
# re-filtered the whole test frame for every user, which is O(users * rows).
eval_rows = test_data[test_data["user_index"].isin(train_user_ids)]

# Despite their names these hold running sums until the final division.
mean_precision = 0
mean_recall = 0
for _, user_rows in eval_rows.groupby("user_index", sort=False):
    actual = user_rows["rating"].to_numpy()
    predicted = user_rows["predictions"].to_numpy()

    # Stable sort on the negated scores: highest prediction first, with tied
    # predictions keeping their original row order.
    top_k = np.argsort(-predicted, kind="stable")[:TOP_K]

    relevant_in_top = actual[top_k] >= RELEVANCE_THRESHOLD
    recommended_in_top = predicted[top_k] >= RELEVANCE_THRESHOLD

    total_relevant = int((actual >= RELEVANCE_THRESHOLD).sum())
    total_recommended = int(recommended_in_top.sum())
    total_rec_rel = int((relevant_in_top & recommended_in_top).sum())

    # A user with nothing recommended / nothing relevant contributes 0.
    if total_recommended > 0:
        mean_precision += total_rec_rel / total_recommended
    if total_relevant > 0:
        mean_recall += total_rec_rel / total_relevant

# NOTE(review): the denominator counts every training user, so users without
# any test rows pull both averages towards 0 -- confirm this is intended.
print(f"Precision: {mean_precision / n_train_users}")
print(f"Recall: {mean_recall / n_train_users}")

In [None]:
# Ground truth for the ranking metric: each user's 10 highest-rated test rows.
#
# This replaces `groupby(...).apply(lambda x: x.nlargest(...))`. Applying over
# the grouping column is deprecated in recent pandas, and the lambda runs once
# per user in Python. The sort + `head` form yields the same rows in the same
# order: users ascending, ratings descending, ties in original row order.
TOP_N = 10
top_n_test_data = (
    test_data
    .sort_values("rating", ascending=False, kind="stable")
    .groupby("user", sort=False)
    .head(TOP_N)
    .sort_values("user", kind="stable")
    .reset_index(drop=True)
)

In [None]:
# NDCG of the predicted ratings against each user's top-rated test items.
ndcg_at_k(
    top_n_test_data,
    test_data,
    col_user="user",
    col_item="movie",
    col_rating="rating",
    col_prediction="predictions",
)