<a href="https://colab.research.google.com/github/MammadovN/Machine_Learning/blob/main/projects/06_real-world-apps/movie-recommendation-system/movie_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install TensorFlow Recommenders and other dependencies
!pip install tensorflow tensorflow-recommenders pandas numpy

Collecting tensorflow-recommenders
  Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl.metadata (4.6 kB)
Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl (96 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-recommenders
Successfully installed tensorflow-recommenders-0.7.3


In [3]:
# Import required libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs
from typing import Dict, Text

In [4]:
# Download the MovieLens dataset
!wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip ml-latest-small.zip

--2025-05-05 07:20:53--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip’


2025-05-05 07:20:53 (6.57 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]

Archive:  ml-latest-small.zip
   creating: ml-latest-small/
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  


In [5]:
# Load the ratings and movies data using Pandas
ratings_df = pd.read_csv("ml-latest-small/ratings.csv")
movies_df = pd.read_csv("ml-latest-small/movies.csv")

# Convert Pandas DataFrames to TensorFlow datasets
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df))
movies = tf.data.Dataset.from_tensor_slices(dict(movies_df))

In [6]:
# Extract unique user IDs
unique_user_ids = np.unique(ratings_df["userId"].values.astype(str))

# Extract unique movie IDs
unique_movie_ids = np.unique(movies_df["movieId"].values.astype(str))

# Print the unique IDs
print("Unique User IDs:", unique_user_ids)
print("Unique Movie IDs:", unique_movie_ids)

Unique User IDs: ['1' '10' '100' '101' '102' '103' '104' '105' '106' '107' '108' '109' '11'
 '110' '111' '112' '113' '114' '115' '116' '117' '118' '119' '12' '120'
 '121' '122' '123' '124' '125' '126' '127' '128' '129' '13' '130' '131'
 '132' '133' '134' '135' '136' '137' '138' '139' '14' '140' '141' '142'
 '143' '144' '145' '146' '147' '148' '149' '15' '150' '151' '152' '153'
 '154' '155' '156' '157' '158' '159' '16' '160' '161' '162' '163' '164'
 '165' '166' '167' '168' '169' '17' '170' '171' '172' '173' '174' '175'
 '176' '177' '178' '179' '18' '180' '181' '182' '183' '184' '185' '186'
 '187' '188' '189' '19' '190' '191' '192' '193' '194' '195' '196' '197'
 '198' '199' '2' '20' '200' '201' '202' '203' '204' '205' '206' '207'
 '208' '209' '21' '210' '211' '212' '213' '214' '215' '216' '217' '218'
 '219' '22' '220' '221' '222' '223' '224' '225' '226' '227' '228' '229'
 '23' '230' '231' '232' '233' '234' '235' '236' '237' '238' '239' '24'
 '240' '241' '242' '243' '244' '245' '246' '247

In [7]:
# Preprocess the ratings data
def preprocess_data(features):
    return {
        "user_id": tf.strings.as_string(features["userId"]),
        "movie_id": tf.strings.as_string(features["movieId"])
    }, features["rating"]

# Preprocess the ratings data
train_data = ratings.map(preprocess_data)

In [8]:
# Define a simple ranking model
class RankingModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # User embeddings
        self.user_embeddings = tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=unique_user_ids),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

        # Movie embeddings
        self.movie_embeddings = tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=unique_movie_ids),
            tf.keras.layers.Embedding(len(unique_movie_ids) + 1, embedding_dimension)
        ])

        # Combine features
        self.ratings = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(1)
        ])

    def call(self, inputs):
        # Extract user_id and movie_id from the dictionary
        user_id = inputs["user_id"]  # Access the "user_id" key
        movie_id = inputs["movie_id"]  # Access the "movie_id" key

        # Generate embeddings
        user_embedding = self.user_embeddings(user_id)
        movie_embedding = self.movie_embeddings(movie_id)

        # Predict rating
        return self.ratings(tf.concat([user_embedding, movie_embedding], axis=1))

In [9]:
model = RankingModel()

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1), loss=tf.keras.losses.MeanSquaredError())

# Train the model
model.fit(train_data.batch(100), epochs=3)

Epoch 1/3
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 15ms/step - loss: 2.3232
Epoch 2/3
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - loss: 1.0390
Epoch 3/3
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.9265


<keras.src.callbacks.history.History at 0x7a2196ee0a50>

In [10]:
def recommend_movies(user_id: str, n_recommendations: int = 10):

    # 1) Convert all movie IDs (strings) to a tensor
    candidate_movie_ids = tf.constant(unique_movie_ids)

    # 2) Expand the single user_id into a vector of the same length as candidate_movie_ids
    user_ids = tf.repeat(tf.constant([user_id]), repeats=len(candidate_movie_ids))

    # 3) Get predicted scores from the model
    scores = model(
        {"user_id": user_ids, "movie_id": candidate_movie_ids},
        training=False,  # inference mode
    )  # shape: (N, 1)
    scores = tf.squeeze(scores, axis=1)  # shape: (N,)

    # 4) Select the top-k highest scores
    top_k = tf.math.top_k(scores, k=n_recommendations)

    # 5) Convert movie IDs and scores to NumPy for easy use
    recommended_ids = tf.gather(candidate_movie_ids, top_k.indices).numpy().astype(str)
    recommended_scores = top_k.values.numpy()

    return list(zip(recommended_ids, recommended_scores))


In [11]:
sample_user = "1"
recs = recommend_movies(sample_user, n_recommendations=10)

print(f"\nTop-10 recommendations for user {sample_user}:")
for movie_id, score in recs:
    title = movies_df.loc[movies_df["movieId"] == int(movie_id), "title"].item()
    print(f"{title:<60}  (predicted rating: {score:.2f})")


Top-10 recommendations for user 1:
Shawshank Redemption, The (1994)                              (predicted rating: 4.51)
Godfather, The (1972)                                         (predicted rating: 4.43)
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)  (predicted rating: 4.40)
Matrix, The (1999)                                            (predicted rating: 4.40)
Fight Club (1999)                                             (predicted rating: 4.39)
Pulp Fiction (1994)                                           (predicted rating: 4.39)
My Fair Lady (1964)                                           (predicted rating: 4.37)
Reservoir Dogs (1992)                                         (predicted rating: 4.37)
Rear Window (1954)                                            (predicted rating: 4.36)
Monty Python and the Holy Grail (1975)                        (predicted rating: 4.36)
