In [None]:
!wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip ml-latest-small.zip

In [2]:
from sklearn.preprocessing import LabelEncoder, StandardScaler, MultiLabelBinarizer, MinMaxScaler
import pandas as pd
import numpy as np

In [3]:
ratings_df = pd.read_csv('ml-latest-small/ratings.csv')
movies_df = pd.read_csv('ml-latest-small/movies.csv')
tags_df = pd.read_csv('ml-latest-small/tags.csv')

In [4]:
data = ratings_df.merge(movies_df, on='movieId')


In [5]:
data

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
...,...,...,...,...,...,...
100831,610,166534,4.0,1493848402,Split (2017),Drama|Horror|Thriller
100832,610,168248,5.0,1493850091,John Wick: Chapter Two (2017),Action|Crime|Thriller
100833,610,168250,5.0,1494273047,Get Out (2017),Horror
100834,610,168252,5.0,1493846352,Logan (2017),Action|Sci-Fi


In [6]:
data['genres_list'] = data['genres'].apply(lambda x: x.split('|'))

In [7]:
unique_genres = sorted(set(g for sublist in data['genres_list'] for g in sublist))
unique_genres

['(no genres listed)',
 'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western']

In [8]:
genre_to_idx = {genre: idx for idx, genre in enumerate(unique_genres)}
data['genres_encoded'] = data['genres_list'].apply(lambda lst: [genre_to_idx[g] for g in lst])

In [9]:
genre_to_idx

{'(no genres listed)': 0,
 'Action': 1,
 'Adventure': 2,
 'Animation': 3,
 'Children': 4,
 'Comedy': 5,
 'Crime': 6,
 'Documentary': 7,
 'Drama': 8,
 'Fantasy': 9,
 'Film-Noir': 10,
 'Horror': 11,
 'IMAX': 12,
 'Musical': 13,
 'Mystery': 14,
 'Romance': 15,
 'Sci-Fi': 16,
 'Thriller': 17,
 'War': 18,
 'Western': 19}

In [10]:
def process(df):
    label_encoder = LabelEncoder()
    mlb = MultiLabelBinarizer()
    standard_scaler = StandardScaler()
    min_max_scaler = MinMaxScaler()

    df['movieId'] = label_encoder.fit_transform(df['movieId'])
    df['userId'] = label_encoder.fit_transform(df['userId'])

    df['genres'] = df['genres'].apply(lambda x: x.split('|'))
    unique_genres = sorted(set(g for sublist in df['genres'] for g in sublist))
    genre_to_idx = {genre: idx for idx, genre in enumerate(unique_genres)}
    df['genres'] = df['genres'].apply(lambda lst: [genre_to_idx[g] for g in lst])

    df['rating'] = min_max_scaler.fit_transform(df[['rating']])
    df['rating_timestamp'] = standard_scaler.fit_transform(df[['timestamp']])
    #data['tag_timestamp'] = standard_scaler.fit_transform(data[['timestamp_y']])

    df = df.drop(columns=['timestamp'])

    df = df.rename(columns={'(no genres listed)': 'no_genre'})

    df['year'] = df['title'].str.extract(r'\((\d{4})\)')

    df = df.dropna()

    df['example_age'] = (pd.to_datetime("now") - pd.to_datetime(df['year']))\
            /np.timedelta64(1,'D')
    df['example_age'] = min_max_scaler.fit_transform(df[['example_age']])

    user = df.groupby(['userId']).agg({
        'movieId': list,
        'rating': list,
        'genres': list,
    }).reset_index()

    user['genres'] = user['genres'].apply(lambda x: [item for sublist in x for item in sublist])

    movies = df.drop(columns=['userId','rating','rating_timestamp'])
    movies = movies.drop_duplicates(subset=['movieId'])

    return user, movies

In [11]:
user_processed, movies_processed = process(data)

In [12]:
user_processed

Unnamed: 0,userId,movieId,rating,genres
0,0,"[0, 2, 5, 43, 46, 62, 89, 97, 124, 130, 136, 1...","[0.7777777777777777, 0.7777777777777777, 0.777...","[2, 3, 4, 5, 9, 5, 15, 1, 6, 17, 14, 17, 6, 14..."
1,1,"[277, 291, 1283, 2670, 4607, 5294, 6236, 6298,...","[0.5555555555555556, 0.7777777777777777, 0.888...","[6, 8, 5, 8, 15, 1, 2, 8, 1, 6, 17, 1, 6, 8, 1..."
2,2,"[30, 461, 545, 565, 585, 656, 696, 831, 852, 9...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","[8, 8, 18, 1, 6, 8, 18, 1, 2, 5, 18, 2, 3, 5, ..."
3,3,"[20, 31, 41, 43, 47, 52, 94, 108, 109, 135, 14...","[0.5555555555555556, 0.3333333333333333, 0.555...","[5, 6, 17, 14, 16, 17, 5, 8, 17, 14, 17, 5, 8,..."
4,4,"[0, 20, 32, 33, 35, 46, 52, 97, 123, 126, 198,...","[0.7777777777777777, 0.7777777777777777, 0.777...","[2, 3, 4, 5, 9, 5, 6, 17, 4, 8, 6, 8, 5, 15, 6..."
...,...,...,...,...
605,605,"[0, 6, 10, 14, 16, 17, 18, 27, 28, 31, 33, 42,...","[0.4444444444444445, 0.4444444444444445, 0.444...","[2, 3, 4, 5, 9, 5, 15, 5, 8, 15, 1, 2, 15, 8, ..."
606,606,"[0, 10, 24, 32, 33, 77, 97, 99, 123, 126, 138,...","[0.7777777777777777, 0.5555555555555556, 0.555...","[2, 3, 4, 5, 9, 5, 8, 15, 8, 15, 4, 8, 6, 8, 1..."
607,607,"[0, 1, 2, 9, 15, 18, 20, 23, 30, 31, 32, 35, 4...","[0.4444444444444445, 0.3333333333333333, 0.333...","[2, 3, 4, 5, 9, 2, 4, 9, 5, 15, 1, 2, 17, 6, 8..."
608,608,"[0, 9, 97, 101, 114, 123, 134, 156, 176, 197, ...","[0.5555555555555556, 0.7777777777777777, 0.555...","[2, 3, 4, 5, 9, 1, 2, 17, 1, 8, 18, 7, 7, 2, 8..."


In [13]:
movies_processed

Unnamed: 0,movieId,title,genres,genres_list,genres_encoded,year,example_age
0,0,Toy Story (1995),"[2, 3, 4, 5, 9]","[Adventure, Animation, Children, Comedy, Fantasy]","[2, 3, 4, 5, 9]",1995,0.198282
1,2,Grumpier Old Men (1995),"[5, 15]","[Comedy, Romance]","[5, 15]",1995,0.198282
2,5,Heat (1995),"[1, 6, 17]","[Action, Crime, Thriller]","[1, 6, 17]",1995,0.198282
3,43,Seven (a.k.a. Se7en) (1995),"[14, 17]","[Mystery, Thriller]","[14, 17]",1995,0.198282
4,46,"Usual Suspects, The (1995)","[6, 14, 17]","[Crime, Mystery, Thriller]","[6, 14, 17]",1995,0.198282
...,...,...,...,...,...,...,...
100820,9307,Bloodmoon (1997),"[1, 17]","[Action, Thriller]","[1, 17]",1997,0.181029
100821,9312,Sympathy for the Underdog (1971),"[1, 6, 8]","[Action, Crime, Drama]","[1, 6, 8]",1971,0.405178
100823,9324,Hazard (2005),"[1, 8, 17]","[Action, Drama, Thriller]","[1, 8, 17]",2005,0.112063
100827,9371,Blair Witch (2016),"[11, 17]","[Horror, Thriller]","[11, 17]",2016,0.017253


In [14]:
'''{
'user_id': np.int64(945),
'movie_history': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 205, 274],
'genre_history': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3],
'target_movie': 420,
'label': 0}'''

"{\n'user_id': np.int64(945),\n'movie_history': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 205, 274],\n'genre_history': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3],\n'target_movie': 420,\n'label': 0}"

In [15]:
def pad_sequence(sequence, max_length):
      """Pad sequence với zeros"""
      if len(sequence) >= max_length:
          return sequence[-max_length:]
      else:
          return [0] * (max_length - len(sequence)) + sequence

In [16]:
movie_vocab_size = max(movies_processed['movieId']) + 1
genre_vocab_size = len(unique_genres)
user_vocab_size = max(user_processed['userId']) + 1


In [17]:
import random

def create_candidate_training_data(movies_data, users_data,
                                     negative_samples=3):
        """
        Tạo training data cho Candidate Generation model
        """
        print("Creating candidate generation training data...")

        train_data = []



        for user_id in users_data['userId']:

            user_movies = users_data[users_data['userId'] == user_id]['movieId'].values[0]
            user_genres = users_data[users_data['userId'] == user_id]['genres'].values[0]

            if len(user_movies) < 2:
                continue

            # Tạo training examples theo cách "predicting future watch"
            for i in range(1, len(user_movies)):
                # Input: history trước thời điểm i
                history_movies = user_movies[:i]
                history_genres = user_genres[:i]


                # Label: movie tại thời điểm i
                target_movie = user_movies[i]

                # Pad sequences
                padded_movies = pad_sequence(history_movies, 50)
                padded_genres = pad_sequence(history_genres, 50)

                # Positive example
                train_data.append({
                    'user_id': user_id,
                    'movie_history': padded_movies,
                    'genre_history': padded_genres,
                    'target_movie': target_movie,
                    'label': 1
                })

                # Negative sampling
                for _ in range(negative_samples):
                    neg_movie = random.randint(0, movie_vocab_size - 1)
                    while neg_movie in user_movies:  # Avoid movies user has watched
                        neg_movie = random.randint(0, movie_vocab_size - 1)

                    train_data.append({
                        'user_id': user_id,
                        'movie_history': padded_movies,
                        'genre_history': padded_genres,
                        'target_movie': neg_movie,
                        'label': 0
                    })

        return train_data

In [18]:
def create_ranking_training_data(users_data, movies_data):
        """
        Tạo training data cho Ranking model
        """
        print("Creating ranking training data...")

        ranking_data = []

        for user_id in users_data['userId']:

            user_movies = users_data[users_data['userId'] == user_id]['movieId'].values[0]
            user_ratings = users_data[users_data['userId'] == user_id]['rating'].values[0]

            if len(user_movies) < 5:
                continue

            for i in range(2, len(user_movies)):
                # Features cho ranking
                current_movie = user_movies[i]
                current_rating = user_ratings[i]


                # Rating history (last 10 ratings)
                rating_history = user_ratings[:i][-10:]
                rating_history_padded = pad_sequence(rating_history, 10)

                # Time since last interaction (simplified)
                time_since_last = 1.0 / (i + 1)  # Simple time decay

                # Get genre của current movie
                movie_genre = movies_data[movies_data['movieId'] == current_movie]['genres'].values[0]

                '''if len(movie_genre) > 1:
                    movie_genre = movie_genre[0]
                else:
                    movie_genre = movie_genre[0]'''

                movie_genre = movie_genre[0]

                ranking_data.append({
                    'user_id': user_id,
                    'movie_id': current_movie,
                    'genre_id': movie_genre,
                    'rating_history': rating_history_padded,
                    'time_since_last': time_since_last,
                    'target_rating': current_rating
                })

        return ranking_data

In [19]:
from tensorflow.keras import layers, Model, optimizers
import tensorflow as tf

In [20]:
def build_candidate_model():
        """
        Xây dựng Candidate Generation Model
        """
        print("Building candidate generation model...")

        # Input layers
        user_input = layers.Input(shape=(), name='user_id')
        movie_history_input = layers.Input(shape=(50,), name='movie_history')
        genre_history_input = layers.Input(shape=(50,), name='genre_history')

        # Embedding layers
        user_embedding = layers.Embedding(user_vocab_size, embedding_dim,
                                        name='user_embedding')(user_input)
        movie_embedding_layer = layers.Embedding(movie_vocab_size, embedding_dim,
                                                name='movie_embedding')
        genre_embedding_layer = layers.Embedding(genre_vocab_size, embedding_dim//4,
                                                name='genre_embedding')

        # Process sequences
        movie_history_embedded = movie_embedding_layer(movie_history_input)
        genre_history_embedded = genre_embedding_layer(genre_history_input)

        # Average pooling for sequences (như trong paper)
        movie_history_avg = layers.GlobalAveragePooling1D()(movie_history_embedded)
        genre_history_avg = layers.GlobalAveragePooling1D()(genre_history_embedded)

        # Flatten user embedding
        user_embedded_flat = layers.Flatten()(user_embedding)

        # Concatenate all features
        concat_features = layers.Concatenate()([
            user_embedded_flat,
            movie_history_avg,
            genre_history_avg
        ])

        # Deep layers (tower architecture)
        x = concat_features
        for units in candidate_model_units[:-1]:
            x = layers.Dense(units, activation='relu')(x)
            x = layers.Dropout(0.2)(x)

        # Output layer cho classification
        output = layers.Dense(1, activation='sigmoid', name='candidate_output')(x)

        model = Model(inputs=[user_input, movie_history_input, genre_history_input],
                     outputs=output)

        model.compile(
            optimizer=optimizers.Adam(learning_rate=0.001),
            loss='binary_crossentropy',
            metrics=['accuracy']
        )

        return model

In [21]:
def build_ranking_model():
        """
        Xây dựng Ranking Model
        """
        print("Building ranking model...")

        # Input layers
        user_input = layers.Input(shape=(), name='user_id_rank')
        movie_input = layers.Input(shape=(), name='movie_id_rank')
        genre_input = layers.Input(shape=(), name='genre_id_rank')
        rating_history_input = layers.Input(shape=(10,), name='rating_history')
        time_since_last_input = layers.Input(shape=(), name='time_since_last')

        # Shared embeddings (quan trọng như trong paper)
        user_embedding_layer = layers.Embedding(user_vocab_size, embedding_dim//2)
        movie_embedding_layer = layers.Embedding(movie_vocab_size, embedding_dim//2)
        genre_embedding_layer = layers.Embedding(genre_vocab_size, embedding_dim//4)

        # Embeddings
        user_embedded = layers.Flatten()(user_embedding_layer(user_input))
        movie_embedded = layers.Flatten()(movie_embedding_layer(movie_input))
        genre_embedded = layers.Flatten()(genre_embedding_layer(genre_input))

        # Process rating history
        rating_avg = layers.GlobalAveragePooling1D()(
            layers.Reshape((10, 1))(rating_history_input)
        )
        rating_avg_flat = layers.Flatten()(rating_avg)

        # Time feature normalization
        time_normalized = layers.Lambda(lambda x: tf.clip_by_value(x, 0, 1))(time_since_last_input)
        time_expanded = layers.Reshape((1,))(time_normalized)

        # Concatenate features
        concat_features = layers.Concatenate()([
            user_embedded,
            movie_embedded,
            genre_embedded,
            rating_avg_flat,
            time_expanded
        ])

        # Deep layers
        x = concat_features
        for units in ranking_model_units:
            x = layers.Dense(units, activation='relu')(x)
            x = layers.Dropout(0.3)(x)

        # Output layer - predict expected rating (tương tự expected watch time)
        output = layers.Dense(1, activation='linear', name='ranking_output')(x)

        model = Model(inputs=[user_input, movie_input, genre_input,
                             rating_history_input, time_since_last_input],
                     outputs=output)

        model.compile(
            optimizer=optimizers.Adam(learning_rate=0.001),
            loss='mse',
            metrics=['mae'],
        )

        return model

In [22]:
def train_ranking_model(ranking_data, validation_split=0.2, epochs=20, batch_size=512):
        """
        Train Ranking Model
        """
        print("Training ranking model...")

        # Convert to arrays
        user_ids = np.array([d['user_id'] for d in ranking_data])
        movie_ids = np.array([d['movie_id'] for d in ranking_data])
        genre_ids = np.array([d['genre_id'] for d in ranking_data])
        rating_histories = np.array([d['rating_history'] for d in ranking_data])
        time_features = np.array([d['time_since_last'] for d in ranking_data])
        target_ratings = np.array([d['target_rating'] for d in ranking_data])

        # Build model
        ranking_model = build_ranking_model()

        # Train
        history = ranking_model.fit(
            [user_ids, movie_ids, genre_ids, rating_histories, time_features],
            target_ratings,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            verbose=1
        )

        return ranking_model

In [23]:
def train_candidate_model(train_data, validation_split=0.2, epochs=20, batch_size=512):
        """
        Train Candidate Generation Model
        """
        print("Training candidate generation model...")

        # Convert to arrays
        user_ids = np.array([d['user_id'] for d in train_data])
        movie_histories = np.array([d['movie_history'] for d in train_data])
        genre_histories = np.array([d['genre_history'] for d in train_data])
        labels = np.array([d['label'] for d in train_data])

        # Build model
        candidate_model = build_candidate_model()

        # Train
        history = candidate_model.fit(
            [user_ids, movie_histories, genre_histories],
            labels,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            verbose=1
        )

        return candidate_model

In [24]:
def get_candidate_recommendations(user_id, user_data, candidate_model, top_k=100):
        """
        Generate candidates using candidate model
        """
        if candidate_model is None:
            raise ValueError("Candidate model not trained yet!")

        candidates = []

        user_history = user_data[user_data['userId'] == user_id]['movieId'].values[0]
        genre_history = user_data[user_data['userId'] == user_id]['genres'].values[0]

        # Pad sequences
        padded_movies = pad_sequence(user_history, 50)
        padded_genres = pad_sequence(genre_history, 50)

        # Score all movies (simplified - in practice would use approximate search)
        batch_size = 1000
        for start_idx in range(0, movie_vocab_size, batch_size):
            end_idx = min(start_idx + batch_size, movie_vocab_size)
            batch_size_actual = end_idx - start_idx

            # Create batch inputs
            user_batch = np.array([user_id] * batch_size_actual)
            movie_history_batch = np.array([padded_movies] * batch_size_actual)
            genre_history_batch = np.array([padded_genres] * batch_size_actual)

            # Get predictions
            scores = candidate_model.predict([user_batch, movie_history_batch, genre_history_batch],
                                                verbose=0)

            # Store candidates with scores
            for i, score in enumerate(scores):
                movie_id = start_idx + i
                if movie_id not in user_history:  # Don't recommend already watched
                    candidates.append((movie_id, score[0]))

        # Sort by score and return top_k
        candidates.sort(key=lambda x: x[1], reverse=True)
        return candidates[:top_k]

In [25]:
def rank_candidates(user_id, candidates, user_data, ranking_model, top_k=10):
        """
        Rank candidates using ranking model
        """
        if ranking_model is None:
            raise ValueError("Ranking model not trained yet!")

        ranked_candidates = []

        user_rating_history = user_data[user_data['userId'] == user_id]['rating'].values[0]

        # Prepare ranking features
        rating_history_padded = pad_sequence(user_rating_history[-10:], 10)
        time_since_last = 0.1  # Recent interaction

        for movie_id, candidate_score in candidates:
            # Simplified genre assignment
            genre_id = random.randint(0, genre_vocab_size - 1)

            # Get ranking score
            ranking_score = ranking_model.predict([
                np.array([user_id]),
                np.array([movie_id]),
                np.array([genre_id]),
                np.array([rating_history_padded]),
                np.array([time_since_last])
            ], verbose=0)[0][0]

            ranked_candidates.append((movie_id, candidate_score, ranking_score))

        # Sort by ranking score
        ranked_candidates.sort(key=lambda x: x[2], reverse=True)
        return ranked_candidates[:top_k]


In [26]:
def recommend(user_id, user_data, top_k=10):
        """
        End-to-end recommendation
        """
        if user_id not in user_data['userId']:
            print(f"User {user_id} not found in training data")
            return []

        print(f"Getting candidates for user {user_id}...")
        # Stage 1: Candidate Generation
        candidates = get_candidate_recommendations(user_id, user_data, candidate_model, top_k=100)

        print(f"Ranking {len(candidates)} candidates...")
        # Stage 2: Ranking
        final_recommendations = rank_candidates(user_id, candidates, user_data, ranking_model, top_k=top_k)

        return final_recommendations



In [27]:
embedding_dim=128
candidate_model_units=[512, 256, 128],
ranking_model_units=[256, 128, 64]

In [28]:
candidate_train_data = create_candidate_training_data(
        movies_processed, user_processed, negative_samples=5
)

Creating candidate generation training data...


In [29]:
ranking_train_data = create_ranking_training_data(
        user_processed, movies_processed
)

Creating ranking training data...


In [30]:
candidate_model = train_candidate_model(candidate_train_data, epochs=10, batch_size=256)

Training candidate generation model...
Building candidate generation model...
Epoch 1/10
[1m1879/1879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8312 - loss: 0.4581 - val_accuracy: 0.8333 - val_loss: 0.4516
Epoch 2/10
[1m1879/1879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8329 - loss: 0.4515 - val_accuracy: 0.8333 - val_loss: 0.4507
Epoch 3/10
[1m1879/1879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8338 - loss: 0.4500 - val_accuracy: 0.8333 - val_loss: 0.4514
Epoch 4/10
[1m1879/1879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8338 - loss: 0.4501 - val_accuracy: 0.8333 - val_loss: 0.4508
Epoch 5/10
[1m1879/1879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.8324 - loss: 0.4522 - val_accuracy: 0.8333 - val_loss: 0.4506
Epoch 6/10
[1m1879/1879[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 

In [32]:
ranking_model = train_ranking_model(ranking_train_data, epochs=10, batch_size=256)

Training ranking model...
Building ranking model...
Epoch 1/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - loss: 0.0872 - mae: 0.2268 - val_loss: 0.0458 - val_mae: 0.1714
Epoch 2/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.0425 - mae: 0.1615 - val_loss: 0.0430 - val_mae: 0.1627
Epoch 3/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0371 - mae: 0.1502 - val_loss: 0.0436 - val_mae: 0.1631
Epoch 4/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0338 - mae: 0.1429 - val_loss: 0.0443 - val_mae: 0.1643
Epoch 5/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0310 - mae: 0.1365 - val_loss: 0.0449 - val_mae: 0.1643
Epoch 6/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0275 - mae: 0.1282 - val_loss: 0.0472 - val_mae: 0.1708
Epoch 7/10
[1m312/312[0m [32m━━━

In [33]:
test_user = 0
recommendations = recommend(test_user, user_processed, top_k=5)


Getting candidates for user 0...
Ranking 100 candidates...


In [38]:
first_values = [item[0] for item in recommendations]


In [37]:
print(f"Top {len(recommendations)} recommendations for user {test_user}:")
for i in first_values:
    print("\n\t",movies_processed[movies_processed['movieId'] == i]['title'].values[0])

Top 5 recommendations for user 0:

	 Chungking Express (Chung Hing sam lam) (1994)

	 Persuasion (1995)

	 Postman, The (Postino, Il) (1994)

	 Twelve Monkeys (a.k.a. 12 Monkeys) (1995)

	 Antonia's Line (Antonia) (1995)
