In [3]:
###model.py

import numpy as np
import tensorflow as tf
from keras import layers, regularizers, optimizers
import polars as pl
from keras.saving import register_keras_serializable

@register_keras_serializable()
class ColaborativeFiltering(tf.keras.Model):
    def __init__(self, num_user_features, num_movie_features, user_layers=[128, 64], movie_layers=[128, 64], embedding=32, learning_rate=0.001, user_reg = None, movie_reg = None, **kwargs):
        super().__init__(**kwargs)
        self.num_user_features = num_user_features
        self.num_movie_features = num_movie_features
        self.embedding = embedding
        self.learning_rate = learning_rate

        # User branch
        user_dense_layers = []
        for i, units in enumerate(user_layers):
            reg = user_reg[i] if user_reg is not None else None
            user_dense_layers.append(layers.Dense(units, activation='tanh', kernel_initializer='glorot_uniform', kernel_regularizer=reg))

        user_dense_layers.append(layers.Dense(self.embedding, activation='tanh', kernel_initializer='glorot_uniform'))
        self.user_net = tf.keras.Sequential(user_dense_layers)

        # movie branch
        movie_dense_layers = []
        for units in movie_layers:
            movie_dense_layers.append(layers.Dense(units, activation='tanh', kernel_initializer='glorot_uniform'))
        movie_dense_layers.append(layers.Dense(self.embedding, activation='tanh', kernel_initializer='glorot_uniform'))
        self.movie_net = tf.keras.Sequential(movie_dense_layers)

        self.dot = layers.Dot(axes=1, name='cosine_similarity')

        # Save architecture parameters for serialization
        self.user_layers = user_layers
        self.movie_layers = movie_layers
        self.user_reg = user_reg
        self.movie_reg = movie_reg

        self.compile(
            optimizer=optimizers.Nadam(learning_rate=self.learning_rate),
            loss='mse',
            metrics=['mae', 'mse']
        )

    def get_config(self):
        config = super().get_config()
        config.update({
            'num_user_features': self.num_user_features,
            'num_movie_features': self.num_movie_features,
            'embedding': self.embedding,
            'learning_rate': self.learning_rate,
            'user_layers': self.user_layers,
            'movie_layers': self.movie_layers,
            'user_reg': self.user_reg,
            'movie_reg': self.movie_reg
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

    def call(self, inputs):
        user_input, movie_input = inputs
        user_embedding = tf.nn.l2_normalize(self.user_net(user_input), axis=1)
        movie_embedding = tf.nn.l2_normalize(self.movie_net(movie_input), axis=1)
        cos_sim = self.dot([user_embedding, movie_embedding])
        return cos_sim

    def recommend(self, user_vec, movie_matrix, user_seen_movie_indices = None, k=10, movie_titles=None):
        user_vecs = tf.repeat(tf.reshape(user_vec, (1, -1)), tf.shape(movie_matrix)[0], axis=0)
        preds = self.predict([user_vecs, movie_matrix])
        # mask_indices = tf.constant(list(user_seen_movie_indices), dtype=tf.int32)
        # preds = tf.tensor_scatter_nd_update(
        #     tf.squeeze(preds),
        #     tf.expand_dims(mask_indices, 1),
        #     tf.fill([tf.size(mask_indices)], tf.constant(-float('inf'), dtype=preds.dtype))
        # )
        top_k_idx = tf.argsort(preds, direction='DESCENDING')[:k]
        if movie_titles is not None:
            return [(movie_titles[int(i)], float(preds[i])) for i in top_k_idx]
        else:
            return [(int(i), float(preds[i])) for i in top_k_idx]

    def get_user_seen_movie_indices(self, user_id, ratings, movies):
        gledani_movieid = set(ratings.filter(pl.col('userid') == user_id)['movieid'].to_list())
        movieid_to_idx = {movie_id: idx for idx, movie_id in enumerate(movies['movieid'].to_list())}
        return {movieid_to_idx[movie_id] for movie_id in gledani_movieid if movie_id in movieid_to_idx}

In [4]:
###prep.py


import polars as pl
import os
from sqlalchemy import create_engine
import tensorflow as tf

'''Funkcije za pripremu podataka za collaborative filtering model'''

def read_data_lake():
    '''
    Data lake --> Polars.DataFrame
    '''
    engine = create_engine(f"postgresql+psycopg2://postgres:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/movie_recommendation")
    conn = engine.connect()
    ratings = pl.read_database(query='SELECT * FROM data_lake.ratings', connection=conn)
    movies = pl.read_database(query='SELECT * FROM raw.movies', connection=conn)
    conn.close()
    return ratings, movies

def prep_pipeline(ratings, movies, user_id = None):
    '''
    Priprema za model
    '''
    #PROSECAN BROJ OCENA PO FILMU
    num_ratings = ratings.group_by('movieid').agg(pl.len().alias('#ratings_film'))
    user = ratings.join(num_ratings, on = 'movieid', how = 'inner').sort(['movieid', 'userid'])
    movies = movies.with_columns(pl.col("genres").str.split("|"))
    unique_genres = sorted(set(g for genre in movies["genres"] for g in genre))
    #LAZY!
    user = user.lazy()
    movies = movies.lazy()
    #SVI ZANROVI
    for genre in unique_genres:
        movies = movies.with_columns(pl.col("genres").list.contains(genre).cast(pl.Int8).alias(genre))
    movies = movies.drop('genres')
    #KOLONA GODINA
    movies = movies.with_columns(pl.col("title").str.extract(r"\((\d{4})\)", 1).cast(pl.Int16).alias("year"))

    #ISTI FORMAT TABELE KAO MOVIES
    user_zanr_train = user.join(movies, on='movieid', how='inner')

    #PIVOT LONGER --> ZANROVE PREBACUJEM U JEDNU KOLONU
    user_longer = (user_zanr_train.unpivot(index=['userid', 'rating'],
                                           on=unique_genres).filter(pl.col('value') == 1).rename({'variable': 'genre', 'value': 'is_genre'}))

    #RACUNAM PROSEK ZA SVAKOG USERA ZA SVAKI ZANR I VRACAM U WIDE FORMAT
    user_feature = user_longer.group_by('userid').agg([(pl.when(pl.col('genre') == genre).then(pl.col('rating')).mean().alias(genre)) for genre in unique_genres]).fill_null(0)
    movie_avg_rating = (user.group_by('movieid').agg(pl.col('rating').mean().alias('avg_rating')))
    movie_features = movies.join(movie_avg_rating, on='movieid', how='left').fill_null(0)
    movie_features = movie_features.select(['movieid', 'title','year','avg_rating', *unique_genres])
    df = user.join(user_feature, on="userid", how="inner").join(movie_features, on="movieid", how="inner")
    df = df.collect()
    movie_features = movie_features.rename({"(no genres listed)": "no genres listed"})
    user_feature = user_feature.rename({"(no genres listed)": "no genres listed"})
    df = df.rename({"(no genres listed)": "no genres listed"})
    user_feature = user_feature.sort('userid')
    df = df.sort('userid')

    return user_feature.collect(), movie_features.collect(), df


def global_scalers():
    engine = create_engine(f"postgresql+psycopg2://postgres:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/movie_recommendation")
    conn = engine.connect()
    df = pl.read_database(query='SELECT * FROM raw.ratings', connection=conn)
    user, movies_feat, df = prep_pipeline(df, pl.read_database(query='SELECT * FROM raw.movies', connection=conn))
    _, _ , _, scalers = scale(df, user, movies_feat)
    conn.close()
    return scalers

def scale(df, user, movies, user_id = None):
    '''
    Skaliranje numeričkih karakteristika i prebacivanje u tenzore
    df - Polars DataFrame sa svim podacima
    user - Polars DataFrame sa korisničkim karakteristikama
    movies - Polars DataFrame sa filmskim karakteristikama
    user_id - ako je None, onda se vracaju svi korisnici, ako je lista (ili int) onda se vraca samo taj korisnik

    '''
    y = tf.convert_to_tensor(df.select(pl.col('rating')).to_numpy(), dtype=tf.float16)

    prva_user = df.columns.index('no genres listed')
    poslednja_user = df.columns.index('Western')
    ###prva kolona u X_user_ud je userid!!!, trebace za preporuke, za treniranje koristiti X_user
    X_user_id = tf.convert_to_tensor(df.select(['userid'] + df.columns[prva_user : poslednja_user + 1]).to_numpy(), dtype=tf.float32)
    X_movie_df = df.select(['year','avg_rating', '#ratings_film'] + [col for col in df.columns if col.endswith('_right')])
    movie_num = tf.convert_to_tensor(X_movie_df.select(['#ratings_film', 'year', 'avg_rating']).to_numpy(), dtype=tf.float32)
    movie_cat = tf.convert_to_tensor(X_movie_df.select(pl.all().exclude(['#ratings_film', 'year', 'avg_rating'])).to_numpy(), dtype=tf.float32)
    # Standardizacija user i movie numeričkih
    X_user = X_user_id[:, 1:]
    user_mean = tf.reduce_mean(X_user, axis=0)
    user_std = tf.math.reduce_std(X_user, axis=0)
    X_user_scaled = (X_user - user_mean) / (user_std+ 1e-8)
    X_user_id_scaled = tf.concat([X_user_id[:, :1], X_user_scaled], axis=1)  # Skalirano sa ID kolonom
    movie_mean = tf.reduce_mean(movie_num, axis=0)
    movie_std = tf.math.reduce_std(movie_num, axis=0)
    movie_num_scaled = (movie_num - movie_mean) / (movie_std)
    X_movie_scaled = tf.concat([movie_num_scaled, movie_cat], axis=1)
    # Target skaliranje na [-1, 1]
    y_scaled = 2 * (y - tf.reduce_min(y)) / (tf.reduce_max(y) - tf.reduce_min(y)) - 1
    scalers = {"user_mean": user_mean, "user_std": user_std,"movie_mean": movie_mean,"movie_std": movie_std, "y_min": tf.reduce_min(y), "y_max": tf.reduce_max(y)}
    if user_id is not None:
        ###Ako je dat user id, filtriramo X_user_id_scaled i X_movie_scaled i vracamo samo korisnika sa tim user_id-om, ako nije vracamo sve korisnike
        maska = tf.reduce_any(tf.equal(tf.expand_dims(X_user_id_scaled[:, 0], 1), tf.constant(user_id, dtype=X_user_id_scaled.dtype)), axis=1)
        X_user_id_scaled = tf.boolean_mask(X_user_id_scaled, maska)
        X_movie_scaled = tf.boolean_mask(X_movie_scaled, maska)
        y_scaled = tf.boolean_mask(y_scaled, maska)
        return X_user_id_scaled, X_movie_scaled, y_scaled, scalers
    # Ako user_id nije naveden, vracamo sve korisnike bez filtriranja user_id-a
    else:
        return X_user_scaled, X_movie_scaled, y_scaled, scalers


def batch_generator(movies, batch_size=1000000, total = 2e7):
    '''
    Pravi skupove od batch_size (milion) iz nasumicnih total (20 miliona) redova u tabeli ratings
    '''
    engine = create_engine(f"postgresql+psycopg2://postgres:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/movie_recommendation")
    conn = engine.connect()
    offset = 0
    while offset < total:
        query = f"SELECT * FROM raw.ratings LIMIT {batch_size} OFFSET {offset}"
        batch = pl.read_database(query=query, connection=conn)
        if batch.height == 0:
            break
        user, movies_feat, df = prep_pipeline(batch, movies, batch)
        X_user, X_movie, y, scalers = scale(df, user, movies_feat)
        yield (X_user, X_movie), tf.squeeze(y)
        offset += batch_size
    conn.close()

# def train_test_split(X_user, X_movie, y, test_size=0.2, random_state= 42):
#     N = X_user.shape[0]
#     tf.random.set_seed(random_state)
#     idx = tf.random.shuffle(tf.range(N))
#     split = int(N * (1 - test_size))
#     train_idx = idx[:split]
#     dev_idx = idx[split:]

#     X_user_train, X_movie_train, y_train = tf.gather(X_user, train_idx), tf.gather(X_movie, train_idx), tf.gather(y, train_idx)

#     X_user_dev, X_movie_dev, y_dev = tf.gather(X_user, dev_idx), tf.gather(X_movie, dev_idx), tf.gather(y, dev_idx)

#     return (X_user_train, X_movie_train), y_train, (X_user_dev, X_movie_dev), y_dev




# def NN_prep(df, user, movies, user_id = None):
#     '''
#     Prebacivanje u tenzore i skaliranje --> tf.Tensor
#     user_id - za listu usera, ako je None onda vraca tf.Tensor sa svim userima
#     '''
#     y = tf.convert_to_tensor(df.select(pl.col('rating')).to_series().to_list(), dtype=tf.float32)
#     prva_user = df.columns.index('no genres listed')
#     poslednja_user = df.columns.index('Western')
#     if user_id is None:
#         X_user = tf.convert_to_tensor(df.select(df.columns[prva_user : poslednja_user + 1])
# .to_numpy(), dtype=tf.float32)
#     else:
#         X_user = tf.convert_to_tensor(df.filter(pl.col('userid') == user_id).select(df.columns[prva_user : poslednja_user + 1]).to_numpy(), dtype=tf.float32)
#     X_movie_df = df.select(['year','avg_rating', '#ratings_film'] + [col for col in df.columns if col.endswith('_right')])
#     movie_num = tf.convert_to_tensor(X_movie_df.select(['#ratings_film', 'year', 'avg_rating']).to_numpy(), dtype=tf.float32)
#     movie_cat = tf.convert_to_tensor(X_movie_df.select(pl.all().exclude(['#ratings_film', 'year', 'avg_rating'])).to_numpy(), dtype=tf.float32)
#     # Skaliranje (standardizacija) user i movie numeričkih karakteristika
#     user_mean = tf.reduce_mean(X_user, axis=0)
#     user_std = tf.math.reduce_std(X_user, axis=0)
#     X_user_scaled = (X_user - user_mean) / (user_std + 1e-7)
#     movie_mean = tf.reduce_mean(movie_num, axis=0)
#     movie_std = tf.math.reduce_std(movie_num, axis=0)
#     movie_num_scaled = (movie_num - movie_mean) / (movie_std)
#     X_movie_scaled = tf.concat([movie_num_scaled, movie_cat], axis=1)
#     # Target skaliranje na [-1, 1]
#     y_min = tf.reduce_min(y)
#     y_max = tf.reduce_max(y)
#     y_scaled = 2 * (y - y_min) / (y_max - y_min) - 1

#     # Vrati i transformatore za kasniju upotrebu
#     scalers = {"user_mean": user_mean, "user_std": user_std, "movie_mean": movie_mean, "movie_std": movie_std, "y_min": y_min, "y_max": y_max}

#     return X_user_scaled, X_movie_scaled, y_scaled, scalers

def inverse_transform_y(y_scaled, scalers):
    """
    Inverzna transformacija za y skaliran na [-1, 1].
    """
    y_min = scalers["y_min"]
    y_max = scalers["y_max"]
    y = (y_scaled + 1) * (y_max - y_min) / 2 + y_min
    return y

def inverse_transform_X_user(X_user_scaled, scalers):
    """
    Inverzna transformacija za X_user.
    """
    user_mean = scalers["user_mean"]
    user_std = scalers["user_std"]
    return X_user_scaled * (user_std + 1e-8) + user_mean

def inverse_transform_X_movie_num(X_movie_num_scaled, scalers):
    """
    Inverzna transformacija za numeričke karakteristike filma.
    """
    movie_mean = scalers["movie_mean"]
    movie_std = scalers["movie_std"]
    return X_movie_num_scaled * (movie_std + 1e-8) + movie_mean





In [5]:
###train.py

# from prep import *
# from model import ColaborativeFiltering
from sqlalchemy import create_engine
import polars as pl
import os
import tensorflow as tf
from keras import layers, Input, regularizers, Model, optimizers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import joblib

# engine = create_engine(f"postgresql+psycopg2://postgres:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/movie_recommendation")
# conn = engine.connect()
# movies = pl.read_database(query='SELECT * FROM raw.movies', connection=conn)
# conn.close()

total = 50000

data = tf.data.Dataset.from_generator(
    lambda: batch_generator(movies, batch_size=4096, total = total),
    output_signature=(
        (tf.TensorSpec(shape=(None, 20), dtype=tf.float32, name= 'X_user'),
         tf.TensorSpec(shape=(None, 23), dtype=tf.float32, name='X_movie')),
        tf.TensorSpec(shape=(None,), dtype=tf.float32, name='y')
    )
)

(X_user_dev, X_movie_dev), y_dev = next(iter(data))
training_batch = 4096
train_data = data.unbatch().batch(training_batch).skip(1).prefetch(tf.data.AUTOTUNE).repeat()

# import numpy as np
# np.isnan(X_user_dev.numpy()).any()
# np.isnan(X_movie_dev.numpy()).any()
# np.isnan(y_dev.numpy()).any()



# for batch in train_data.take(5):
#     (X_user_batch, X_movie_batch), y_batch = batch
#     print("X_user_batch shape:", X_user_batch.shape)
#     print("X_movie_batch shape:", X_movie_batch.shape)
#     print("y_batch shape:", y_batch.shape)
#     print("X_user_batch:", X_user_batch.numpy()[0])
#     print("X_movie_batch:", X_movie_batch.numpy()[0])
#     print("y_batch:", y_batch.numpy()[0])

for batch in train_data.take(15):
    (X_user_batch, X_movie_batch), y_batch = batch
    print(np.isnan(X_user_batch.numpy()).any(), np.isnan(X_movie_batch.numpy()).any(), np.isnan(y_batch.numpy()).any())
    print(np.isinf(X_user_batch.numpy()).any(), np.isinf(X_movie_batch.numpy()).any(), np.isinf(y_batch.numpy()).any())
    print("y_batch min/max:", y_batch.numpy().min(), y_batch.numpy().max())

# for i, batch in enumerate(train_data.take(8)):
#     (X_user_batch, X_movie_batch), y_batch = batch
#     nan_mask = np.isnan(X_user_batch.numpy()).any(axis=1)
#     if nan_mask.any():
#         print(f"Batch {i+1} ima NaN u X_user_batch na indeksima:", np.where(nan_mask)[0])
#         print("Redovi sa NaN:", X_user_batch.numpy()[nan_mask])
#         # Opcionalno: pogledaj i y_batch[nan_mask], X_movie_batch[nan_mask]



model = ColaborativeFiltering(20, 23 ,user_layers = [256, 128, 64],embedding=64, learning_rate=0.001)#, user_reg = [regularizers.l2(0.01), None, None])
model.summary()
callbacks = [EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True), ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6, verbose=1)]
history = model.fit(train_data, epochs=20, validation_data=([X_user_dev,  X_movie_dev], y_dev), callbacks=callbacks, steps_per_epoch = int(total // training_batch))


model.save('model_proba.keras')
joblib.dump(history, 'history_proba.pkl')
joblib.dump(scalers, 'scalers_proba.pkl')

# from tensorflow.keras.models import load_model
# load_model('model_proba.keras')





UnknownError: {{function_node __wrapped__IteratorGetNext_output_types_3_device_/job:localhost/replica:0/task:0/device:CPU:0}} NameError: name 'movies' is not defined
Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 865, in get_iterator
    return self._iterators[iterator_id]
           ~~~~~~~~~~~~~~~^^^^^^^^^^^^^

KeyError: np.int64(0)


During handling of the above exception, another exception occurred:


Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 867, in get_iterator
    iterator = iter(self._generator(*self._args.pop(iterator_id)))
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "<ipython-input-5-a0dea4b0d7c8>", line 21, in <lambda>
    lambda: batch_generator(movies, batch_size=4096, total = total),
                            ^^^^^^

NameError: name 'movies' is not defined


	 [[{{node PyFunc}}]] [Op:IteratorGetNext] name: 

In [6]:
###TRAIN.py

###
#samo za RZS
movies = pl.read_csv(r'https://raw.githubusercontent.com/BogdanSliskovic/ML/refs/heads/main/film/movies.csv')
movies.name = 'Movies'
ratings = pl.read_csv(r'https://raw.githubusercontent.com/BogdanSliskovic/ML/refs/heads/main/film/ratings_RZS.csv')
ratings.name = 'Ratings'

for df in [movies, ratings]:
  print(df.name , df.schema, df.shape)

user, movies_feat, df = prep_pipeline(ratings, movies)
X_user, X_movie, y, scalers = scale(df, user, movies_feat)

def prep_tf(user, movies, training_batch = 16):
  user, movies_feat, df = prep_pipeline(ratings, movies)
  X_user, X_movie, y, scalers = scale(df, user, movies_feat)
  data = (X_user, X_movie), y
  data = tf.data.Dataset.from_tensor_slices(data).batch(training_batch)
  return data
###
def split(data):
  (X_user_test, X_movie_test), y_test = next(iter(data))
  (X_user_dev, X_movie_dev), y_dev = next(iter(data.skip(1)))
  train_data = data.skip(2).prefetch(tf.data.AUTOTUNE).repeat()
  return ((X_user_test, X_movie_test, y_test), (X_user_dev, X_movie_dev, y_dev), train_data)

data = prep_tf(ratings, movies)
test_set, dev_set, train_data = split(data)

X_user_test, X_movie_test, y_test = test_set
X_user_dev, X_movie_dev, y_dev = dev_set


model = ColaborativeFiltering(20, 23 ,user_layers = [256, 128, 64],embedding=64, learning_rate=0.001)#, user_reg = [regularizers.l2(0.01), None, None])
callbacks = [EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True), ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6, verbose=1)]
history = model.fit(train_data, epochs=50,validation_data = ((X_user_dev, X_movie_dev), y_dev), callbacks=callbacks, steps_per_epoch = int(10000/16))




Movies Schema([('movieid', Int64), ('title', String), ('genres', String)]) (87585, 3)
Ratings Schema([('userid', Int64), ('movieid', Int64), ('rating', Float64)]) (10000, 3)
Epoch 1/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 0.0773 - mae: 0.2111 - mse: 0.0773 - val_loss: 0.0345 - val_mae: 0.1080 - val_mse: 0.0345 - learning_rate: 0.0010
Epoch 2/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0288 - mae: 0.1263 - mse: 0.0288 - val_loss: 0.0335 - val_mae: 0.1037 - val_mse: 0.0335 - learning_rate: 0.0010
Epoch 3/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0217 - mae: 0.1061 - mse: 0.0217 - val_loss: 0.0319 - val_mae: 0.1010 - val_mse: 0.0319 - learning_rate: 0.0010
Epoch 4/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0173 - mae: 0.0920 - mse: 0.0173 - val_loss: 0.0347 - val_mae: 0.1002 - val_mse: 0.0347 - learning_rate: 0

In [83]:
### inference.py
m_net = model.movie_net
m_embed = m_net.predict(X_movie)
print(m_embed.shape)

X_user_id, _, y_id, _ = scale(df, user, movies_feat, user_id = 28)
u_id = X_user_id[0,0]
m_id =
X_user_id = X_user_id[0,1:]  ##SVAKI RED JE ISTI, A PRVA KOL JE USER_ID

u_net = model.user_net
u_embed = u_net.predict(tf.expand_dims(X_user_id,0))
print(u_embed.shape)

pred = tf.linalg.matmul(u_embed, m_embed, transpose_b= True)
val, idx = tf.math.top_k(pred, k = 10)
tf.gather(pred, idx, axis = 1) == val

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
(10000, 64)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
(1, 64)


<tf.Tensor: shape=(1, 1, 10), dtype=bool, numpy=
array([[[ True,  True,  True,  True,  True,  True,  True,  True,  True,
          True]]])>

In [80]:
ratings

userid,movieid,rating
i64,i64,f64
36683,3741,4.0
16712,4082,2.5
131952,116161,2.5
147475,8360,4.0
131743,42004,4.0
…,…,…
40967,1007,3.5
16508,196,3.0
152948,78266,3.5
144987,182529,3.0


In [81]:
movies

movieid,title,genres
i64,str,str
1,"""Toy Story (1995)""","""Adventure|Animation|Children|C…"
2,"""Jumanji (1995)""","""Adventure|Children|Fantasy"""
3,"""Grumpier Old Men (1995)""","""Comedy|Romance"""
4,"""Waiting to Exhale (1995)""","""Comedy|Drama|Romance"""
5,"""Father of the Bride Part II (1…","""Comedy"""
…,…,…
292731,"""The Monroy Affaire (2022)""","""Drama"""
292737,"""Shelter in Solitude (2023)""","""Comedy|Drama"""
292753,"""Orca (2023)""","""Drama"""
292755,"""The Angry Breed (1968)""","""Drama"""
