In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Concatenate, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanSquaredError

# Load MovieLens dataset
# ratings.csv is read from the working directory; the code below relies on
# its userId, movieId and rating columns.
ratings_data = pd.read_csv('ratings.csv')

# Preprocess data
# An Embedding layer built with input_dim=N can only look up indices in
# [0, N). The raw IDs are not contiguous (with the dataset used here a
# movieId of 103253 occurs although there are only 9724 distinct movies), so
# feeding them in directly fails during the embedding lookup. Both ID columns
# are therefore re-encoded as 0-based indices, numbered by first appearance.
user_ids = ratings_data['userId'].unique()
item_ids = ratings_data['movieId'].unique()
num_users = len(user_ids)
num_items = len(item_ids)

user_id_map = {old_id: new_id for new_id, old_id in enumerate(user_ids)}
item_id_map = {old_id: new_id for new_id, old_id in enumerate(item_ids)}

# The ID columns are overwritten with their encoded values; user_ids[i] and
# item_ids[j] recover the original IDs for encoded indices i and j.
ratings_data['userId'] = ratings_data['userId'].map(user_id_map)
ratings_data['movieId'] = ratings_data['movieId'].map(item_id_map)

# Every encoded index must be a valid row of its embedding table.
assert ratings_data['userId'].between(0, num_users - 1).all()
assert ratings_data['movieId'].between(0, num_items - 1).all()

train_data, test_data = train_test_split(ratings_data, test_size=0.2, random_state=42)

# Build model
embedding_size = 50  # Embedding size for user and item embeddings
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size)(item_input)

# Flatten removes the length-1 axis left by the single-index input so the two
# embedding vectors can be concatenated along the feature axis.
user_flatten = Flatten()(user_embedding)
item_flatten = Flatten()(item_embedding)

# The concatenated embeddings go through one hidden layer; the final Dense(1)
# has no activation, so the prediction is an unbounded rating estimate.
concatenated = Concatenate(axis=-1)([user_flatten, item_flatten])
dense_layer_1 = Dense(64, activation='relu')(concatenated)
output = Dense(1)(dense_layer_1)

model = Model(inputs=[user_input, item_input], outputs=output)
# The MSE metric repeats the loss; it is kept so evaluate() returns two values.
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[MeanSquaredError()])

# Train model
# validation_split holds out the last 10% of the training rows for validation.
model.fit([train_data['userId'], train_data['movieId']], train_data['rating'],
          batch_size=64, epochs=5, validation_split=0.1)

# Evaluate model
loss, mse = model.evaluate([test_data['userId'], test_data['movieId']], test_data['rating'])
print(f'Test Loss: {loss}, Test MSE: {mse}')


Epoch 1/5


InvalidArgumentError: Graph execution error:

Detected at node model/embedding_1/embedding_lookup defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-1-eaf95a53cf27>", line 38, in <cell line: 38>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1807, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1150, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 590, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/functional.py", line 515, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/functional.py", line 672, in _run_internal_graph

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/embedding.py", line 272, in call

indices[4,0] = 103253 is not in [0, 9724)
	 [[{{node model/embedding_1/embedding_lookup}}]] [Op:__inference_train_function_1133]

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Concatenate, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanSquaredError

# Load MovieLens dataset
# ratings.csv is read from the working directory; the code below relies on
# its userId, movieId and rating columns.
ratings_data = pd.read_csv('ratings.csv')

# Preprocess data
# unique() keeps first-appearance order, so user_ids[i] / item_ids[j] are the
# original IDs behind encoded indices i / j.
user_ids = ratings_data['userId'].unique()
item_ids = ratings_data['movieId'].unique()
num_users = len(user_ids)
num_items = len(item_ids)

# Map user and item IDs to contiguous integers starting from 0, because an
# Embedding layer with input_dim=N only accepts indices in [0, N).
user_id_map = {old_id: new_id for new_id, old_id in enumerate(user_ids)}
item_id_map = {old_id: new_id for new_id, old_id in enumerate(item_ids)}

# The ID columns are overwritten in place with their encoded values.
ratings_data['userId'] = ratings_data['userId'].map(user_id_map)
ratings_data['movieId'] = ratings_data['movieId'].map(item_id_map)

train_data, test_data = train_test_split(ratings_data, test_size=0.2, random_state=42)

# Build model
embedding_size = 50  # Embedding size for user and item embeddings
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size)(item_input)

# Flatten removes the length-1 axis left by the single-index input so the two
# embedding vectors can be concatenated along the feature axis.
user_flatten = Flatten()(user_embedding)
item_flatten = Flatten()(item_embedding)

# The concatenated embeddings go through one hidden layer; the final Dense(1)
# has no activation, so the prediction is an unbounded rating estimate.
concatenated = Concatenate(axis=-1)([user_flatten, item_flatten])
dense_layer_1 = Dense(64, activation='relu')(concatenated)
output = Dense(1)(dense_layer_1)

model = Model(inputs=[user_input, item_input], outputs=output)
# The MSE metric repeats the loss; it is kept so evaluate() returns two values.
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[MeanSquaredError()])

# Train model
# validation_split holds out the last 10% of the training rows for validation.
model.fit([train_data['userId'], train_data['movieId']], train_data['rating'],
          batch_size=64, epochs=5, validation_split=0.1)

# Evaluate model
loss, mse = model.evaluate([test_data['userId'], test_data['movieId']], test_data['rating'])
print(f'Test Loss: {loss}, Test MSE: {mse}')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.7631617784500122, Test MSE: 0.7631617784500122


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Concatenate, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanSquaredError

# Load MovieLens dataset
# Both files are read from the working directory. ratings.csv supplies the
# userId, movieId and rating columns used for training.
ratings_data = pd.read_csv('ratings.csv')
movies_data = pd.read_csv("movies.csv")  # Movie metadata; not used for training in this cell

# Preprocess data
# unique() keeps first-appearance order, so user_ids[i] / item_ids[j] are the
# original IDs behind encoded indices i / j.
user_ids = ratings_data['userId'].unique()
item_ids = ratings_data['movieId'].unique()
num_users = len(user_ids)
num_items = len(item_ids)

# Map user and item IDs to contiguous integers starting from 0, because an
# Embedding layer with input_dim=N only accepts indices in [0, N).
user_id_map = {old_id: new_id for new_id, old_id in enumerate(user_ids)}
item_id_map = {old_id: new_id for new_id, old_id in enumerate(item_ids)}

# The ID columns are overwritten in place with their encoded values.
ratings_data['userId'] = ratings_data['userId'].map(user_id_map)
ratings_data['movieId'] = ratings_data['movieId'].map(item_id_map)

train_data, test_data = train_test_split(ratings_data, test_size=0.2, random_state=42)

# Build model
embedding_size = 100  # Embedding size for user and item embeddings
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size)(item_input)

# Flatten removes the length-1 axis left by the single-index input so the two
# embedding vectors can be concatenated along the feature axis.
user_flatten = Flatten()(user_embedding)
item_flatten = Flatten()(item_embedding)

# The concatenated embeddings go through one hidden layer; the final Dense(1)
# has no activation, so the prediction is an unbounded rating estimate.
concatenated = Concatenate(axis=-1)([user_flatten, item_flatten])
dense_layer_1 = Dense(64, activation='relu')(concatenated)
output = Dense(1)(dense_layer_1)

model = Model(inputs=[user_input, item_input], outputs=output)
# The MSE metric reports the same quantity as the loss.
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[MeanSquaredError()])

# Train model
# validation_split holds out the last 10% of the training rows for validation.
# fit() is the cell's final expression, so its History object is displayed.
model.fit([train_data['userId'], train_data['movieId']], train_data['rating'],
          batch_size=64, epochs=10, validation_split=0.1)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7e5ab13eae90>

In [None]:
# Print the layer-by-layer summary of the current `model`.
# NOTE(review): the saved output below reports 50-dimensional embeddings while
# the preceding cell sets embedding_size = 100 — the output appears to come
# from an earlier model; re-run this cell after training to refresh it.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 embedding_2 (Embedding)     (None, 1, 50)                30500     ['input_3[0][0]']             
                                                                                                  
 embedding_3 (Embedding)     (None, 1, 50)                486200    ['input_4[0][0]']             
                                                                                            

In [None]:
# Predict movies for a user
user_id = 1  # Example user ID (original ID as it appears in ratings.csv)
top_n = 5    # Number of recommendations to show

# The model was trained on encoded indices, not raw IDs, so the original user
# ID has to be translated first. An unknown ID raises KeyError here instead of
# silently scoring some other user.
user_index = user_id_map[user_id]

# Score every known movie for this user: one (user index, movie index) pair
# per encoded movie. `user_ratings` holds the repeated user index (the name is
# kept from the earlier version of this cell; it does not contain ratings).
user_ratings = np.full(num_items, user_index)
movie_ids = np.arange(num_items)

predictions = model.predict([user_ratings, movie_ids])
predicted_ratings = predictions.flatten()
assert predicted_ratings.shape == (num_items,)

# Get top recommended movies, highest predicted rating first.
# NOTE: movies the user has already rated are not filtered out.
top_movies_indices = np.argsort(predicted_ratings)[::-1][:top_n]

# Map encoded movie indices back to their original IDs
top_movies_ids = [item_ids[i] for i in top_movies_indices]

# Look titles up one ID at a time so the list keeps the ranking order; a
# boolean isin() filter would return them in movies_data row order instead.
# IDs missing from movies_data are skipped.
title_by_movie_id = dict(zip(movies_data['movieId'], movies_data['title']))
top_movies_titles = [
    title_by_movie_id[movie_id]
    for movie_id in top_movies_ids
    if movie_id in title_by_movie_id
]

print("Top recommended movies for user {}: ".format(user_id))
for rank, title in enumerate(top_movies_titles, start=1):
    print(f"{rank}. {title}")


[1 1 1 ... 1 1 1]
[   0    1    2 ... 9721 9722 9723]
(9724,)
Top recommended movies for user 1: 
1. Ex Drummer (2007)
2. Dream of Light (a.k.a. Quince Tree Sun, The) (Sol del membrillo, El) (1992)
3. Deathgasm (2015)
4. Miss Sloane (2016)
5. De platte jungle (1978)


In [None]:
# Display the training split. Its userId / movieId columns hold the re-encoded
# 0-based indices produced via user_id_map / item_id_map, not the original IDs.
train_data

Unnamed: 0,userId,movieId,rating,timestamp
80568,508,4285,3.0,1435994597
50582,325,5629,4.0,1322252335
8344,56,134,3.0,965798155
99603,609,66,4.0,1479544102
71701,461,1172,2.0,1174438249
...,...,...,...,...
6265,41,1873,4.0,996259059
54886,363,524,4.0,869443367
76820,479,2240,4.0,1179163171
860,5,712,3.0,845556567
