<a href="https://colab.research.google.com/github/SMotaee/Deep-Learning/blob/main/Handling%20missing%20values%20through%20Matrix%20Factorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset: tab-separated file, one (user_id, movie_id, rating, timestamp) record per line
data = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Split the dataset into training and testing sets (80/20; fixed seed so the split is repeatable)
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Create the user-item matrix.
# IDs are used as 1-based positions (ID k -> row/column k - 1), so each axis
# must span the largest ID. Counting distinct IDs only gives that size when the
# IDs are contiguous; the largest ID gives it either way.
num_users = int(data.user_id.max())
num_movies = int(data.movie_id.max())
user_item_matrix = np.zeros((num_users, num_movies))

# Fill in the training ratings with a single vectorized assignment; cells that
# have no training rating keep the value 0.
# NOTE(review): assumes each (user, movie) pair occurs at most once in
# train_data. With repeated pairs NumPy does not document which value is kept.
user_rows = train_data.user_id.values - 1
movie_cols = train_data.movie_id.values - 1
user_item_matrix[user_rows, movie_cols] = train_data.rating.values


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Add
from tensorflow.keras.models import Model

# Set the embedding dimension (length of each user / movie latent vector)
embedding_dim = 10

# Size the lookup tables from the largest ID rather than the number of distinct
# IDs. The raw IDs are passed to the model unchanged, and an Embedding created
# with input_dim=N only has rows for indices 0..N-1. With IDs that start at 1,
# a table with num_users (or num_movies) rows has no row for the largest ID.
user_table_size = int(data.user_id.max()) + 1
movie_table_size = int(data.movie_id.max()) + 1

# Create the user embedding layer: a latent vector plus a scalar bias per user
user_input = Input(shape=[1])
user_embedding = Embedding(user_table_size, embedding_dim)(user_input)
user_bias = Embedding(user_table_size, 1)(user_input)

# Create the item embedding layer: a latent vector plus a scalar bias per movie
item_input = Input(shape=[1])
item_embedding = Embedding(movie_table_size, embedding_dim)(item_input)
item_bias = Embedding(movie_table_size, 1)(item_input)

# Calculate the dot product of the user and item embeddings.
# Both embeddings are (batch, 1, embedding_dim); contracting axis 2 gives (batch, 1, 1).
dot_product = Dot(axes=2)([user_embedding, item_embedding])

# Add the user and item biases to the dot product, then flatten to (batch, 1)
output = Add()([dot_product, user_bias, item_bias])
output = Flatten()(output)

# Create the matrix factorization model: (user_id, movie_id) -> predicted rating
model = Model(inputs=[user_input, item_input], outputs=output)

# Compile the model (MSE loss on the rating)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model; the held-out split is passed as validation data so its loss
# is reported after every epoch.
history = model.fit(x=[train_data.user_id.values, train_data.movie_id.values],
                    y=train_data.rating.values,
                    batch_size=64,
                    epochs=10,
                    validation_data=([test_data.user_id.values, test_data.movie_id.values], test_data.rating.values))


In [None]:
# Calculate the root mean squared error (RMSE)
from sklearn.metrics import mean_squared_error

# Assemble the [user ids, movie ids] input pair for each split.
train_inputs = [train_data.user_id.values, train_data.movie_id.values]
test_inputs = [test_data.user_id.values, test_data.movie_id.values]

# Predict ratings for every (user, movie) pair in each split.
train_predictions = model.predict(train_inputs)
test_predictions = model.predict(test_inputs)

# RMSE is the square root of the mean squared error against the true ratings.
train_mse = mean_squared_error(train_data.rating.values, train_predictions)
test_mse = mean_squared_error(test_data.rating.values, test_predictions)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)

# Report both scores rounded to two decimals.
print("Train RMSE: {:.2f}".format(train_rmse))
print("Test RMSE: {:.2f}".format(test_rmse))
