## Implementing the Deep Matrix Factorization (DMF) Method on the "Book-Crossing" Dataset

In [None]:
#Importing the necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:

# Read the Book-Crossing ratings file (';'-separated, Latin-1 encoded) into a DataFrame
path_to_data = 'data/BX-Book-Ratings.csv'
data = pd.read_csv(path_to_data, sep=';', encoding='latin-1')

In [None]:
# Preprocess the data
# For simplicity, keep only users who have rated at least 5 books
user_counts = data['User-ID'].value_counts()
active_users = user_counts[user_counts >= 5].index
# .copy() makes filtered_data an independent DataFrame. Without it, the column
# assignments below write to a selection of `data` and pandas emits
# SettingWithCopyWarning.
filtered_data = data[data['User-ID'].isin(active_users)].copy()

# Encode raw user and item identifiers as integer ids (0..n-1), the form the
# embedding layers index by
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
filtered_data['user_id'] = user_encoder.fit_transform(filtered_data['User-ID'])
filtered_data['item_id'] = item_encoder.fit_transform(filtered_data['ISBN'])

# Split the data into train (80%) and test (20%) sets; the fixed seed keeps the
# split reproducible
train_data, test_data = train_test_split(filtered_data, test_size=0.2, random_state=42)

In [None]:
# Define the deep matrix factorization model using Keras
class DeepMatrixFactorization(tf.keras.Model):
    """Score (user, item) id pairs with embeddings followed by a small MLP.

    Each id is looked up in its own embedding table. The two vectors are
    concatenated, passed through a 64-unit ReLU layer, and reduced to a single
    sigmoid output.
    """

    def __init__(self, num_users, num_items, latent_dim):
        """Create the model's layers.

        Args:
            num_users: Size of the user embedding table.
            num_items: Size of the item embedding table.
            latent_dim: Width of each embedding vector.
        """
        super().__init__()
        layers = tf.keras.layers
        self.user_embedding = layers.Embedding(num_users, latent_dim)
        self.item_embedding = layers.Embedding(num_items, latent_dim)
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(64, activation='relu')
        self.dense2 = layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: A pair ``(user_input, item_input)`` of id tensors.

        Returns:
            The output of the final sigmoid layer for each pair.
        """
        users, items = inputs
        user_vec = self.flatten(self.user_embedding(users))
        item_vec = self.flatten(self.item_embedding(items))
        hidden = self.dense1(tf.concat([user_vec, item_vec], axis=-1))
        return self.dense2(hidden)

In [None]:
# This is a basic example.
# The model architecture and hyperparameters may need to be adjusted to suit the specific task and the characteristics of the dataset.
# For better performance in real-world scenarios, consider incorporating additional features or using a more sophisticated architecture.

In [None]:
# Get the number of unique users and items (used as the embedding table sizes)
num_users = filtered_data['user_id'].nunique()
num_items = filtered_data['item_id'].nunique()

# Hyperparameters
latent_dim = 50
num_epochs = 10
batch_size = 256

# Instantiate the model
model = DeepMatrixFactorization(num_users, num_items, latent_dim)

# Compile the model with binary cross-entropy loss and Adam optimizer
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


def _with_negative_samples(user_ids, item_ids, num_items, observed_keys, rng,
                           negatives_per_positive=1):
    """Augment observed (user, item) pairs with randomly sampled negatives.

    Every observed pair gets label 1. For each of them,
    ``negatives_per_positive`` items are drawn uniformly at random for the same
    user and given label 0. Sampled pairs that actually occur in
    ``observed_keys`` are dropped so that a known interaction is never labelled
    as a negative.

    Args:
        user_ids: 1-D integer array of user ids for the observed pairs.
        item_ids: 1-D integer array of item ids, aligned with ``user_ids``.
        num_items: Number of distinct item ids; items are sampled from
            ``[0, num_items)``.
        observed_keys: 1-D int64 array of ``user_id * num_items + item_id`` for
            every known interaction.
        rng: ``numpy.random.Generator`` used for sampling.
        negatives_per_positive: Negatives drawn per observed pair.

    Returns:
        Tuple ``(users, items, labels)`` of aligned 1-D arrays, positives first
        and then negatives; ``labels`` is float32.
    """
    user_ids = np.asarray(user_ids, dtype=np.int64)
    item_ids = np.asarray(item_ids, dtype=np.int64)

    neg_users = np.repeat(user_ids, negatives_per_positive)
    neg_items = rng.integers(0, num_items, size=neg_users.shape[0], dtype=np.int64)

    # Encode each pair as a single int64 so membership can be tested in one vectorized call
    neg_keys = neg_users * num_items + neg_items
    is_unobserved = ~np.isin(neg_keys, observed_keys)
    neg_users = neg_users[is_unobserved]
    neg_items = neg_items[is_unobserved]

    users = np.concatenate([user_ids, neg_users])
    items = np.concatenate([item_ids, neg_items])
    labels = np.concatenate([
        np.ones(user_ids.shape[0], dtype=np.float32),
        np.zeros(neg_users.shape[0], dtype=np.float32),
    ])
    return users, items, labels


# With only observed pairs, every label would be 1: binary cross-entropy is then
# minimised by always predicting 1 and accuracy is trivially 100%. Unobserved
# (user, item) pairs are therefore sampled as label-0 negatives for both splits.
rng = np.random.default_rng(42)
observed_keys = (
    filtered_data['user_id'].to_numpy(dtype=np.int64) * num_items
    + filtered_data['item_id'].to_numpy(dtype=np.int64)
)

# Training data (observed positives + sampled negatives)
train_user_ids, train_item_ids, train_labels = _with_negative_samples(
    train_data['user_id'].values, train_data['item_id'].values,
    num_items, observed_keys, rng,
)

# Test data (observed positives + sampled negatives)
test_user_ids, test_item_ids, test_labels = _with_negative_samples(
    test_data['user_id'].values, test_data['item_id'].values,
    num_items, observed_keys, rng,
)

In [None]:
# Train the model, validating on the held-out split
train_inputs = [train_user_ids, train_item_ids]
test_inputs = [test_user_ids, test_item_ids]
model.fit(
    train_inputs,
    train_labels,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_data=(test_inputs, test_labels),
)

# Evaluate on the held-out split; the result is unpacked as (loss, accuracy)
test_loss, test_accuracy = model.evaluate(test_inputs, test_labels)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')