In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from tensorflow.keras.optimizers import Adam


In [3]:
# Generate synthetic user, item, and interaction data
num_users = 1000  # Number of unique users
num_items = 500  # Number of unique items
interaction_size = 10000  # Number of interactions (user-item pairs)

# Randomly generate user, item pairs and a rating/preference score
user_ids = np.random.randint(0, num_users, interaction_size)
item_ids = np.random.randint(0, num_items, interaction_size)
ratings = np.random.randint(1, 6, interaction_size)  # Rating scale from 1 to 5

# Create a DataFrame to store the dataset
df = pd.DataFrame({'user_id': user_ids, 'item_id': item_ids, 'rating': ratings})

# Split the dataset into training and testing
train, test = train_test_split(df, test_size=0.2, random_state=42)


In [4]:
# Set hyperparameters
embedding_size = 50  # Size of the embedding for users and items
dropout_rate = 0.3

# Inputs
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

# User and item embedding layers
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size, name='item_embedding')(item_input)

# Flatten the embeddings
user_flatten = Flatten()(user_embedding)
item_flatten = Flatten()(item_embedding)

# Concatenate user and item embeddings
concat = Concatenate()([user_flatten, item_flatten])

# Fully connected layers
dense1 = Dense(128, activation='relu')(concat)
dropout1 = Dropout(dropout_rate)(dense1)
dense2 = Dense(64, activation='relu')(dropout1)
dropout2 = Dropout(dropout_rate)(dense2)

# Output layer (predicting the rating)
output = Dense(1, activation='linear')(dropout2)

# Create the model
model = Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Summary of the model
model.summary()


In [5]:
# Prepare training and testing data
train_user_input = train['user_id'].values
train_item_input = train['item_id'].values
train_ratings = train['rating'].values

test_user_input = test['user_id'].values
test_item_input = test['item_id'].values
test_ratings = test['rating'].values

# Train the model
history = model.fit(
    [train_user_input, train_item_input], train_ratings,
    validation_data=([test_user_input, test_item_input], test_ratings),
    epochs=10, batch_size=64
)


Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - loss: 6.5486 - val_loss: 2.0183
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 2.0976 - val_loss: 2.0585
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.9531 - val_loss: 2.1333
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.9037 - val_loss: 2.1459
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.8551 - val_loss: 2.1718
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 1.7908 - val_loss: 2.2687
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.7643 - val_loss: 2.3100
Epoch 8/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 1.6645 - val_loss: 2.2296
Epoch 9/10
[1m125/125[0m [32m━━━━━━━

In [6]:
# Evaluate on the test set
test_loss = model.evaluate([test_user_input, test_item_input], test_ratings)
print(f"Test Loss (MSE): {test_loss}")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.5114
Test Loss (MSE): 2.4556198120117188


In [7]:
# Predict ratings for a user-item pair
user_id = 10  # Example user
item_id = 50  # Example item

predicted_rating = model.predict([np.array([user_id]), np.array([item_id])])
print(f"Predicted rating for user {user_id} on item {item_id}: {predicted_rating[0][0]}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 373ms/step
Predicted rating for user 10 on item 50: 1.884240746498108
