In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fix the global NumPy seed so the synthetic interaction log is reproducible.
np.random.seed(42)

# Sizes of the simulated user base, product catalogue and interaction log.
num_users, num_products, num_interactions = 100, 50, 1000

# Draw user/product IDs from 1..N (inclusive) and actions with a skew
# towards views (70% view, 20% click, 10% purchase).
# NOTE: the order of these three draws determines the random stream.
user_ids = np.random.choice(range(1, num_users + 1), size=num_interactions)
product_ids = np.random.choice(range(1, num_products + 1), size=num_interactions)
actions = np.random.choice(
    ['view', 'click', 'purchase'],
    size=num_interactions,
    p=[0.7, 0.2, 0.1],
)

# Assemble the interaction log: one row per (user, product, action) event.
data = pd.DataFrame(dict(user_id=user_ids, product_id=product_ids, action=actions))

# Print the frame (pandas truncates the middle rows in the text repr).
print(data)

     user_id  product_id    action
0         52          34      view
1         93          47      view
2         15           8      view
3         72          40      view
4         61          49      view
..       ...         ...       ...
995       10          41      view
996       67          13      view
997       18          30  purchase
998      100          33      view
999       86          13      view

[1000 rows x 3 columns]


In [3]:
# Step 2: turn each action label into an ordinal rating
# (view < click < purchase).
action_to_rating = dict(view=1, click=2, purchase=3)
data['rating'] = data['action'].map(action_to_rating)

# Step 3: build the user x product interaction matrix.
# pivot_table aggregates repeated (user, product) pairs with its default
# aggregation (mean); pairs that never occur are filled with 0.
interaction_matrix = pd.pivot_table(
    data,
    index='user_id',
    columns='product_id',
    values='rating',
    fill_value=0,
)

# Show the first rows of the interaction log, now including the rating column.
print(data.head())


   user_id  product_id action  rating
0       52          34   view       1
1       93          47   view       1
2       15           8   view       1
3       72          40   view       1
4       61          49   view       1


In [4]:
# Preview the first five users of the user x product rating matrix.
print(interaction_matrix.head(5))

product_id   1    2    3    4    5    6    7    8    9    10  ...   41   42  \
user_id                                                       ...             
1           0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
2           0.0  0.0  0.0  3.0  0.0  2.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
3           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
4           0.0  2.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  ...  0.0  0.0   
5           0.0  0.0  0.0  0.0  0.0  2.0  0.0  0.0  0.0  0.0  ...  0.0  1.0   

product_id   43   44   45   46   47   48   49   50  
user_id                                             
1           0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  
2           0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  
3           0.0  0.0  1.0  0.0  0.0  0.0  3.0  0.0  
4           0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  
5           0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  

[5 rows x 50 columns]


In [5]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input, Dot, Flatten, Dense
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

In [6]:
# Pull the two model inputs and the target out of the frame as NumPy arrays.
user_ids, product_ids, ratings = (
    data[column].values for column in ('user_id', 'product_id', 'rating')
)

# Hold out 20% of the interactions for evaluation. The three arrays are split
# with the same shuffling, and the fixed random_state makes the split repeatable.
(
    train_user_ids, test_user_ids,
    train_product_ids, test_product_ids,
    train_ratings, test_ratings,
) = train_test_split(user_ids, product_ids, ratings, test_size=0.2, random_state=42)

In [7]:



# Sanity-check the train/test split created in the previous cell.
# The split itself is not recomputed here: repeating it with the same
# random_state would only reproduce the identical arrays.

# Each partition must keep users, products and ratings aligned row-for-row.
assert len(train_user_ids) == len(train_product_ids) == len(train_ratings)
assert len(test_user_ids) == len(test_product_ids) == len(test_ratings)

# Together the two partitions must account for every interaction exactly once.
assert len(train_ratings) + len(test_ratings) == len(data)





In [8]:
# Build the model: a dot-product matrix-factorisation recommender.
num_users = data['user_id'].nunique()
num_products = data['product_id'].nunique()
embedding_size = 50

# The embedding tables are indexed by the raw ID values, so each table must
# have more rows than the largest ID. Sizing from the number of *unique* IDs
# only works when IDs are exactly 1..N with none missing; using max + 1 stays
# valid when some IDs are absent or the IDs are not contiguous.
user_vocab_size = int(data['user_id'].max()) + 1
product_vocab_size = int(data['product_id'].max()) + 1

# User branch: ID -> embedding vector.
user_input = Input(shape=(1,))
user_embedding = Embedding(user_vocab_size, embedding_size)(user_input)
user_vec = Flatten()(user_embedding)

# Product branch: ID -> embedding vector.
product_input = Input(shape=(1,))
product_embedding = Embedding(product_vocab_size, embedding_size)(product_input)
product_vec = Flatten()(product_embedding)

# Affinity score: dot product of the two embeddings, passed through a
# single-unit Dense layer to produce the predicted rating.
dot_product = Dot(axes=1)([user_vec, product_vec])
output = Dense(1)(dot_product)

model = Model(inputs=[user_input, product_input], outputs=output)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit([train_user_ids, train_product_ids], train_ratings, epochs=10, verbose=1)

# Evaluate the model on the held-out interactions (mean squared error).
loss = model.evaluate([test_user_ids, test_product_ids], test_ratings)
print(f'Test loss: {loss}')

Epoch 1/10


In [None]:

from sklearn.metrics import precision_recall_fscore_support

# Map numeric ratings back to their action labels.
rating_to_action = {1: 'view', 2: 'click', 3: 'purchase'}
true_actions = [rating_to_action.get(rating, 'unknown') for rating in test_ratings]

# Score the held-out interactions. The predictions must be computed here:
# nothing earlier in the notebook defines `predicted_ratings`, so reading it
# without this step fails with NameError on a fresh kernel.
raw_predictions = model.predict([test_user_ids, test_product_ids]).ravel()

# Round the continuous predictions to the nearest rating and convert to actions.
predicted_ratings = np.round(raw_predictions).astype(int)
predicted_actions = [rating_to_action.get(rating, 'unknown') for rating in predicted_ratings]

# Keep only the pairs whose prediction rounds to a known rating (1-3).
# NOTE(review): dropped pairs are not counted as errors, so the metrics below
# describe only the predictions that landed on a valid rating.
scored_pairs = [
    (true_action, predicted_action)
    for true_action, predicted_action in zip(true_actions, predicted_actions)
    if predicted_action != 'unknown'
]
filtered_true_actions = [true_action for true_action, _ in scored_pairs]
filtered_predicted_actions = [predicted_action for _, predicted_action in scored_pairs]

# Calculate support-weighted metrics. zero_division=0 reports 0 (without a
# warning) for a class that is never predicted or never present.
precision, recall, f1, _ = precision_recall_fscore_support(
    filtered_true_actions,
    filtered_predicted_actions,
    average='weighted',
    zero_division=0,
)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
