In [6]:
from pymongo import MongoClient
import pandas as pd

# Connect to MongoDB
client = MongoClient("mongodb+srv://farid:12345@mycluster.w4fnv.mongodb.net/zibana")
db = client['zibana']
products_collection = db['products'] 
users_collection = db["users"]

# Fetch all products
cursor = products_collection.find({}) 

# Convert cursor to a list of dictionaries
products_list = list(cursor)

# Check if products_list is not empty
if products_list:
    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(products_list)

    # Save DataFrame to CSV
    df.to_csv('products.csv', index=False) 
    print("Products saved to products.csv")
else:
    print("No products found in the collection.")

Products saved to products.csv


In [7]:
# Fetch all users' interaction data
cursor = users_collection.find({}, {"viewHistory": 1, "likedProducts": 1})

In [8]:
# Prepare a list to hold the data
data = []

# Iterate through each user's document
for doc in cursor:
    user_id = str(doc['_id'])  # Get the user_id (_id field)
    
    # Process viewHistory
    views = doc.get('viewHistory', [])
    for view in views:
        product_id = view['productId']  # Get the product_id
        # Assign a weight of 1 for views
        data.append({
            'product_id': product_id,
            'user_id': user_id,
            'interaction': 1  # 1 signifies a view
        })
    
    # Process likedProducts
    likes = doc.get('likedProducts', [])
    for liked_product in likes:
        product_id = liked_product  # Get the product_id
        # Assign a weight of 2 for likes
        data.append({
            'product_id': product_id,
            'user_id': user_id,
            'interaction': 2  # 2 signifies a like
        })

# Create a DataFrame from the data
df = pd.DataFrame(data)
print(df.head())
df.to_csv('data.csv', index=False)

                 product_id                   user_id  interaction
0  678e161044a092bc86641aa0  678e0fe596822708dc914e9f            1
1  678f5bb777bdc2dd8ccd0748  678e0fe596822708dc914e9f            1
2  678e14f844a092bc866418e9  678e0fe596822708dc914e9f            1
3  678e14f944a092bc866418ed  678e0fe596822708dc914e9f            1
4  678e14f944a092bc866418f1  678e0fe596822708dc914e9f            1


In [9]:
from sklearn.preprocessing import LabelEncoder

# Initialize the encoders
user_encoder = LabelEncoder()
product_encoder = LabelEncoder()

# Fit on the entire dataset
user_encoder.fit(df['user_id'])
product_encoder.fit(df['product_id'])

# Transform the data
df['user_id_encoded'] = user_encoder.transform(df['user_id'])
df['product_id_encoded'] = product_encoder.transform(df['product_id'])

print(df.head())

                 product_id                   user_id  interaction  \
0  678e161044a092bc86641aa0  678e0fe596822708dc914e9f            1   
1  678f5bb777bdc2dd8ccd0748  678e0fe596822708dc914e9f            1   
2  678e14f844a092bc866418e9  678e0fe596822708dc914e9f            1   
3  678e14f944a092bc866418ed  678e0fe596822708dc914e9f            1   
4  678e14f944a092bc866418f1  678e0fe596822708dc914e9f            1   

   user_id_encoded  product_id_encoded  
0                0                  53  
1                0                  70  
2                0                   1  
3                0                   2  
4                0                   3  


In [10]:
from sklearn.model_selection import train_test_split

# Define features and target
X = df[['user_id_encoded', 'product_id_encoded']]
y = df['interaction']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")

Training samples: 324
Testing samples: 36


In [11]:
from models import build_recommendation_model, train_recommendation_model

# Determine the number of unique users and products
n_users = df['user_id_encoded'].nunique()
n_products = df['product_id_encoded'].nunique()

# Build the recommendation model
model = build_recommendation_model(n_users, n_products)

# (Optional) Display the model summary
model.summary()

2025-02-22 17:42:03.265234: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740233523.284776   13252 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740233523.290547   13252 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-22 17:42:03.310577: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-22 17:42:05.740755: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL

In [12]:
# Prepare training arrays
train_user = X_train['user_id_encoded'].values
train_product = X_train['product_id_encoded'].values
train_labels = y_train.values - 1  # Adjust labels to start from 0 if needed

In [17]:
# Train the model
history = train_recommendation_model(model, train_user, train_product, train_labels)

Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.9314 - loss: 0.1251 - val_accuracy: 0.3939 - val_loss: 2.6841
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9482 - loss: 0.1152 - val_accuracy: 0.4242 - val_loss: 2.7184
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.9534 - loss: 0.1030 - val_accuracy: 0.3939 - val_loss: 2.7344
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9299 - loss: 0.1213 - val_accuracy: 0.4242 - val_loss: 2.7585
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9457 - loss: 0.1085 - val_accuracy: 0.4242 - val_loss: 2.7790
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.9333 - loss: 0.1229 - val_accuracy: 0.4242 - val_loss: 2.8502
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━

In [18]:
# Save the trained model
model.save('recommendation_model.h5')

