In [49]:
# Cast user IDs to str so they can be compared against plain string literals
events_data["userId"] = events_data["userId"].astype(str)

# See all unique user IDs in the dataset
print(events_data["userId"].unique())

# Show every event row recorded for one specific user
print(events_data.loc[events_data["userId"].eq("67b04f3cdc12c6e7d07cff48")])


['65f3c55249132e4527d465f7' '67b04f3cdc12c6e7d07cff48'
 '67b0587a928d6fd8bf045fdf' '67b0bc586ffde1a21eaf0e1f'
 '67b04dcf928d6fd8bf045fc9' '67b0f429ea5564202b96a19e']
                         _id                    userId  \
10  67b0a5f2b87d10638a0e1ce1  67b04f3cdc12c6e7d07cff48   
11  67b0a636b87d10638a0e1ce5  67b04f3cdc12c6e7d07cff48   
12  67b0a663b87d10638a0e1cef  67b04f3cdc12c6e7d07cff48   
13  67b0a663b87d10638a0e1cf1  67b04f3cdc12c6e7d07cff48   
14  67b0a663b87d10638a0e1cf3  67b04f3cdc12c6e7d07cff48   
15  67b0a663b87d10638a0e1cf5  67b04f3cdc12c6e7d07cff48   
16  67b0a663b87d10638a0e1cf8  67b04f3cdc12c6e7d07cff48   
17  67b0a664b87d10638a0e1cfa  67b04f3cdc12c6e7d07cff48   
18  67b0a664b87d10638a0e1cfc  67b04f3cdc12c6e7d07cff48   
19  67b0b839b87d10638a0e1d00  67b04f3cdc12c6e7d07cff48   
20  67b0b972b87d10638a0e1d05  67b04f3cdc12c6e7d07cff48   
21  67b0bc11b87d10638a0e1d18  67b04f3cdc12c6e7d07cff48   
22  67b0bc3db87d10638a0e1d1a  67b04f3cdc12c6e7d07cff48   
68  67b104679b698ac759

In [27]:
# Print the dtype of every column in events_data
print(events_data.dtypes)


_id          object
userId       object
productId    object
eventType    object
weight        int64
dtype: object


In [None]:
import os

import numpy as np
import pandas as pd
from pymongo import MongoClient
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ✅ Connect to MongoDB
# The connection string carries the database username and password, so it is
# read from the MONGODB_URI environment variable instead of being hardcoded in
# the notebook (where it would be shared with every copy of the file).
# NOTE(review): the credentials that were previously committed in this cell
# should be rotated.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "MONGODB_URI is not set; export the MongoDB connection string before running this cell."
    )
client = MongoClient(mongodb_uri)
db = client["test"]

# ✅ Load product data (only the fields used for content-based filtering)
products_data = pd.DataFrame(list(db.products.find({}, {"_id": 1, "title": 1, "description": 1})))

# ✅ Load event data (user interactions)
events_data = pd.DataFrame(list(db.events.find({}, {"userId": 1, "productId": 1, "eventType": 1})))

# Ensure product IDs are strings
products_data["_id"] = products_data["_id"].astype(str)

# Ensure userId and productId are treated as strings and strip whitespace
events_data["userId"] = events_data["userId"].astype(str).str.strip()
events_data["productId"] = events_data["productId"].astype(str).str.strip()

# Assign event weights (event types missing from this mapping become NaN)
event_weights = {"view": 1,"search":2, "add_to_cart": 3, "purchase": 5}
events_data["weight"] = events_data["eventType"].map(event_weights)

# Debugging: Print unique user IDs and events data
print("Unique User IDs in events_data:")
print(events_data["userId"].unique())
print("\nFirst few rows of events_data:")
print(events_data.head())

In [None]:


# Build one text field per product: title, a space, then description
# (missing values are treated as empty strings)
products_data["combined_features"] = (
    products_data["title"]
    .fillna("")
    .add(" ")
    .add(products_data["description"].fillna(""))
)

# Turn the combined text into a TF-IDF matrix, ignoring English stop words
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(products_data["combined_features"])

# Pairwise cosine similarity between every pair of products
cosine_sim = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)

# Function: Content-Based Filtering
def content_based_recommendations(product_id, products_data, cosine_sim, top_n=5):
    """Return the IDs of the products most similar to ``product_id``.

    Args:
        product_id: Value to look up in ``products_data["_id"]``.
        products_data: DataFrame with an ``_id`` column whose row order matches
            the rows/columns of ``cosine_sim``.
        cosine_sim: Square similarity matrix; row ``i`` holds the similarity of
            the ``i``-th product (by position) to every other product.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of up to ``top_n`` product IDs ordered by decreasing similarity
        (ties keep catalogue order). The queried product is never included.
        An empty list is returned when ``product_id`` is not in the catalogue.
    """
    print(f"\nFinding content-based recommendations for product: {product_id}")

    # Work with the row *position*, not the index label: cosine_sim and .iloc
    # are positional, so a filtered or re-indexed frame must not shift the lookup.
    matches = np.flatnonzero((products_data["_id"] == product_id).to_numpy())
    if matches.size == 0:
        print(f"⚠ No product found for ID: {product_id}")
        return []
    position = int(matches[0])

    scores = np.asarray(cosine_sim[position], dtype=float).ravel()
    # Stable sort so equally similar products keep their catalogue order.
    ranked_positions = np.argsort(-scores, kind="stable")

    # Drop the queried product explicitly instead of assuming it is ranked
    # first; a duplicate listing with similarity 1.0 can outrank it.
    recommended_indices = [int(i) for i in ranked_positions if i != position][:max(top_n, 0)]
    recommended_products = products_data["_id"].iloc[recommended_indices].tolist()

    print(f"Content-based recommendations: {recommended_products}")
    return recommended_products

# Function: Collaborative Filtering
def collaborative_filtering(user_id, events_data, top_n=5):
    """Recommend products that the most similar users interacted with.

    Users are compared by the cosine similarity of their summed interaction
    weights per product.

    Args:
        user_id: User to recommend for; compared as ``str(user_id)`` against
            ``events_data["userId"]``.
        events_data: DataFrame with ``userId``, ``productId`` and ``weight``
            columns.
        top_n: Maximum number of product IDs to return.

    Returns:
        Up to ``top_n`` distinct product IDs, ordered by the similarity of the
        user they were taken from (most similar first). Products the target
        user already interacted with are not filtered out. An empty list is
        returned when the user has no interactions.
    """
    print(f"\nFinding collaborative filtering recommendations for user: {user_id}")
    # Use one normalised key everywhere so a non-string ID is looked up the
    # same way in the event filter and in the user-item matrix.
    user_key = str(user_id)
    user_events = events_data[events_data["userId"] == user_key]

    if user_events.empty:
        print(f"⚠ No interactions found for user: {user_id}")
        return []

    print(f"✅ User {user_id} found with {len(user_events)} interactions!")

    # Aggregate interactions by user-product pairs
    user_interactions = events_data.groupby(["userId", "productId"])["weight"].sum().reset_index()

    # Create a pivot table (users x products, 0 where there is no interaction)
    user_item_matrix = user_interactions.pivot(index="userId", columns="productId", values="weight").fillna(0)

    if user_key not in user_item_matrix.index:
        print(f"⚠ User {user_id} has no recorded interactions in the matrix.")
        return []

    # Compute similarity scores between the target user and every user
    user_vector = user_item_matrix.loc[user_key].values.reshape(1, -1)
    similarity_scores = cosine_similarity(user_vector, user_item_matrix)[0]

    # Rank users by similarity. The sort is stable and the target user is
    # removed by key rather than by position, so a user with an identical
    # interaction vector cannot push the target into the "similar" list.
    ranked_positions = np.argsort(-similarity_scores, kind="stable")
    similar_users = [u for u in user_item_matrix.index[ranked_positions] if u != user_key]

    # Collect products in ranking order, keeping first occurrences only, so the
    # result is deterministic and the final truncation keeps the best matches.
    recommended_products = []
    seen = set()
    for similar_user in similar_users:
        user_products = user_interactions.loc[
            user_interactions["userId"] == similar_user, "productId"
        ].tolist()
        for product in user_products:
            if product not in seen:
                seen.add(product)
                recommended_products.append(product)

        if len(recommended_products) >= top_n:
            break

    recommended_products = recommended_products[:max(top_n, 0)]
    print(f"Collaborative filtering recommendations: {recommended_products}")
    return recommended_products

# Function to check user interactions
def check_user_interactions(user_id):
    """Print the events recorded for ``user_id``, or a warning if there are none.

    Reads the module-level ``events_data`` frame and compares ``str(user_id)``
    against its ``userId`` column. Returns nothing.
    """
    print(f"\nChecking interactions for user ID: {user_id}")
    matching_rows = events_data.loc[events_data["userId"] == str(user_id)]

    if not matching_rows.empty:
        print(f"✅ Interactions found for user {user_id}:")
        print(matching_rows)
        return

    print(f"⚠ No interactions found for user: {user_id}")

# Function to get recommendations for a specific user
def get_recommendations_for_user(user_id, products_data, cosine_sim, events_data, top_n=5):
    """Build hybrid (content-based + collaborative) recommendations for a user.

    Content-based recommendations are seeded from the first product in the
    user's events that exists in ``products_data``; collaborative
    recommendations come from ``collaborative_filtering``.

    Args:
        user_id: User to recommend for; compared as ``str(user_id)`` against
            ``events_data["userId"]``.
        products_data: Product catalogue with an ``_id`` column.
        cosine_sim: Product-to-product similarity matrix passed through to
            ``content_based_recommendations``.
        events_data: Interaction events with ``userId`` and ``productId``
            columns (plus whatever ``collaborative_filtering`` needs).
        top_n: Passed to both recommenders as their per-source limit.

    Returns:
        Distinct product IDs, content-based results first and collaborative
        results after, each in the order its recommender returned them. An
        empty list is returned when the user has no interactions.
    """
    print(f"\nEntering get_recommendations_for_user for user: {user_id}")

    # Get user events
    user_events = events_data[events_data["userId"] == str(user_id)]

    if user_events.empty:
        print(f"⚠ No interactions found for user: {user_id}")
        return []

    print(f"✅ User {user_id} found with {len(user_events)} interactions!")
    print(f"User events data: \n{user_events}")

    # Get product IDs from user's events (rows without a product are ignored)
    user_products = user_events["productId"].dropna()

    if user_products.empty:
        print(f"⚠ No product interactions found for user {user_id}")
        return []

    first_product_id = user_products.iloc[0]
    print(f"First product ID: {first_product_id}")

    # Seed content-based filtering with the first interacted product that is
    # still in the catalogue. A single stale product ID should not suppress
    # every recommendation, in particular the collaborative ones, which do not
    # depend on the catalogue at all.
    known_ids = set(products_data["_id"].tolist())
    seed_product_id = next((pid for pid in user_products if pid in known_ids), None)

    if seed_product_id != first_product_id:
        print(f"⚠ Product ID {first_product_id} not found in products data.")

    if seed_product_id is None:
        content_recommendations = []
    else:
        # Get content-based recommendations
        content_recommendations = content_based_recommendations(seed_product_id, products_data, cosine_sim, top_n)

    # Get collaborative filtering recommendations
    collaborative_recommendations = collaborative_filtering(user_id, events_data, top_n)

    # Hybrid approach: combine and remove duplicates. dict.fromkeys keeps the
    # first occurrence of each ID, so the order is stable between runs
    # (a set would shuffle it).
    hybrid_recommendations = list(dict.fromkeys(content_recommendations + collaborative_recommendations))

    print(f"📌 Content-Based Recommendations: {content_recommendations}")
    print(f"📌 Collaborative Filtering Recommendations: {collaborative_recommendations}")
    print(f"📌 Hybrid Recommendations: {hybrid_recommendations}")

    return hybrid_recommendations

# Example usage
user_id_to_test = "67b0bc586ffde1a21eaf0e1f"  # Change this to the user ID you want

# Show the user's raw events first so the recommendations can be sanity-checked
check_user_interactions(user_id_to_test)

recommendations = get_recommendations_for_user(
    user_id=user_id_to_test,
    products_data=products_data,
    cosine_sim=cosine_sim,
    events_data=events_data,
    top_n=5,
)
print(f"\nFinal Recommendations for User {user_id_to_test}: {recommendations}")


Unique User IDs in events_data:
['65f3c55249132e4527d465f7' '67b04f3cdc12c6e7d07cff48'
 '67b0587a928d6fd8bf045fdf' '67b0bc586ffde1a21eaf0e1f'
 '67b04dcf928d6fd8bf045fc9' '67b0f429ea5564202b96a19e']

First few rows of events_data:
                        _id                    userId  \
0  67b04f1ddc12c6e7d07cff38  65f3c55249132e4527d465f7   
1  67b04f22dc12c6e7d07cff3c  65f3c55249132e4527d465f7   
2  67b04f24dc12c6e7d07cff40  65f3c55249132e4527d465f7   
3  67b04f28dc12c6e7d07cff44  65f3c55249132e4527d465f7   
4  67b04f91dc12c6e7d07cff4e  65f3c55249132e4527d465f7   

                  productId eventType  weight  
0  67b04d562784417bd80f966b      view       1  
1  67b04d562784417bd80f966a      view       1  
2  67b04d562784417bd80f9669      view       1  
3  67b04d562784417bd80f9668      view       1  
4  67b04d562784417bd80f9661      view       1  

Checking interactions for user ID: 67b0bc586ffde1a21eaf0e1f
✅ Interactions found for user 67b0bc586ffde1a21eaf0e1f:
                      