In [5]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Example datasets: each table is read from a CSV file given by a relative path.
users = pd.read_csv("UserDataset.csv")

products = pd.read_csv("market_product_dataset.csv")
# Print the product table for a quick visual check of what was loaded.
print(products)
ratings = pd.read_csv("RatingsDataset.csv")

# NOTE(review): `users` and `browsing_history` are loaded but not referenced
# by the code visible in this cell -- confirm whether they are still needed.
browsing_history = pd.read_csv("browsing_history_dataset.csv")

# Create user-item matrix
def create_user_item_matrix(ratings_df):
    """Build a sparse user-by-product ratings matrix from a ratings table.

    Rows correspond to users and columns to products; both are numbered in
    the order in which their ids first appear in ``ratings_df``.

    Args:
        ratings_df: DataFrame with ``userId``, ``productId`` and ``rating``
            columns, one row per rating.

    Returns:
        A tuple ``(matrix, user_mapper, reverse_product_mapper)`` where
        ``matrix`` is a ``csr_matrix`` of shape ``(n_users, n_products)``,
        ``user_mapper`` maps a user id to its row index, and
        ``reverse_product_mapper`` maps a column index back to its product id.

    Note:
        If the same (user, product) pair occurs more than once, the sparse
        constructor adds the ratings together rather than keeping the last.
    """
    unique_users = ratings_df['userId'].unique()
    unique_products = ratings_df['productId'].unique()

    # id -> positional index lookups, in order of first appearance.
    user_mapper = dict(zip(unique_users, range(len(unique_users))))
    product_to_col = dict(zip(unique_products, range(len(unique_products))))

    rows = ratings_df['userId'].map(user_mapper)
    cols = ratings_df['productId'].map(product_to_col)

    # Users are passed as the row coordinate and products as the column
    # coordinate, so the result is already users x products (no transpose).
    shape = (len(user_mapper), len(product_to_col))
    matrix = csr_matrix((ratings_df['rating'], (rows, cols)), shape=shape)

    col_to_product = dict(enumerate(unique_products))
    return matrix, user_mapper, col_to_product

# Build the sparse ratings matrix and its id/index lookup tables from `ratings`.
user_item_matrix, user_mapper, reverse_product_mapper = create_user_item_matrix(ratings)

# Implement collaborative filtering
def recommend_products(user_id, user_item_matrix, user_mapper, reverse_product_mapper, products_df, k=5):
    """Recommend product titles using user-based collaborative filtering.

    The ``k`` users whose rating vectors are closest (cosine distance) to the
    target user are found, and every product the target user has not rated is
    scored by the similarity-weighted sum of those neighbours' ratings. The
    best-scoring products are returned, highest score first.

    Args:
        user_id: Id of the user to recommend for; must be a key of
            ``user_mapper``.
        user_item_matrix: Sparse matrix with one row per user and one column
            per product.
        user_mapper: Mapping from user id to row index in the matrix.
        reverse_product_mapper: Mapping from column index to product id.
        products_df: DataFrame with ``productId`` and ``title`` columns.
        k: Number of neighbouring users to consult, and also the maximum
            number of titles returned.

    Returns:
        A list of at most ``k`` product titles, ranked best first. The list is
        empty when ``k`` is 0, when there is no other user, or when the
        neighbours rated nothing the target user has not already rated.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_mapper``.
        ValueError: If ``k`` is negative.
    """
    if user_id not in user_mapper:
        raise KeyError(f"Unknown user_id: {user_id!r}")
    if k < 0:
        raise ValueError("k must be non-negative")

    user_idx = user_mapper[user_id]
    n_users = user_item_matrix.shape[0]

    # Ask for one extra neighbour because the target user is normally its own
    # nearest neighbour; cap at n_users because kneighbors raises when asked
    # for more neighbours than there are fitted samples.
    n_neighbors = min(k + 1, n_users)
    if k == 0 or n_neighbors < 2:
        return []

    user_row = user_item_matrix[user_idx]

    knn = NearestNeighbors(metric='cosine', algorithm='brute')
    knn.fit(user_item_matrix)
    distances, indices = knn.kneighbors(user_row, n_neighbors=n_neighbors)
    distances = distances.ravel()
    indices = indices.ravel()

    # Remove the target user by index rather than by position: with tied
    # distances (identical rating vectors) it is not guaranteed to come first.
    is_other_user = indices != user_idx
    neighbor_rows = indices[is_other_user][:k]
    similarity = 1.0 - distances[is_other_user][:k]

    # The neighbour indices are USER rows. Turn them into per-product scores
    # by summing each neighbour's ratings weighted by its similarity.
    neighbor_ratings = user_item_matrix[neighbor_rows]
    scores = np.asarray(neighbor_ratings.T.dot(similarity), dtype=float).ravel()

    # Never recommend something the target user has already rated.
    already_rated = user_row.toarray().ravel() != 0
    scores[already_rated] = 0.0

    # Stable sort keeps column order among equal scores, so output is deterministic.
    ranked_cols = np.argsort(-scores, kind='stable')
    top_cols = [int(col) for col in ranked_cols if scores[col] > 0][:k]
    recommended_ids = [reverse_product_mapper[col] for col in top_cols]

    # Look titles up one id at a time so the ranking order is preserved;
    # ids missing from products_df are skipped.
    title_by_id = dict(zip(products_df['productId'], products_df['title']))
    return [title_by_id[pid] for pid in recommended_ids if pid in title_by_id]

# Example recommendation: request recommendations for user id 9 with k=2,
# using the matrix and lookup tables built above, then print the titles.
recommendations = recommend_products(
    user_id=9, 
    user_item_matrix=user_item_matrix, 
    user_mapper=user_mapper, 
    reverse_product_mapper=reverse_product_mapper, 
    products_df=products, 
    k=2
)
print(f"Recommended products: {recommendations}")


    productId              title         category   price  \
0           1  Vacuum Cleaner 60  Home Essentials  130.68   
1           2           Jeans 15          Apparel  697.72   
2           3             Cap 67          Apparel  958.38   
3           4    Coffee Maker 11  Home Essentials  200.85   
4           5         Blender 48  Home Essentials  594.97   
..        ...                ...              ...     ...   
95         96      Smartwatch 46      Electronics  418.37   
96         97      Smartwatch 20      Electronics  832.81   
97         98       Biography 84            Books  119.05   
98         99     Smartwatch 100      Electronics  322.78   
99        100       Biography 76            Books  255.49   

                                          description  
0   This is a high-quality vacuum cleaner in the h...  
1   This is a high-quality jeans in the apparel ca...  
2   This is a high-quality cap in the apparel cate...  
3   This is a high-quality coffee maker in 