In [1]:
import pandas as pd
import numpy as np
import random

# Load the dataset
products = pd.read_csv('products_dataset.csv')

# Simulate users
num_users = 500
user_ids = [f'U{i:03d}' for i in range(1, num_users + 1)]

# Create random user-product purchase interactions
interactions = []
for user in user_ids:
    purchased_products = random.sample(list(products['Product ID']), k=random.randint(5, 20))
    for product in purchased_products:
        interactions.append((user, product, 1))  # 1 means purchased

# Convert to DataFrame
interactions_df = pd.DataFrame(interactions, columns=['UserID', 'ProductID', 'Purchased'])


In [2]:
# Pivot to create user-item matrix
user_item_matrix = interactions_df.pivot_table(index='UserID', columns='ProductID', values='Purchased', fill_value=0)

In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute item-item similarity matrix
item_similarity = cosine_similarity(user_item_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)


In [4]:
def recommend_products_by_category(category, top_n=5):
    # Filter products by category
    category_products = products[products['Category'] == category]['Product ID'].tolist()
    
    # Select the top N products based on overall popularity
    product_popularity = interactions_df[interactions_df['ProductID'].isin(category_products)]\
                         .groupby('ProductID').size().sort_values(ascending=False)
    
    top_products = product_popularity.head(top_n).index.tolist()
    
    # Get product details
    recommended = products[products['Product ID'].isin(top_products)]
    
    return recommended[['Product ID', 'Category', 'Rating']]

# Example: Recommend top 5 Shoes
recommend_products_by_category('Shoes', 5)


Unnamed: 0,Product ID,Category,Rating
52,P0053,Shoes,4.2
156,P0157,Shoes,3.3
406,P0407,Shoes,1.5
668,P0669,Shoes,4.5
861,P0862,Shoes,4.5
