### Building a product recommendation system with my own dataset

In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [3]:
# Step 1: Define dataset size
# Sizing knobs for the synthetic dataset generated below.
num_users = 500          # distinct users to simulate
num_interactions = 5000  # rating events to generate
num_products = 20        # catalogue size: limited to 20 grocery items

In [4]:
# Step 2: Define grocery product names
# The position in this list determines which product ID each name is paired with later.
grocery_items = [
    "Rice", "Wheat Flour", "Sugar", "Salt", "Milk",
    "Eggs", "Butter", "Cheese", "Yogurt", "Honey",
    "Tea", "Coffee", "Pasta", "Noodles", "Cereal",
    "Oats", "Cooking Oil", "Bread", "Biscuits", "Juice",
]

In [5]:
# Step 3: Generate user and product IDs
# IDs are 1-based strings: "U1".."U<num_users>" and "P1".."P<num_products>".
user_ids = [f"U{i}" for i in range(1, num_users + 1)]
product_ids = [f"P{i}" for i in range(1, num_products + 1)]
# Pair each product ID with the grocery name at the same position.
product_mapping = {
    product_id: item_name
    for product_id, item_name in zip(product_ids, grocery_items)
}

In [6]:
# Step 4: Generate random user-product interactions
# Seed the RNG so that "Restart Kernel -> Run All" regenerates the same interactions
# instead of a different dataset on every run.
random.seed(42)
# Capture the reference day once: every row is then offset from the same date,
# even if the loop happens to run across midnight.
today = datetime.now().date()
data = []
for _ in range(num_interactions):
    user = random.choice(user_ids)
    product = random.choice(product_ids)
    rating = random.randint(1, 5)  # Ratings from 1 to 5
    # Interaction date falls between 1 and 365 days before today.
    timestamp = today - timedelta(days=random.randint(1, 365))
    # Store the human-readable product name rather than the internal "P<n>" ID.
    data.append([user, product_mapping[product], rating, timestamp])

In [7]:
# Step 5: Create DataFrame
# One row per generated interaction; the column order matches the layout of each row list.
df = pd.DataFrame(
    data=data,
    columns=["User_ID", "Product_Name", "Rating", "Date"],
)

In [8]:
# Preview the generated interactions; as the last expression of the cell,
# the frame is rendered as the cell output.
df

Unnamed: 0,User_ID,Product_Name,Rating,Date
0,U18,Cooking Oil,5,2024-11-04
1,U85,Sugar,2,2024-10-14
2,U478,Oats,3,2025-02-26
3,U482,Bread,5,2024-08-19
4,U141,Cereal,5,2024-08-29
...,...,...,...,...
4995,U363,Butter,2,2024-08-24
4996,U40,Rice,4,2025-01-11
4997,U43,Pasta,2,2024-04-10
4998,U258,Cheese,3,2024-11-26


In [9]:
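# Step 6: Save dataset to CSV
# NOTE(review): Step 7 reads this file, so it must already exist on disk.
# Uncomment and run once to create it; re-running overwrites it with newly generated data.
# df.to_csv("grocery_recommendation_dataset.csv", index=False)
# print("Dataset generated and saved as 'grocery_recommendation_dataset.csv'")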

In [10]:
# Step 7: Load dataset
# The CSV is what every later step works from. The save step is commented out, so on a
# fresh checkout the file may not exist; in that case persist the frame generated in
# Step 5 and read it back, so column dtypes are the same on both paths.
dataset_path = "grocery_recommendation_dataset.csv"
try:
    df = pd.read_csv(dataset_path)
except FileNotFoundError:
    # `df` here is still the in-memory frame, because the assignment above did not happen.
    df.to_csv(dataset_path, index=False)
    df = pd.read_csv(dataset_path)
df

Unnamed: 0,User_ID,Product_Name,Rating,Date
0,U294,Honey,1,2024-12-21
1,U409,Bread,2,2024-04-26
2,U304,Bread,4,2024-10-23
3,U455,Eggs,4,2025-01-07
4,U216,Cheese,1,2024-05-05
...,...,...,...,...
4995,U313,Honey,2,2025-02-01
4996,U67,Oats,2,2024-06-23
4997,U496,Sugar,1,2024-07-01
4998,U327,Biscuits,3,2025-01-09


In [11]:
# Step 8: Create a user-product matrix
# Rows are users, columns are products. With pivot_table's default aggregation (mean),
# repeated ratings of the same product by the same user are averaged; user/product
# pairs with no rating are filled with 0.
user_product_matrix = pd.pivot_table(
    df,
    index="User_ID",
    columns="Product_Name",
    values="Rating",
    fill_value=0,
)
user_product_matrix

Product_Name,Biscuits,Bread,Butter,Cereal,Cheese,Coffee,Cooking Oil,Eggs,Honey,Juice,Milk,Noodles,Oats,Pasta,Rice,Salt,Sugar,Tea,Wheat Flour,Yogurt
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
U1,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,1.0,0.0,0.0,2.5,3.0,0.0,1.5,0.0,0.000000,0.000000,4.5,0.0
U10,2.0,0.0,0.0,4.0,0.0,5.0,0.0,1.0,0.0,0.0,4.0,5.0,4.0,0.0,4.0,3.0,0.000000,0.000000,4.0,5.0
U100,0.0,1.0,2.0,1.0,0.0,0.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,2.0,0.0,0.0,1.000000,0.000000,5.0,0.0
U101,0.0,5.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,1.5,0.0,0.0,0.000000,0.000000,0.0,0.0
U102,3.0,0.0,2.5,0.0,1.0,0.0,0.0,3.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,3.0,3.000000,0.000000,0.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
U95,0.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.000000,0.000000,0.0,3.0
U96,5.0,0.0,4.0,2.5,5.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.000000,4.000000,0.0,2.0
U97,4.0,4.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,4.0,0.0,0.0,5.000000,0.000000,3.0,3.0
U98,4.0,0.0,0.0,5.0,0.0,0.0,3.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,1.0


In [12]:
# Step 9: Convert to sparse matrix
# CSR keeps only the non-zero entries of the dense user x product table.
sparse_matrix = csr_matrix(user_product_matrix.to_numpy())
sparse_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 3928 stored elements and shape (500, 20)>

In [13]:
# Step 10: Compute similarity between products
# Transposing puts products on the rows, so each cosine score compares two products
# by their vectors of per-user ratings.
product_similarity = cosine_similarity(sparse_matrix.transpose())
# Label both axes with product names so scores can be looked up by name.
product_similarity_df = pd.DataFrame(
    data=product_similarity,
    index=user_product_matrix.columns,
    columns=user_product_matrix.columns,
)

In [14]:
# Only the last expression of a cell is rendered, so display the labelled frame;
# the raw `product_similarity` array holds the same scores without product names.
product_similarity_df

Product_Name,Biscuits,Bread,Butter,Cereal,Cheese,Coffee,Cooking Oil,Eggs,Honey,Juice,Milk,Noodles,Oats,Pasta,Rice,Salt,Sugar,Tea,Wheat Flour,Yogurt
Product_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Biscuits,1.0,0.344585,0.338733,0.387661,0.348652,0.311869,0.347278,0.330596,0.359177,0.353652,0.328851,0.32144,0.361513,0.26531,0.336407,0.369014,0.352448,0.372383,0.337321,0.303658
Bread,0.344585,1.0,0.356921,0.321524,0.275354,0.358723,0.282894,0.289362,0.299023,0.323053,0.341913,0.310196,0.339721,0.344749,0.34194,0.342628,0.360514,0.347934,0.307397,0.273801
Butter,0.338733,0.356921,1.0,0.365845,0.302496,0.382314,0.338493,0.296605,0.288803,0.304623,0.354258,0.309197,0.31807,0.280526,0.364721,0.326134,0.310992,0.344281,0.356905,0.355983
Cereal,0.387661,0.321524,0.365845,1.0,0.341287,0.381034,0.350752,0.317114,0.325919,0.359087,0.331505,0.294647,0.352844,0.310067,0.288798,0.362895,0.329762,0.369518,0.342402,0.347218
Cheese,0.348652,0.275354,0.302496,0.341287,1.0,0.326307,0.317239,0.363369,0.368104,0.312428,0.353391,0.305789,0.341736,0.290731,0.347241,0.320528,0.30017,0.332558,0.3081,0.32309
Coffee,0.311869,0.358723,0.382314,0.381034,0.326307,1.0,0.301184,0.364763,0.283063,0.378733,0.42648,0.319266,0.377212,0.37837,0.387703,0.3408,0.354572,0.439712,0.345376,0.349311
Cooking Oil,0.347278,0.282894,0.338493,0.350752,0.317239,0.301184,1.0,0.329368,0.346534,0.307838,0.337318,0.335759,0.31784,0.338866,0.333624,0.288364,0.316797,0.374169,0.357171,0.350579
Eggs,0.330596,0.289362,0.296605,0.317114,0.363369,0.364763,0.329368,1.0,0.315562,0.290488,0.310049,0.286586,0.361142,0.342829,0.335431,0.362935,0.327574,0.292948,0.308086,0.316884
Honey,0.359177,0.299023,0.288803,0.325919,0.368104,0.283063,0.346534,0.315562,1.0,0.363859,0.349917,0.331039,0.323852,0.322178,0.321197,0.326243,0.342717,0.372619,0.311757,0.305282
Juice,0.353652,0.323053,0.304623,0.359087,0.312428,0.378733,0.307838,0.290488,0.363859,1.0,0.292513,0.318656,0.317285,0.268256,0.297834,0.382956,0.31844,0.32435,0.305096,0.315256


In [15]:
# Step 11: Function to get product recommendations
def get_recommendations(product_name, num_recommendations=5):
    """Return the products most similar to ``product_name``.

    Similarity scores are read from the module-level ``product_similarity_df``
    (a square frame labelled by product name on both axes).

    Parameters
    ----------
    product_name : str
        Product to find neighbours for; must be a label in ``product_similarity_df``.
    num_recommendations : int, default 5
        Maximum number of product names to return.

    Returns
    -------
    list
        Up to ``num_recommendations`` product names ordered from most to least
        similar, never including ``product_name`` itself. Returns
        ``["Product not found"]`` when the product is unknown, and an empty list
        when ``num_recommendations`` is zero or negative.
    """
    if product_name not in product_similarity_df.index:
        return ["Product not found"]
    if num_recommendations <= 0:
        # A negative count would otherwise slice from the wrong end of the ranking.
        return []
    # Remove the product's own entry by label instead of assuming it sorts first:
    # with tied scores (or an all-zero rating column) it is not guaranteed to be at
    # position 0, and skipping position 0 blindly could drop a real neighbour while
    # returning the product itself.
    scores = product_similarity_df[product_name].drop(labels=product_name)
    top_matches = scores.sort_values(ascending=False).head(num_recommendations)
    return list(top_matches.index)

In [28]:
# Example usage:
# Ask for the default number of suggestions for one product and report them.
example_product = "Sugar"  # Replace with a valid product name from dataset
recommended_products = get_recommendations(product_name=example_product)
print(f"Recommended products for {example_product}: {recommended_products}")

Recommended products for Sugar: ['Milk', 'Bread', 'Tea', 'Coffee', 'Rice']
