In [3]:
# Import the required libraries
import pandas as pd
import numpy as np
import random

In [5]:
# Set random seed for reproducibility
np.random.seed(0)

# Generate user data
user_count = 1000
categories = ['Electronics', 'Books', 'Clothing', 'Home', 'Sports']

user_data = {
    'user_id': range(1, user_count + 1),
    'preferred_category': np.random.choice(categories, user_count)
}
users = pd.DataFrame(user_data)

# Display the first few rows to verify
users.head()


Unnamed: 0,user_id,preferred_category
0,1,Sports
1,2,Electronics
2,3,Home
3,4,Home
4,5,Home


In [7]:
# Generate product data
product_count = 500

product_data = {
    'product_id': range(101, 101 + product_count),
    'category': np.random.choice(categories, product_count),
    'rating': np.round(np.random.uniform(1, 5, product_count), 1),  # Ratings between 1.0 and 5.0
    'price': np.random.randint(10, 500, product_count),              # Prices between 10 and 500
    'popularity': np.random.randint(1, 200, product_count)           # Popularity count between 1 and 200
}
products = pd.DataFrame(product_data)

# Display the first few rows to verify
products.head()


Unnamed: 0,product_id,category,rating,price,popularity
0,101,Home,4.1,335,42
1,102,Electronics,4.4,315,20
2,103,Clothing,4.4,35,68
3,104,Sports,2.0,278,128
4,105,Books,4.3,62,120


In [9]:
# Generate interaction data
interaction_count = 10000
interaction_types = ['click', 'purchase']

interaction_data = {
    'user_id': np.random.choice(users['user_id'], interaction_count),
    'product_id': np.random.choice(products['product_id'], interaction_count),
    'interaction_type': np.random.choice(interaction_types, interaction_count)
}
interactions = pd.DataFrame(interaction_data)

# Display the first few rows to verify
interactions.head()


Unnamed: 0,user_id,product_id,interaction_type
0,799,408,click
1,782,280,click
2,789,263,click
3,885,452,purchase
4,332,255,click


In [11]:
# Save datasets to an Excel file with multiple sheets
with pd.ExcelWriter("rank_based_recommendation_data.xlsx") as writer:
    users.to_excel(writer, sheet_name="Users", index=False)
    products.to_excel(writer, sheet_name="Products", index=False)
    interactions.to_excel(writer, sheet_name="Interactions", index=False)

print("Excel file 'rank_based_recommendation_data.xlsx' created successfully.")


Excel file 'rank_based_recommendation_data.xlsx' created successfully.


In [13]:
import pandas as pd
import numpy as np

# Load data from the Excel file
data_file = "rank_based_recommendation_data.xlsx"
users = pd.read_excel(data_file, sheet_name="Users")
products = pd.read_excel(data_file, sheet_name="Products")
interactions = pd.read_excel(data_file, sheet_name="Interactions")

# Display the first few rows of each dataset to verify
print("Users Data:\n", users.head())
print("\nProducts Data:\n", products.head())
print("\nInteractions Data:\n", interactions.head())


Users Data:
    user_id preferred_category
0        1             Sports
1        2        Electronics
2        3               Home
3        4               Home
4        5               Home

Products Data:
    product_id     category  rating  price  popularity
0         101         Home     4.1    335          42
1         102  Electronics     4.4    315          20
2         103     Clothing     4.4     35          68
3         104       Sports     2.0    278         128
4         105        Books     4.3     62         120

Interactions Data:
    user_id  product_id interaction_type
0      799         408            click
1      782         280            click
2      789         263            click
3      885         452         purchase
4      332         255            click


In [15]:
# Step 1: Calculate total interactions for each product
product_interactions = interactions.groupby('product_id').size().reset_index(name='total_interactions')

# Step 2: Calculate the number of purchases for each product
product_purchases = interactions[interactions['interaction_type'] == 'purchase'].groupby('product_id').size().reset_index(name='total_purchases')

# Step 3: Merge these interaction counts with the products dataset
products = products.merge(product_interactions, on='product_id', how='left').merge(product_purchases, on='product_id', how='left')

# Fill any missing values with 0 for products with no recorded interactions
products['total_interactions'].fillna(0, inplace=True)
products['total_purchases'].fillna(0, inplace=True)

# Display processed product data
products.head()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  products['total_interactions'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  products['total_purchases'].fillna(0, inplace=True)


Unnamed: 0,product_id,category,rating,price,popularity,total_interactions,total_purchases
0,101,Home,4.1,335,42,21,11
1,102,Electronics,4.4,315,20,14,8
2,103,Clothing,4.4,35,68,15,8
3,104,Sports,2.0,278,128,24,10
4,105,Books,4.3,62,120,12,6


In [17]:
# Define a scoring function for ranking
def calculate_score(row):
    # Formula for score: weighted sum of factors
    return (row['rating'] * 2) + (row['popularity'] * 0.5) + (row['total_interactions'] * 0.3) + (row['total_purchases'] * 1.5)

# Apply the scoring function to each product
products['score'] = products.apply(calculate_score, axis=1)

# Sort products by score in descending order
products = products.sort_values(by='score', ascending=False)

# Display top 10 recommended products
products[['product_id', 'category', 'rating', 'popularity', 'total_interactions', 'total_purchases', 'score']].head(10)


Unnamed: 0,product_id,category,rating,popularity,total_interactions,total_purchases,score
498,599,Books,1.5,199,30,22,144.5
234,335,Electronics,1.3,199,30,19,139.6
472,573,Books,4.8,185,25,17,135.1
220,321,Home,4.2,193,24,14,133.1
253,354,Sports,4.6,189,24,14,131.9
441,542,Sports,1.9,199,23,14,131.2
462,563,Sports,4.9,198,22,10,130.4
170,271,Electronics,2.5,190,23,15,129.4
82,183,Clothing,4.7,188,26,12,129.2
460,561,Books,2.8,192,27,13,129.2


In [19]:
# Define a function to recommend top products for a given user
def recommend_products(user_id, num_recommendations=5):
    # Get the user's preferred category
    preferred_category = users.loc[users['user_id'] == user_id, 'preferred_category'].values[0]
    
    # Filter products based on the preferred category
    preferred_products = products[products['category'] == preferred_category]
    
    # Select the top products based on score
    top_products = preferred_products.head(num_recommendations)
    
    return top_products[['product_id', 'category', 'rating', 'popularity', 'score']]

# Test recommendations for a specific user
user_id = 1  # Example user ID
recommendations = recommend_products(user_id)
print(f"Top recommendations for User {user_id}:\n", recommendations)


Top recommendations for User 1:
      product_id category  rating  popularity  score
253         354   Sports     4.6         189  131.9
441         542   Sports     1.9         199  131.2
462         563   Sports     4.9         198  130.4
239         340   Sports     4.4         176  124.7
342         443   Sports     4.7         194  122.0
