# Create the customer ratings used in this sample.

The underlying product catalog is from the .NET eShop sample on GitHub. There are no customers within it. In this notebook we will create 10000 customers and generate a random set of up to 20 product ratings for each of them. We will then take the random ratings and apply weights to them by brand, product category (Type) and price to provide a more meaningful and useful dataset with which to train our prediction model.

In [None]:

import pandas as pd

# Read the product catalog into a DataFrame.
# NOTE(review): the later cells open "./data/catalog/catalog.json" - confirm that
# this bare relative path resolves to the same file from the notebook's working directory.
json_file_path = 'catalog.json'
df = pd.read_json(json_file_path)

# Bucket the catalog by 'Type' (product category) and collect the product names in
# each bucket. Swap `list` for another aggregation (count, mean price, ...) as needed.
grouped_items = df.groupby('Type').agg({'Name': list})

print(grouped_items)


## Randomly rate 5-20 products for each of 10000 users


In [13]:

# Imports live in this cell so it runs on a fresh kernel (Restart & Run All).
import json
import random

# Load the product catalog to get product IDs
with open("./data/catalog/catalog.json", "r") as file:
    product_catalog = json.load(file)

# Extract product IDs
product_ids = [product["Id"] for product in product_catalog]

# Inclusive bounds on how many products a single user rates
min_products_per_user = 5
max_products_per_user = 20

# Ratings are whole numbers from 1 to max_rating (inclusive)
max_rating = 10

# Exclusive upper bound for the user-ID range below
num_user_ids = 10001

# Generate user IDs 1..10000 (range() excludes num_user_ids itself)
current_user_ids = list(range(1, num_user_ids))

# Dedicated, seeded generator so the ratings file is reproducible between runs
# and the global `random` state used by other cells is left untouched.
RANDOM_SEED = 42
ratings_rng = random.Random(RANDOM_SEED)

current_ratings = []
# Generate new ratings while ensuring not all products are rated
for user_id in current_user_ids:
    # random.Random.randint includes both endpoints, so a user rates 5..20 products.
    num_products_to_sample = ratings_rng.randint(min_products_per_user, max_products_per_user)
    # sample() picks distinct products; cap at the catalog size for small catalogs.
    products_to_rate = ratings_rng.sample(product_ids, min(len(product_ids), num_products_to_sample))

    # Every sampled ID comes from product_ids, i.e. from the catalog itself,
    # so no per-product catalog lookup is needed before recording the rating.
    for product_id in products_to_rate:
        current_ratings.append({
            "UserId": user_id,
            "ProductId": product_id,
            "Rating": ratings_rng.randint(1, max_rating)
        })

# Convert the updated data to JSON format
json_data = json.dumps(current_ratings, indent=4)

# Write the updated JSON data to a new file
with open("./data/ratings/randomRatings.json", "w") as file:
    file.write(json_data)

## Verify random ratings created

In [None]:
import json

# Read back the ratings file produced by the generation step
with open("./data/ratings/randomRatings.json", "r") as file:
    ratings = json.load(file)

# Tally how many ratings each user contributed, keyed by UserId
ratings_count_by_user = {}
for rating in ratings:
    user_id = rating["UserId"]
    ratings_count_by_user[user_id] = ratings_count_by_user.get(user_id, 0) + 1

# One dictionary entry per distinct user
total_user_ids = len(ratings_count_by_user)

# Report the number of distinct users, then the per-user counts
print(f"Total number of user IDs: {total_user_ids}")

for user_id, count in ratings_count_by_user.items():
    print(f"User ID: {user_id} has rated {count} items.")

## Advanced Rating system
The random ratings are now replaced with patterned ratings derived from brand, product type and price. The result is stored in a new file, 'actualRatings.json'.


In [15]:
import json
import random

# Load the current user ratings
with open("./data/ratings/randomRatings.json", "r") as file:
    current_ratings = json.load(file)

# Load the product catalog
with open("./data/catalog/catalog.json", "r") as file:
    product_catalog = json.load(file)

# NOTE(review): not referenced by any of the visible cells - confirm whether it is still needed.
total_desired_ratings = 200000

# Define preferences
# Per-brand score; brands missing from this table fall back to 5 in calculate_rating.
brand_preferences = {"Daybird": 8, "Gravitator": 7, "WildRunner": 9, "Quester": 6, "B&R": 5, "Raptor Elite": 7, "Solstix": 8, "Grolltex": 6, "AirStrider": 7, "Green Equipment": 9, "Legend": 5, "Zephyr": 6, "XE": 4}
# Product types a user may be assigned as "preferred"
type_preferences = ["Footwear", "Climbing", "Ski/boarding", "Bags", "Jackets", "Navigation", "Cycling", "Trekking"]

# Take the user IDs from the ratings that were actually loaded instead of a
# hard-coded 1..10000 range, so every rated user gets a preference entry and the
# per-user lookup in calculate_rating cannot hit a missing key.
curr = sorted({rating["UserId"] for rating in current_ratings})

# Dedicated, seeded generator so the simulated preferences are reproducible
# between runs without touching the global `random` state.
RANDOM_SEED = 42
preference_rng = random.Random(RANDOM_SEED)

# Simulate user preferences for product types:
# each user prefers a random, non-empty subset of the known types.
user_type_preferences = {
    user_id: preference_rng.sample(type_preferences, k=preference_rng.randint(1, len(type_preferences)))
    for user_id in curr
}

# Normalize price to a 1-10 scale
max_price = max(product["Price"] for product in product_catalog)
min_price = min(product["Price"] for product in product_catalog)
price_range = max_price - min_price

def normalize_price(price, low=None, high=None):
    """Map a price linearly onto the 1-10 rating scale.

    Args:
        price: The product price to normalize.
        low: Price that maps to 1. Defaults to the module-level ``min_price``.
        high: Price that maps to 10. Defaults to the module-level ``max_price``.

    Returns:
        A float: 1 for ``low``, 10 for ``high``, linear in between. When
        ``low == high`` the price carries no information, so the scale
        midpoint 5.5 is returned instead of dividing by zero.
    """
    low = min_price if low is None else low
    high = max_price if high is None else high
    span = high - low
    if span == 0:
        # All prices identical: avoid ZeroDivisionError and stay neutral.
        return 5.5
    return 1 + (((price - low) / span) * 9)

# Function to calculate rating based on brand, type, and price
def calculate_rating(user_id, product):
    """Score a product for a user as a weighted blend of price, brand and type.

    Args:
        user_id: Key into ``user_type_preferences``.
        product: Catalog entry; its "Price", "Brand" and "Type" fields are read.

    Returns:
        0.4 * normalized price + 0.3 * brand score + 0.3 * type score, unrounded.
    """
    # Price contribution: the price normalized by normalize_price
    weighted_price = normalize_price(product["Price"]) * 0.4
    # Brand contribution: brands without an entry get the mid-preference 5
    weighted_brand = brand_preferences.get(product["Brand"], 5) * 0.3
    # Type contribution: 10 when the user prefers this product type, otherwise 5
    if product["Type"] in user_type_preferences[user_id]:
        weighted_type = 10 * 0.3
    else:
        weighted_type = 5 * 0.3
    return weighted_price + weighted_brand + weighted_type

# Index the catalog by product Id. Looking products up by list position
# (ProductId - 1) is only correct when Ids are contiguous, 1-based and in file
# order; otherwise it silently scores the wrong product (an Id of 0 would even
# wrap around to the last entry). With this index an unknown Id raises KeyError.
products_by_id = {product["Id"]: product for product in product_catalog}

current = []
# Re-score every (user, product) pair from the random ratings file.
# Only the pairing is reused; the random "Rating" value itself is discarded.
for random_rating in current_ratings:
    user_id = random_rating["UserId"]
    product = products_by_id[random_rating["ProductId"]]
    # Rating derived from price, brand and the user's preferred product types
    rating = calculate_rating(user_id, product)
    current.append({
        "UserId": user_id,
        "ProductId": product["Id"],
        "Rating": round(rating, 2)  # Rounded for readability
    })


# Convert the updated data to JSON format
json_data = json.dumps(current, indent=4)

# Write the patterned ratings next to the random ones
with open("./data/ratings/actualRatings.json", "w") as file:
    file.write(json_data)