In [None]:
# Loads the data from the Tesco Groceries dataset and assigns shelf numbers to each category based on a preset range for each division.

import pandas as pd
import numpy as np

# load data
data = pd.read_csv('./tesco_groceries_dataset.csv')

# get "breadcrumbs" column (taken before sorting; the assignments below align on the index,
# so the row order of this Series does not matter)
breadcrumbs = data['breadcrumbs']

# Sort the products by breadcrumb path. Rebinding `data` gives the same result as
# inplace=True without mutating the freshly loaded frame.
data = data.sort_values(by='breadcrumbs')

# breadcrumbs contains a category path joined by ~. Split it once and reuse the pieces.
breadcrumb_parts = breadcrumbs.str.split('~')

# category is the second-to-last path segment (breadcrumb[-2]), not the first one
data['category'] = breadcrumb_parts.str[-2]

# division is the first path segment (breadcrumb[0])
data['division'] = breadcrumb_parts.str[0]

# one row per (division, category) pair, with `count` = number of product rows in that pair
division_category_count = (
    data.groupby(['division', 'category'])
    .size()
    .reset_index(name='count')
)

# Categories pinned to a fixed shelf number, keyed by category name.
special_category_map = {
    "Cooking Sauces, Meal Kits & Sides": 8,
    "Cooking Ingredients": 17,
    "Crisps, Snacks, Nuts & Popcorn": 3,
    "Batteries": 1,
}

# Shelf range per division as (first shelf, last shelf).
# assign_shelf hands shelves out starting at the first value and never goes past the last.
division_shelf_ranges = {
    "Baby": (79, 84),
    "Bakery": (18, 20),
    "Drinks": (38, 50),
    "Food Cupboard": (2, 17),
    "Fresh Food": (21, 37),
    "Frozen Food": (51, 55),
    "Health & Beauty": (56, 68),
    "Home & Ents": (85, 95),
    "Household": (69, 75),
    "Pets": (76, 78),
}



def assign_shelf(row, assigned_shelves, special_map=None, shelf_ranges=None):
    """Pick the shelf number for one (division, category) row.

    Args:
        row: Mapping-like object with "category" and "division" entries
            (for example a DataFrame row passed by ``apply(axis=1)``).
        assigned_shelves: Dict mapping division -> next shelf to hand out.
            It is mutated on every call that falls through to the range logic,
            so the results depend on the order in which rows are processed.
        special_map: Optional dict mapping category -> fixed shelf number.
            Defaults to the module-level ``special_category_map``.
        shelf_ranges: Optional dict mapping division -> (first, last) shelf.
            Defaults to the module-level ``division_shelf_ranges``.

    Returns:
        The fixed shelf for a category listed in ``special_map``; otherwise the
        next shelf from the division's range; ``None`` when the division has
        no configured range.
    """
    # Fall back to the notebook-level tables so existing two-argument calls keep working.
    if special_map is None:
        special_map = special_category_map
    if shelf_ranges is None:
        shelf_ranges = division_shelf_ranges

    category = row["category"]
    division = row["division"]

    # Pinned categories bypass the per-division counter and do not consume a shelf from it.
    # NOTE(review): a pinned shelf that lies inside a division's range is not skipped by the
    # counter, so another category of that division can receive the same number - confirm
    # that this sharing is intended.
    if category in special_map:
        return special_map[category]

    # Unknown division: nothing to assign.
    if division not in shelf_ranges:
        return None

    range_start, range_end = shelf_ranges[division]

    # First category seen for a division starts at the beginning of its range.
    shelf = assigned_shelves.setdefault(division, range_start)

    # Advance the counter, but clamp it at the end of the range: once the range is
    # exhausted every further category of this division receives the last shelf.
    assigned_shelves[division] = min(shelf + 1, range_end)

    return shelf

# Per-division counters consumed by assign_shelf; created in this cell so that
# re-running the cell starts every division from the beginning of its range again
assigned_shelves = dict()

# Go through the rows in order and let assign_shelf choose a shelf for each category
division_category_count["shelf"] = division_category_count.apply(
    lambda row: assign_shelf(row, assigned_shelves),
    axis=1,
)

# Serialise as a JSON array with one object per row, then print it
division_category_count_json = division_category_count.to_json(orient='records')
print(division_category_count_json)

[{"division":"Baby","category":"Baby & Toddler Food","count":4,"shelf":79},{"division":"Baby","category":"Baby & Toddler Toiletries","count":1,"shelf":80},{"division":"Baby","category":"Baby Bottles & Accessories","count":2,"shelf":81},{"division":"Baby","category":"Baby Weaning & Accessories","count":2,"shelf":82},{"division":"Baby","category":"Nappies & Pants","count":1,"shelf":83},{"division":"Baby","category":"Toys & Nursery Accessories","count":4,"shelf":84},{"division":"Bakery","category":"Bread & Rolls","count":2,"shelf":18},{"division":"Bakery","category":"Cakes, Cake Bars, Slices & Pies","count":5,"shelf":19},{"division":"Bakery","category":"Free From Bakery","count":2,"shelf":20},{"division":"Drinks","category":"Adult Soft Drinks & Mixers","count":1,"shelf":38},{"division":"Drinks","category":"Alcohol Gift Sets","count":1,"shelf":39},{"division":"Drinks","category":"Beer & Cider","count":7,"shelf":40},{"division":"Drinks","category":"Coffee","count":2,"shelf":41},{"division":

In [26]:
# Load the Costco product list and pull out its sub-category column
costco_data = pd.read_csv("./costco.csv")
categories = costco_data["Sub Category"]

# Distinct sub-category labels, in order of first appearance
print(categories.unique())

# Number of rows per sub-category, most frequent first
category_counts = categories.value_counts()
print(category_counts)

['Bakery & Desserts' 'Beverages & Water' 'Breakfast' 'Candy'
 'Cleaning Supplies' 'Coffee' 'Deli' 'Floral' 'Gift Baskets' 'Household'
 'Kirkland Signature Grocery' 'Laundry Detergent & Supplies'
 'Meat & Seafood' 'Organic' 'Pantry & Dry Goods'
 'Paper & Plastic Products' 'Poultry' 'Seafood' 'Snacks']
Snacks                          293
Pantry & Dry Goods              171
Candy                           154
Beverages & Water               148
Meat & Seafood                  144
Kirkland Signature Grocery      122
Coffee                           95
Cleaning Supplies                94
Gift Baskets                     89
Paper & Plastic Products         88
Household                        81
Floral                           75
Seafood                          47
Laundry Detergent & Supplies     39
Organic                          33
Bakery & Desserts                33
Deli                             21
Breakfast                        21
Poultry                           9
Name: Sub Cate